dataset_name_to_object[row['rei_name']] = newdataset
                            existing_datasets_list.append(newdataset)
                            newsource = Source.objects.get(name=row['rei_name'], datasetId=newdataset.pk)
                            newsource.description = json.dumps(source_description)
                            newsource.save()
                            source_name_to_object[row['rei_name']] = newsource

                    variable_name = '%s - %s - Sex: %s - Age: %s (%s)' % (
                        row['measure_name'], row['rei_name'], row['sex_name'], row['age_name'], row['metric_name'])
                    variable_code = '%s %s %s %s %s' % (
                    row['measure_id'], row['rei_id'], row['sex_id'], row['age_id'], row['metric_id'])

                    if variable_name not in existing_variables_list:
                        newvariable = Variable(name=variable_name,
                                               unit=row['metric_name'],
                                               code=variable_code,
                                               fk_dst_id=dataset_name_to_object[row['rei_name']],
                                               fk_var_type_id=VariableType.objects.get(pk=4),
                                               sourceId=source_name_to_object[row['rei_name']])
                        newvariable.save()
                        variable_name_to_object[variable_name] = newvariable
                        existing_variables_list.add(newvariable.name)
                    else:
                        if variable_name not in variable_name_to_object:
                            newvariable = Variable.objects.get(name=variable_name,
                                                               fk_dst_id=dataset_name_to_object[row['rei_name']])
                            while DataValue.objects.filter(fk_var_id__pk=newvariable.pk).first():
                                with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                                    c.execute('DELETE FROM %s WHERE fk_var_id = %s LIMIT 10000;' %
                                              (DataValue._meta.db_table, newvariable.pk))
                            variable_name_to_object[variable_name] = newvariable
Esempio n. 2
0
                                if section == 'Hygiene':
                                    varname = 'Hygiene'
                                varname += ' - ' + columns[section][
                                    column_number]['name'] + ' - ' + columns[
                                        section][column_number]['type']

                                percent_varname = varname + ' - Percent'
                                varunit = 'Percent'

                                if percent_varname.lower(
                                ) not in existing_variables_list:
                                    newvariable = Variable(
                                        name=percent_varname,
                                        unit=varunit,
                                        code=None,
                                        datasetId=newdataset,
                                        variableTypeId=VariableType.objects.
                                        get(pk=4),
                                        sourceId=source_name_to_object[
                                            source_name])
                                    newvariable.save()
                                    variable_name_to_object[
                                        percent_varname.lower()] = newvariable
                                    existing_variables_list.add(
                                        newvariable.name.lower())
                                else:

                                    if percent_varname.lower(
                                    ) not in variable_name_to_object:
                                        newvariable = Variable.objects.get(
                                            name=percent_varname,
Esempio n. 3
0
                                    the_subcategory.name,
                                    namespace='clioinfra')
                            source_description['link'] = filename_to_pagelink[
                                one_file]
                            newsource = Source(
                                name=varname,
                                description=json.dumps(source_description),
                                datasetId=newdataset.pk)
                            newsource.save()

                            newvariable = Variable(
                                name=varname,
                                unit=varunit if varunit else '',
                                short_unit=short_unit_extract(varunit),
                                description='',
                                code=filename_to_pagelink[one_file]
                                [filename_to_pagelink[one_file].rfind('/') +
                                 1:],
                                timespan='',
                                datasetId=newdataset,
                                variableTypeId=VariableType.objects.get(pk=4),
                                sourceId=newsource)

                            newvariable.save()

                        if row_number == 3 and column_number > 6:
                            try:
                                column_to_year[column_number] = int(cell.value)
                            except ValueError:
                                pass

                        if row_number > 3:
                            newdataset = Dataset.objects.get(name=row['cause_name'], categoryId=the_category)
                            dataset_name_to_object[row['cause_name']] = newdataset
                            existing_datasets_list.append(newdataset)
                            newsource = Source.objects.get(name=row['cause_name'], datasetId=newdataset.pk)
                            newsource.description = json.dumps(source_description)
                            newsource.save()
                            source_name_to_object[row['cause_name']] = newsource

                    variable_name = '%s - %s - Sex: %s - Age: %s (%s)' % (
                    row['measure_name'], row['cause_name'], row['sex_name'], row['age_name'], row['metric_name'])
                    variable_code = '%s %s %s %s %s' % (row['measure_id'], row['cause_id'], row['sex_id'], row['age_id'], row['metric_id'])

                    if variable_name not in existing_variables_list:
                        newvariable = Variable(name=variable_name,
                                               unit=row['metric_name'],
                                               code=variable_code,
                                               datasetId=dataset_name_to_object[row['cause_name']], variableTypeId=VariableType.objects.get(pk=4),
                                               sourceId=source_name_to_object[row['cause_name']])
                        newvariable.save()
                        variable_name_to_object[variable_name] = newvariable
                        existing_variables_list.add(newvariable.name)
                    else:
                        if variable_name not in variable_name_to_object:
                            newvariable = Variable.objects.get(name=variable_name, datasetId=dataset_name_to_object[row['cause_name']])
                            while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
                                with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                                    c.execute('DELETE FROM %s WHERE variableId = %s LIMIT 10000;' %
                                              (DataValue._meta.db_table, newvariable.pk))
                            variable_name_to_object[variable_name] = newvariable

                    if row['location_name'] not in c_name_entity_ref:
Esempio n. 5
0
                                    if not global_cat[indicator_code]['saved']:
                                        source_description['additionalInfo'] = "Definitions and characteristics of countries and other territories: " + "https://ourworldindata.org" + reverse("servepovstatscountryinfo") + "\n"
                                        source_description['additionalInfo'] += "Limitations and exceptions:\n" + global_cat[indicator_code]['limitations'] + "\n" if global_cat[indicator_code]['limitations'] else ''
                                        source_description['additionalInfo'] += "Notes from original source:\n" + global_cat[indicator_code]['sourcenotes'] + "\n" if global_cat[indicator_code]['sourcenotes'] else ''
                                        source_description['additionalInfo'] += "General comments:\n" + global_cat[indicator_code]['comments'] + "\n" if global_cat[indicator_code]['comments'] else ''
                                        source_description['additionalInfo'] += "Statistical concept and methodology:\n" + global_cat[indicator_code]['concept'] + "\n" if global_cat[indicator_code]['concept'] else ''
                                        source_description['additionalInfo'] += "Related source links:\n" + global_cat[indicator_code]['sourcelinks'] + "\n" if global_cat[indicator_code]['sourcelinks'] else ''
                                        source_description['additionalInfo'] += "Other web links:\n" + global_cat[indicator_code]['weblinks'] + "\n" if global_cat[indicator_code]['weblinks'] else ''
                                        source_description['dataPublisherSource'] = global_cat[indicator_code]['source']
                                        newsource = Source(name='World Bank Poverty and Equity database: ' + global_cat[indicator_code]['name'],
                                                           description=json.dumps(source_description),
                                                           datasetId=newdataset.pk)
                                        newsource.save()
                                        logger.info("Inserting a source %s." % newsource.name.encode('utf8'))
                                        s_unit = short_unit_extract(global_cat[indicator_code]['unitofmeasure'])
                                        newvariable = Variable(name=global_cat[indicator_code]['name'], unit=global_cat[indicator_code]['unitofmeasure'] if global_cat[indicator_code]['unitofmeasure'] else '', short_unit=s_unit, description=global_cat[indicator_code]['description'],
                                                               code=indicator_code, timespan='', datasetId=newdataset, variableTypeId=VariableType.objects.get(pk=4), sourceId=newsource)
                                        newvariable.save()
                                        logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
                                        global_cat[indicator_code]['variable_object'] = newvariable
                                        global_cat[indicator_code]['saved'] = True
                                    else:
                                        newvariable = global_cat[indicator_code]['variable_object']
                                    for i in range(0, len(data_values)):
                                        data_values_tuple_list.append((data_values[i]['value'], data_values[i]['year'], country_name_entity_ref[country_code].pk, newvariable.pk))
                                    if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                                        with connection.cursor() as c:
                                            c.executemany(insert_string, data_values_tuple_list)
                                        logger.info("Dumping data values...")
                                        data_values_tuple_list = []

                column_number = 0
Esempio n. 6
0
        with open(file, 'r', encoding='utf8') as f:
            print('Processing: %s' % file)
            reader = csv.DictReader(f)
            for row in reader:
                row_number += 1

                variable_name = '%s - %s' % (row['ihme_indicator_description'], row['estimate_type'])
                if row.get('unscaled_value'):
                    variable_name += ' - Unscaled'
                else:
                    variable_name += ' - Scaled'

                if variable_name not in existing_variables_list:
                    newvariable = Variable(name=variable_name,
                                           unit=row['indicator_unit'],
                                           code=None,
                                           datasetId=newdataset, variableTypeId=VariableType.objects.get(pk=4),
                                           sourceId=newsource)
                    newvariable.save()
                    variable_name_to_object[variable_name] = newvariable
                    existing_variables_list.add(newvariable.name)
                else:
                    if variable_name not in variable_name_to_object:
                        newvariable = Variable.objects.get(name=variable_name, datasetId=newdataset)
                        while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
                            with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                                c.execute('DELETE FROM %s WHERE variableId = %s LIMIT 10000;' %
                                          (DataValue._meta.db_table, newvariable.pk))
                        variable_name_to_object[variable_name] = newvariable

                if row['location_name'] not in c_name_entity_ref:
Esempio n. 7
0
def process_one_row(year, value, countryname, variablecode, variablename,
                    existing_fao_variables_dict, unit, source, dataset,
                    var_desc, data_values_tuple_list):
    """Process a single parsed FAO data row: resolve (or create) the Entity for
    the country, resolve (or create) the Variable, and buffer the data value for
    a later bulk INSERT into the data_values table.

    Side effects:
      - May create Entity/Variable rows in the database.
      - Mutates the caller-owned caches `country_name_entity_ref` and
        `existing_fao_variables_dict`, and appends to `data_values_tuple_list`
        (flushed in-place to the DB once it exceeds 3000 entries).
      - Increments the module-global `processed_values` counter.

    Args:
        year: Year of the observation; False signals "no usable year" (skip).
        value: Observed value; False signals "no usable value" (skip).
        countryname: Country name as it appears in the source file.
        variablecode: Source-specific variable code (may collide; see retry below).
        variablename: Human-readable variable name, used as the cache key.
        existing_fao_variables_dict: Cache mapping variable name -> Variable object.
        unit: Unit string for the variable ('' used when falsy).
        source: Source model instance to attach to a newly created Variable.
        dataset: Dataset model instance to attach to a newly created Variable.
        var_desc: Description text for a newly created Variable.
        data_values_tuple_list: Shared buffer of (value, year, entity_pk, var_pk)
            tuples awaiting bulk insertion; emptied in place when flushed so the
            caller's reference stays valid.
    """

    global unique_data_tracker
    global processed_values

    # Yield the CPU briefly every 300 rows so a long import doesn't peg a core.
    processed_values += 1
    if processed_values % 300 == 0:
        time.sleep(
            0.001
        )  # this is done in order to not keep the CPU busy all the time

    insert_string = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'  # this is used for constructing the query for mass inserting to the data_values table

    # False (not None/0) is the sentinel for missing year/value in this pipeline.
    if year is not False and value is not False:
        # NOTE(review): this only *checks* unique_data_tracker; nothing here adds
        # to it — presumably the caller maintains it. Verify against the caller.
        if tuple([countryname, variablecode]) not in unique_data_tracker:
            if countryname not in country_name_entity_ref:
                # Entity resolution order: exact known entity name, then the
                # country-tool's normalized (unidecoded, lowercased) alias map,
                # and finally a brand-new unvalidated Entity.
                if countryname.lower() in existing_entities_list:
                    newentity = Entity.objects.get(name=countryname)
                elif country_tool_names_dict.get(
                        unidecode.unidecode(countryname.lower()), 0):
                    newentity = Entity.objects.get(
                        name=country_tool_names_dict[unidecode.unidecode(
                            countryname.lower())].owid_name)
                else:
                    newentity = Entity(name=countryname, validated=False)
                    newentity.save()
                country_name_entity_ref[countryname] = newentity

            if variablename not in existing_fao_variables_dict:
                s_unit = short_unit_extract(unit)
                newvariable = Variable(
                    name=variablename,
                    unit=unit if unit else '',
                    short_unit=s_unit,
                    description=var_desc,
                    code=variablecode,
                    timespan='',
                    fk_dst_id=dataset,
                    fk_var_type_id=VariableType.objects.get(pk=4),
                    sourceId=source)
                # The save is wrapped in an atomic block so a unique-constraint
                # failure (presumably a duplicate variable code) leaves the
                # connection usable; the retry saves the same variable with
                # code=None instead.
                try:
                    with transaction.atomic():
                        newvariable.save()
                except django.db.utils.IntegrityError:
                    newvariable = Variable(
                        name=variablename,
                        unit=unit if unit else '',
                        short_unit=s_unit,
                        description=var_desc,
                        code=None,
                        timespan='',
                        fk_dst_id=dataset,
                        fk_var_type_id=VariableType.objects.get(pk=4),
                        sourceId=source)
                    newvariable.save()
                existing_fao_variables_dict[variablename] = newvariable
            data_values_tuple_list.append(
                (str(value), int(year),
                 country_name_entity_ref[countryname].pk,
                 existing_fao_variables_dict[variablename].pk))
            if len(
                    data_values_tuple_list
            ) > 3000:  # insert when the length of the list goes over 3000
                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)
                # Clear in place (not rebind) so the caller's list reference
                # keeps accumulating into the same buffer.
                del data_values_tuple_list[:]
Esempio n. 8
0
                    datasetId=datasets_ref_models[vardata['category']].pk)
                try:
                    with transaction.atomic():
                        newsource.save()
                except django.db.utils.IntegrityError:
                    newsource.name = '%s via the Quality of Government dataset' % (
                        qog_sources[source_name]['original_dataset'])
                    newsource.save()
                logger.info("Inserting a source %s." %
                            newsource.name.encode('utf8'))
                saved_sources[source_name] = {vardata['category']: newsource}
            newvariable = Variable(
                name='%s - %s' % (vardata['name'], varcode),
                unit='',
                description=vardata['description'],
                code=varcode,
                timespan=vardata['timespan'],
                fk_dst_id=datasets_ref_models[vardata['category']],
                fk_var_type_id=VariableType.objects.get(pk=4),
                sourceId=newsource)
            newvariable.save()
            logger.info("Inserting a variable %s." %
                        newvariable.name.encode('utf8'))
            vars_ref_models[varcode] = newvariable

        insert_string = 'INSERT into data_values (value, year, entityId, fk_var_id) VALUES (%s, %s, %s, %s)'  # this is used for constructing the query for mass inserting to the data_values table
        data_values_tuple_list = []

        # now saving the data values
        with open(qog_downloads_save_location + 'qog.csv') as csvfile:
            reader = csv.DictReader(csvfile)
Esempio n. 9
0
                    datasetId=dataset_name_to_object[subcategory_name].pk)
                newsource.save()
                source_name_to_object[varname] = newsource
            else:
                newsource = Source.objects.get(
                    name=varname,
                    datasetId=dataset_name_to_object[subcategory_name].pk)
                newsource.description = json.dumps(source_description)
                newsource.save()
                source_name_to_object[varname] = newsource

            if varname not in existing_variables_list:

                newvariable = Variable(
                    name=varname,
                    unit=varunit,
                    fk_dst_id=dataset_name_to_object[subcategory_name],
                    fk_var_type_id=VariableType.objects.get(pk=4),
                    sourceId=source_name_to_object[varname])
                newvariable.save()
                variable_name_to_object[varname] = newvariable
                existing_variables_list.add(newvariable.name)
            else:
                if varname not in variable_name_to_object:
                    newvariable = Variable.objects.get(
                        name=varname,
                        fk_dst_id=dataset_name_to_object[subcategory_name])
                    while DataValue.objects.filter(
                            fk_var_id__pk=newvariable.pk).first():
                        with connection.cursor(
                        ) as c:  # if we don't limit the deleted values, the db might just hang
                            c.execute(
Esempio n. 10
0
                                    if not global_cat[indicator_code]['saved']:
                                        source_description['additionalInfo'] = "Definitions and characteristics of countries and other territories: " + "https://ourworldindata.org" + reverse("servehnpstatscountryinfo") + "\n"
                                        source_description['additionalInfo'] += "Limitations and exceptions:\n" + global_cat[indicator_code]['limitations'] + "\n" if global_cat[indicator_code]['limitations'] else ''
                                        source_description['additionalInfo'] += "Notes from original source:\n" + global_cat[indicator_code]['sourcenotes'] + "\n" if global_cat[indicator_code]['sourcenotes'] else ''
                                        source_description['additionalInfo'] += "General comments:\n" + global_cat[indicator_code]['comments'] + "\n" if global_cat[indicator_code]['comments'] else ''
                                        source_description['additionalInfo'] += "Statistical concept and methodology:\n" + global_cat[indicator_code]['concept'] + "\n" if global_cat[indicator_code]['concept'] else ''
                                        source_description['additionalInfo'] += "Related source links:\n" + global_cat[indicator_code]['sourcelinks'] + "\n" if global_cat[indicator_code]['sourcelinks'] else ''
                                        source_description['additionalInfo'] += "Other web links:\n" + global_cat[indicator_code]['weblinks'] + "\n" if global_cat[indicator_code]['weblinks'] else ''
                                        source_description['dataPublisherSource'] = global_cat[indicator_code]['source']
                                        newsource = Source(name='World Bank Health Nutrition and Population Statistics: ' + global_cat[indicator_code]['name'],
                                                           description=json.dumps(source_description),
                                                           datasetId=newdataset.pk)
                                        newsource.save()
                                        logger.info("Inserting a source %s." % newsource.name.encode('utf8'))
                                        s_unit = short_unit_extract(global_cat[indicator_code]['unitofmeasure'])
                                        newvariable = Variable(name=global_cat[indicator_code]['name'], unit=global_cat[indicator_code]['unitofmeasure'] if global_cat[indicator_code]['unitofmeasure'] else '', short_unit=s_unit, description=global_cat[indicator_code]['description'],
                                                               code=indicator_code, timespan='1960-' + str(last_available_year), fk_dst_id=newdataset, fk_var_type_id=VariableType.objects.get(pk=4), sourceId=newsource)
                                        newvariable.save()
                                        logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
                                        global_cat[indicator_code]['variable_object'] = newvariable
                                        global_cat[indicator_code]['saved'] = True
                                    else:
                                        newvariable = global_cat[indicator_code]['variable_object']
                                    for i in range(0, len(data_values)):
                                        data_values_tuple_list.append((data_values[i]['value'], data_values[i]['year'], country_name_entity_ref[country_code].pk, newvariable.pk))
                                    if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                                        with connection.cursor() as c:
                                            c.executemany(insert_string, data_values_tuple_list)
                                        logger.info("Dumping data values...")
                                        data_values_tuple_list = []

                column_number = 0
Esempio n. 11
0
                            )
                        elif row[
                                each] == 'Proportion of teachers in upper secondary education who have received at least the minimum organized teacher training (e.g. pedagogical training) pre-service or in-service required for teaching at the relevant level in a given country':
                            per_row_var.append(
                                'Proportion of teachers in upper secondary education who have received pedagogical training required for teaching at the relevant level in a given country'
                            )
                        else:
                            per_row_var.append(row[each])

                variable_name = ' - '.join(per_row_var)

                if variable_name not in existing_variables_list:

                    newvariable = Variable(
                        name=variable_name,
                        unit=row['Unit'],
                        datasetId=dataset_name_to_object[subcategory_name],
                        variableTypeId=VariableType.objects.get(pk=4),
                        sourceId=source_name_to_object[subcategory_name])
                    newvariable.save()
                    variable_name_to_object[variable_name] = newvariable
                    existing_variables_list.add(newvariable.name)
                else:
                    if variable_name not in variable_name_to_object:
                        newvariable = Variable.objects.get(
                            name=variable_name,
                            datasetId=dataset_name_to_object[subcategory_name])
                        while DataValue.objects.filter(
                                variableId__pk=newvariable.pk).first():
                            with connection.cursor(
                            ) as c:  # if we don't limit the deleted values, the db might just hang
                                c.execute(
Esempio n. 12
0
             unit_of_measure = global_cat[
                 indicator_code][
                     'unitofmeasure']
         else:
             unit_of_measure = ''
     else:
         unit_of_measure = ''
     s_unit = short_unit_extract(
         unit_of_measure)
     newvariable = Variable(
         name=global_cat[indicator_code]
         ['name'],
         unit=unit_of_measure,
         short_unit=s_unit,
         description=global_cat[
             indicator_code]['description'],
         code=indicator_code,
         timespan='',
         fk_dst_id=newdataset,
         fk_var_type_id=VariableType.
         objects.get(pk=4),
         sourceId=newsource)
     newvariable.save()
     logger.info(
         "Inserting a variable %s." %
         newvariable.name.encode('utf8'))
     global_cat[indicator_code][
         'variable_object'] = newvariable
     global_cat[indicator_code][
         'saved'] = True
 else:
Esempio n. 13
0
                            variable_name += ' - ' + row['Country - distribution'][row['Country - distribution'].rfind('-'):]
                        variable_name += ' - ' + file_name
                        variable_code = None

                        if 'Unit' in reader.fieldnames:
                            if row['Unit']:
                                varunit = row['Unit']
                            else:
                                varunit = ''
                        else:
                            varunit = ''

                        if variable_name.lower() not in existing_variables_list:
                            newvariable = Variable(name=variable_name,
                                                   unit=varunit,
                                                   code=variable_code,
                                                   datasetId=newdataset, variableTypeId=VariableType.objects.get(pk=4),
                                                   sourceId=source_name_to_object[source_name])
                            newvariable.save()
                            variable_name_to_object[variable_name.lower()] = newvariable
                            existing_variables_list.add(newvariable.name.lower())
                        else:

                            if variable_name.lower() not in variable_name_to_object:
                                newvariable = Variable.objects.get(name=variable_name, datasetId=newdataset)
                                while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
                                    with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                                        c.execute('DELETE FROM %s WHERE variableId = %s LIMIT 10000;' %
                                                  (DataValue._meta.db_table, newvariable.pk))
                                variable_name_to_object[variable_name.lower()] = newvariable
Esempio n. 14
0
                            newsource = Source(
                                name=varname,
                                description=source_template % (
                                    newdataset.name,
                                    filename_to_pagelink[one_file],
                                    filename_to_pagelink[one_file],
                                ),
                                datasetId=newdataset.pk)
                            newsource.save()

                            newvariable = Variable(
                                name=varname,
                                unit=varunit if varunit else '',
                                short_unit=short_unit_extract(varunit),
                                description='',
                                code=None,
                                timespan='',
                                fk_dst_id=newdataset,
                                fk_var_type_id=VariableType.objects.get(pk=4),
                                sourceId=newsource)

                            newvariable.save()

                        if row_number == 3 and column_number > 6:
                            try:
                                column_to_year[column_number] = int(cell.value)
                            except ValueError:
                                pass

                        if row_number > 3:
                            if column_number == 4 and cell.value is not None:
Esempio n. 15
0
                                        # Build the free-text metadata sections. The conditional
                                        # expression binds to the entire right-hand side of +=, so
                                        # nothing is appended when the catalog field is empty.
                                        source_description['additionalInfo'] += "General comments:\n" + global_cat[indicator_code]['comments'] + "\n" if global_cat[indicator_code]['comments'] else ''
                                        source_description['additionalInfo'] += "Statistical concept and methodology:\n" + global_cat[indicator_code]['concept'] if global_cat[indicator_code]['concept'] else ''  # NOTE(review): no trailing "\n" here, unlike the sibling sections — possibly an oversight
                                        source_description['additionalInfo'] += "Related source links:\n" + global_cat[indicator_code]['sourcelinks'] + "\n" if global_cat[indicator_code]['sourcelinks'] else ''
                                        source_description['additionalInfo'] += "Other web links:\n" + global_cat[indicator_code]['weblinks'] + "\n" if global_cat[indicator_code]['weblinks'] else ''
                                        source_description['dataPublisherSource'] = global_cat[indicator_code]['source']
                                        # Attribute IEA-derived indicators explicitly. NOTE(review):
                                        # json.dumps(source_description) is serialized three times —
                                        # could be hoisted into a local for clarity and speed.
                                        if 'iea.org' in json.dumps(source_description).lower() or 'iea stat' in json.dumps(source_description).lower() or 'iea 2014' in json.dumps(source_description).lower():
                                            source_description['dataPublishedBy'] = 'International Energy Agency (IEA) via The World Bank'
                                        else:
                                            source_description['dataPublishedBy'] = 'World Bank – World Development Indicators'
                                        newsource = Source(name='World Bank – WDI: ' + global_cat[indicator_code]['name'],
                                                           description=json.dumps(source_description),
                                                           datasetId=newdataset)
                                        newsource.save()
                                        logger.info("Inserting a source %s." % newsource.name.encode('utf8'))
                                        s_unit = extract_short_unit(global_cat[indicator_code]['unitofmeasure'])
                                        newvariable = Variable(name=global_cat[indicator_code]['name'], unit=global_cat[indicator_code]['unitofmeasure'] if global_cat[indicator_code]['unitofmeasure'] else '', short_unit=s_unit, description=global_cat[indicator_code]['description'],
                                                               code=indicator_code, timespan='1960-' + str(last_available_year), datasetId=newdataset, sourceId=newsource) # rewrite removed VariableType
                                        newvariable.save()
                                        logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
                                        # Cache the saved Variable so later rows for the same
                                        # indicator reuse it instead of re-creating it.
                                        global_cat[indicator_code]['variable_object'] = newvariable
                                        global_cat[indicator_code]['saved'] = True
                                    else:
                                        newvariable = global_cat[indicator_code]['variable_object']
                                    for i in range(0, len(data_values)):
                                        data_values_tuple_list.append((data_values[i]['value'], data_values[i]['year'], country_name_entity_ref[country_code].pk, newvariable.pk))
                                    if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                                        with connection.cursor() as c:
                                            c.executemany(insert_string, data_values_tuple_list)
                                        logger.info("Dumping data values...")
                                        data_values_tuple_list = []

                column_number = 0
Esempio n. 16
0
                        variable_code = None

                        # The 'Unit' column is optional in these CSVs; default to ''.
                        if 'Unit' in reader.fieldnames:
                            if row['Unit']:
                                varunit = row['Unit']
                            else:
                                varunit = ''
                        else:
                            varunit = ''

                        # Variable names are deduplicated case-insensitively: both the
                        # existing-name set and the name->object cache are keyed on .lower().
                        if variable_name.lower(
                        ) not in existing_variables_list:
                            newvariable = Variable(
                                name=variable_name,
                                unit=varunit,
                                code=variable_code,
                                fk_dst_id=dataset_name_to_object[
                                    metadata_dict[file_name]['category']],
                                fk_var_type_id=VariableType.objects.get(pk=4),  # NOTE(review): magic pk=4 variable type — confirm intended
                                sourceId=source_name_to_object[source_name])
                            newvariable.save()
                            variable_name_to_object[
                                variable_name.lower()] = newvariable
                            existing_variables_list.add(
                                newvariable.name.lower())
                        else:

                            # Name already existed in the DB but is not cached yet: fetch the
                            # existing row (statement continues past this excerpt).
                            if variable_name.lower(
                            ) not in variable_name_to_object:
                                newvariable = Variable.objects.get(
                                    name=variable_name,
                                    fk_dst_id=dataset_name_to_object[
Esempio n. 17
0
                                                   description=json.dumps(source_description),
                                                   datasetId=newdataset.pk)
                                newsource.save()  # persist the Source before the Variables below reference it

                            # One-time creation of the Variable rows, keyed by spreadsheet column.
                            if not variables_saved:
                                for columnnum, varname in var_to_add_dict.items():
                                    # A trailing "(...)" in the variable name holds its unit of
                                    # measure, e.g. "Population (millions)" -> "millions".
                                    if '(' not in varname:
                                        unit_of_measure = ''
                                    else:
                                        unit_of_measure = varname[varname.index('('):varname.index(')') + 1].replace('(', '').replace(')','')
                                    # BUG FIX: compute the short unit for every variable. Previously
                                    # s_unit was assigned only in the else-branch above, so a
                                    # unitless variable silently reused the stale value from the
                                    # previous loop iteration (or raised NameError on the first).
                                    s_unit = short_unit_extract(unit_of_measure)
                                    newvariable = Variable(name=varname,
                                                           unit=unit_of_measure,
                                                           short_unit=s_unit,
                                                           description='',
                                                           code=None,
                                                           timespan=timespan,
                                                           fk_dst_id=newdataset,
                                                           fk_var_type_id=VariableType.objects.get(pk=4),  # NOTE(review): magic pk=4 variable type — confirm intended
                                                           sourceId=newsource)
                                    newvariable.save()

                                    # Remember which Variable each data column maps to.
                                    column_var_dict[columnnum] = newvariable

                                variables_saved = True

                        # Data rows start after row 17 in this sheet (assumption from the
                        # hard-coded threshold — TODO confirm against the source file layout).
                        if row_number > 17:
                            if column_number == 5:
                                country_code = cell.value  # presumably the country/entity code — verify against the sheet
                            if column_number == 6:
                                year = cell.value  # presumably the observation year — verify against the sheet
Esempio n. 18
0
                    # discarding values for subnational regions
                    if row.get('Subnational region'):
                        continue

                    # Compose the variable name from every non-empty dimension column,
                    # e.g. "Sex:Female - Age:15-19".
                    thevarname = []
                    for key in columns_to_process:
                        if row[key]:
                            thevarname.append("{}:{}".format(key, row[key]))
                    variable_name = ' - '.join(thevarname)

                    # Case-insensitive dedup: the existing-name set and name->object cache
                    # are both keyed on .lower().
                    if variable_name.lower() not in existing_variables_list:

                        newvariable = Variable(
                            name=variable_name,
                            unit='',
                            datasetId=dataset_name_to_object[subcategory_name],
                            variableTypeId=VariableType.objects.get(pk=4),  # NOTE(review): magic pk=4 variable type — confirm intended
                            sourceId=source_name_to_object[
                                source_name.lower()])
                        newvariable.save()
                        variable_name_to_object[
                            variable_name.lower()] = newvariable
                        existing_variables_list.add(newvariable.name.lower())
                    else:
                        # Name already exists in the DB but is not cached yet: fetch the
                        # existing row (statement continues past this excerpt).
                        if variable_name.lower(
                        ) not in variable_name_to_object:
                            newvariable = Variable.objects.get(
                                name=variable_name,
                                datasetId=dataset_name_to_object[
                                    subcategory_name])
                            newvariable.sourceId = source_name_to_object[
Esempio n. 19
0
    # Import every UNAIDS CSV download, creating one Variable per
    # "Indicator - Subgroup" pair (deduplicated case-insensitively).
    for eachfile in glob.glob(unaids_downloads + '/*.csv'):
        print("Processing: {}".format(eachfile))
        # utf-8-sig strips the BOM some of these files start with.
        with open(eachfile, mode='rt', encoding='utf-8-sig') as f:
            reader = csv.DictReader(f)

            for row in reader:
                row_number += 1

                variable_name = "{} - {}".format(row['Indicator'].strip(),
                                                 row['Subgroup'].strip())
                if variable_name.lower() not in existing_variables_list:
                    newvariable = Variable(
                        name=variable_name,
                        unit=row['Unit'],
                        code=None,
                        datasetId=newdataset,
                        variableTypeId=VariableType.objects.get(pk=4),  # NOTE(review): magic pk=4 variable type — confirm intended
                        sourceId=source_name_to_object[source_name])
                    newvariable.save()
                    variable_name_to_object[
                        variable_name.lower()] = newvariable
                    existing_variables_list.add(newvariable.name.lower())
                else:
                    if variable_name.lower() not in variable_name_to_object:
                        # Variable exists from a previous run: wipe its old values in
                        # batches before re-importing (loop body continues past this
                        # excerpt).
                        newvariable = Variable.objects.get(
                            name=variable_name, datasetId=newdataset)
                        while DataValue.objects.filter(
                                variableId__pk=newvariable.pk).first():
                            with connection.cursor(
                            ) as c:  # if we don't limit the deleted values, the db might just hang
Esempio n. 20
0
                    # Normalize an empty additionalInfo to None before serializing.
                    if not source_description['additionalInfo']:
                        source_description['additionalInfo'] = None

                    newsource = Source(name='%s %s: %s' % ('ILOSTAT', sourcedata['category'], variable_name),
                                       description=json.dumps(source_description),
                                       datasetId=Dataset.objects.get(name=file_name_to_category[row['indicator']], namespace='ilostat').pk)

                    newsource.save()

                    # Unit of measure is taken from a trailing "(...)" in the label.
                    # NOTE(review): if the label has no parentheses, varunit keeps
                    # whatever value it had before this excerpt — confirm it is
                    # initialized upstream, otherwise this reuses a stale value.
                    if '(' in the_indicator_label and ')' in the_indicator_label:
                        varunit = the_indicator_label[the_indicator_label.index('('):-1].replace('(', '').replace(')','')
                    newvariable = Variable(name=variable_name,
                                           unit=varunit if
                                           varunit else '', short_unit=short_unit_extract(varunit),
                                           description='See concepts and methods provided by ILOSTAT at http://www.ilo.org/ilostat/faces/ilostat-home/metadata',
                                           code=varcode_for_reference,
                                           timespan='',
                                           datasetId=Dataset.objects.get(name=file_name_to_category[row['indicator']], namespace='ilostat'), variableTypeId=VariableType.objects.get(pk=4),
                                           sourceId=newsource)

                    # Cache by indicator code so later rows reuse this Variable.
                    # NOTE(review): Dataset.objects.get(...) is queried twice above —
                    # could be fetched once into a local.
                    varcode_to_object[varcode_for_reference] = newvariable
                    newvariable.save()

                variables = None

                insert_string = 'INSERT into data_values (value, year, entityId, variableId) VALUES (%s, %s, %s, %s)'  # this is used for constructing the query for mass inserting to the data_values table
                data_values_tuple_list = []
                with open(file.replace('.gz', ''), 'r', encoding='utf8') as f:
                    reader = csv.DictReader(f)
                    for row in reader:  # actually importing the values
                        row_number += 1