dataset_name_to_object[row['rei_name']] = newdataset
existing_datasets_list.append(newdataset)

newsource = Source.objects.get(name=row['rei_name'], datasetId=newdataset.pk)
newsource.description = json.dumps(source_description)
newsource.save()
source_name_to_object[row['rei_name']] = newsource

variable_name = '%s - %s - Sex: %s - Age: %s (%s)' % (
    row['measure_name'], row['rei_name'], row['sex_name'],
    row['age_name'], row['metric_name'])
variable_code = '%s %s %s %s %s' % (
    row['measure_id'], row['rei_id'], row['sex_id'],
    row['age_id'], row['metric_id'])

if variable_name not in existing_variables_list:
    newvariable = Variable(
        name=variable_name,
        unit=row['metric_name'],
        code=variable_code,
        fk_dst_id=dataset_name_to_object[row['rei_name']],
        fk_var_type_id=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[row['rei_name']])
    newvariable.save()
    variable_name_to_object[variable_name] = newvariable
    existing_variables_list.add(newvariable.name)
else:
    if variable_name not in variable_name_to_object:
        newvariable = Variable.objects.get(
            name=variable_name,
            fk_dst_id=dataset_name_to_object[row['rei_name']])
        while DataValue.objects.filter(fk_var_id__pk=newvariable.pk).first():
            with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                c.execute('DELETE FROM %s WHERE fk_var_id = %s LIMIT 10000;' %
                          (DataValue._meta.db_table, newvariable.pk))
        variable_name_to_object[variable_name] = newvariable
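# The chunked-delete loop above recurs throughout these importers. A minimal
# reusable sketch of the same idea, assuming the Django `connection` and the
# `DataValue` model used above; `delete_values_in_chunks` is a hypothetical
# name, not part of the original scripts:
def delete_values_in_chunks(variable_pk, chunk_size=10000):
    """Remove a variable's data values in bounded batches so a single
    huge DELETE never locks up the database."""
    while DataValue.objects.filter(fk_var_id__pk=variable_pk).first():
        with connection.cursor() as c:
            # table name and ids come from our own models, not user input
            c.execute('DELETE FROM %s WHERE fk_var_id = %s LIMIT %s;' %
                      (DataValue._meta.db_table, variable_pk, chunk_size))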
if section == 'Hygiene':
    varname = 'Hygiene'
    varname += (' - ' + columns[section][column_number]['name'] +
                ' - ' + columns[section][column_number]['type'])
    percent_varname = varname + ' - Percent'
    varunit = 'Percent'
    if percent_varname.lower() not in existing_variables_list:
        newvariable = Variable(
            name=percent_varname,
            unit=varunit,
            code=None,
            datasetId=newdataset,
            variableTypeId=VariableType.objects.get(pk=4),
            sourceId=source_name_to_object[source_name])
        newvariable.save()
        variable_name_to_object[percent_varname.lower()] = newvariable
        existing_variables_list.add(newvariable.name.lower())
    else:
        if percent_varname.lower() not in variable_name_to_object:
            newvariable = Variable.objects.get(
                name=percent_varname,
    the_subcategory.name, namespace='clioinfra')
source_description['link'] = filename_to_pagelink[one_file]
newsource = Source(
    name=varname,
    description=json.dumps(source_description),
    datasetId=newdataset.pk)
newsource.save()
newvariable = Variable(
    name=varname,
    unit=varunit if varunit else '',
    short_unit=short_unit_extract(varunit),
    description='',
    code=filename_to_pagelink[one_file][
        filename_to_pagelink[one_file].rfind('/') + 1:],
    timespan='',
    datasetId=newdataset,
    variableTypeId=VariableType.objects.get(pk=4),
    sourceId=newsource)
newvariable.save()
if row_number == 3 and column_number > 6:
    try:
        column_to_year[column_number] = int(cell.value)
    except ValueError:
        pass
if row_number > 3:
newdataset = Dataset.objects.get(name=row['cause_name'], categoryId=the_category)
dataset_name_to_object[row['cause_name']] = newdataset
existing_datasets_list.append(newdataset)

newsource = Source.objects.get(name=row['cause_name'], datasetId=newdataset.pk)
newsource.description = json.dumps(source_description)
newsource.save()
source_name_to_object[row['cause_name']] = newsource

variable_name = '%s - %s - Sex: %s - Age: %s (%s)' % (
    row['measure_name'], row['cause_name'], row['sex_name'],
    row['age_name'], row['metric_name'])
variable_code = '%s %s %s %s %s' % (
    row['measure_id'], row['cause_id'], row['sex_id'],
    row['age_id'], row['metric_id'])

if variable_name not in existing_variables_list:
    newvariable = Variable(
        name=variable_name,
        unit=row['metric_name'],
        code=variable_code,
        datasetId=dataset_name_to_object[row['cause_name']],
        variableTypeId=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[row['cause_name']])
    newvariable.save()
    variable_name_to_object[variable_name] = newvariable
    existing_variables_list.add(newvariable.name)
else:
    if variable_name not in variable_name_to_object:
        newvariable = Variable.objects.get(
            name=variable_name,
            datasetId=dataset_name_to_object[row['cause_name']])
        while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
            with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                c.execute('DELETE FROM %s WHERE variableId = %s LIMIT 10000;' %
                          (DataValue._meta.db_table, newvariable.pk))
        variable_name_to_object[variable_name] = newvariable

if row['location_name'] not in c_name_entity_ref:
if not global_cat[indicator_code]['saved']:
    source_description['additionalInfo'] = (
        "Definitions and characteristics of countries and other territories: " +
        "https://ourworldindata.org" + reverse("servepovstatscountryinfo") + "\n")
    source_description['additionalInfo'] += (
        "Limitations and exceptions:\n" + global_cat[indicator_code]['limitations'] + "\n"
    ) if global_cat[indicator_code]['limitations'] else ''
    source_description['additionalInfo'] += (
        "Notes from original source:\n" + global_cat[indicator_code]['sourcenotes'] + "\n"
    ) if global_cat[indicator_code]['sourcenotes'] else ''
    source_description['additionalInfo'] += (
        "General comments:\n" + global_cat[indicator_code]['comments'] + "\n"
    ) if global_cat[indicator_code]['comments'] else ''
    source_description['additionalInfo'] += (
        "Statistical concept and methodology:\n" + global_cat[indicator_code]['concept'] + "\n"
    ) if global_cat[indicator_code]['concept'] else ''
    source_description['additionalInfo'] += (
        "Related source links:\n" + global_cat[indicator_code]['sourcelinks'] + "\n"
    ) if global_cat[indicator_code]['sourcelinks'] else ''
    source_description['additionalInfo'] += (
        "Other web links:\n" + global_cat[indicator_code]['weblinks'] + "\n"
    ) if global_cat[indicator_code]['weblinks'] else ''
    source_description['dataPublisherSource'] = global_cat[indicator_code]['source']
    newsource = Source(
        name='World Bank Poverty and Equity database: ' + global_cat[indicator_code]['name'],
        description=json.dumps(source_description),
        datasetId=newdataset.pk)
    newsource.save()
    logger.info("Inserting a source %s." % newsource.name.encode('utf8'))
    s_unit = short_unit_extract(global_cat[indicator_code]['unitofmeasure'])
    newvariable = Variable(
        name=global_cat[indicator_code]['name'],
        unit=global_cat[indicator_code]['unitofmeasure'] if global_cat[indicator_code]['unitofmeasure'] else '',
        short_unit=s_unit,
        description=global_cat[indicator_code]['description'],
        code=indicator_code,
        timespan='',
        datasetId=newdataset,
        variableTypeId=VariableType.objects.get(pk=4),
        sourceId=newsource)
    newvariable.save()
    logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
    global_cat[indicator_code]['variable_object'] = newvariable
    global_cat[indicator_code]['saved'] = True
else:
    newvariable = global_cat[indicator_code]['variable_object']
for i in range(0, len(data_values)):
    data_values_tuple_list.append(
        (data_values[i]['value'], data_values[i]['year'],
         country_name_entity_ref[country_code].pk, newvariable.pk))
if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
    with connection.cursor() as c:
        c.executemany(insert_string, data_values_tuple_list)
    logger.info("Dumping data values...")
    data_values_tuple_list = []
column_number = 0
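# A minimal sketch of the batched-insert step used above, assuming the same
# `insert_string` and Django `connection`; `flush_if_full` is a hypothetical
# helper, not part of the original script:
def flush_if_full(rows, threshold=3000):
    """executemany() the buffered rows once the buffer passes the threshold."""
    if len(rows) > threshold:
        with connection.cursor() as c:
            c.executemany(insert_string, rows)
        rows.clear()  # keep the same list object so callers retain their reference
    return rows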
with open(file, 'r', encoding='utf8') as f:
    print('Processing: %s' % file)
    reader = csv.DictReader(f)
    for row in reader:
        row_number += 1
        variable_name = '%s - %s' % (row['ihme_indicator_description'], row['estimate_type'])
        if row.get('unscaled_value'):
            variable_name += ' - Unscaled'
        else:
            variable_name += ' - Scaled'
        if variable_name not in existing_variables_list:
            newvariable = Variable(
                name=variable_name,
                unit=row['indicator_unit'],
                code=None,
                datasetId=newdataset,
                variableTypeId=VariableType.objects.get(pk=4),
                sourceId=newsource)
            newvariable.save()
            variable_name_to_object[variable_name] = newvariable
            existing_variables_list.add(newvariable.name)
        else:
            if variable_name not in variable_name_to_object:
                newvariable = Variable.objects.get(name=variable_name, datasetId=newdataset)
                while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
                    with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                        c.execute('DELETE FROM %s WHERE variableId = %s LIMIT 10000;' %
                                  (DataValue._meta.db_table, newvariable.pk))
                variable_name_to_object[variable_name] = newvariable
        if row['location_name'] not in c_name_entity_ref:
def process_one_row(year, value, countryname, variablecode, variablename,
                    existing_fao_variables_dict, unit, source, dataset,
                    var_desc, data_values_tuple_list):
    global unique_data_tracker
    global processed_values

    processed_values += 1
    if processed_values % 300 == 0:
        time.sleep(0.001)  # this is done in order to not keep the CPU busy all the time

    # this is used for constructing the query for mass inserting to the data_values table
    insert_string = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'

    if year is not False and value is not False:
        if tuple([countryname, variablecode]) not in unique_data_tracker:
            if countryname not in country_name_entity_ref:
                if countryname.lower() in existing_entities_list:
                    newentity = Entity.objects.get(name=countryname)
                elif country_tool_names_dict.get(
                        unidecode.unidecode(countryname.lower()), 0):
                    newentity = Entity.objects.get(
                        name=country_tool_names_dict[unidecode.unidecode(
                            countryname.lower())].owid_name)
                else:
                    newentity = Entity(name=countryname, validated=False)
                    newentity.save()
                country_name_entity_ref[countryname] = newentity
            if variablename not in existing_fao_variables_dict:
                s_unit = short_unit_extract(unit)
                newvariable = Variable(
                    name=variablename,
                    unit=unit if unit else '',
                    short_unit=s_unit,
                    description=var_desc,
                    code=variablecode,
                    timespan='',
                    fk_dst_id=dataset,
                    fk_var_type_id=VariableType.objects.get(pk=4),
                    sourceId=source)
                try:
                    with transaction.atomic():
                        newvariable.save()
                except django.db.utils.IntegrityError:
                    # the save collided (likely a duplicate variable code), so retry without a code
                    newvariable = Variable(
                        name=variablename,
                        unit=unit if unit else '',
                        short_unit=s_unit,
                        description=var_desc,
                        code=None,
                        timespan='',
                        fk_dst_id=dataset,
                        fk_var_type_id=VariableType.objects.get(pk=4),
                        sourceId=source)
                    newvariable.save()
                existing_fao_variables_dict[variablename] = newvariable
            data_values_tuple_list.append(
                (str(value), int(year),
                 country_name_entity_ref[countryname].pk,
                 existing_fao_variables_dict[variablename].pk))
            # insert when the length of the list goes over 3000
            if len(data_values_tuple_list) > 3000:
                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)
                del data_values_tuple_list[:]
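# The entity lookup above caches resolved countries so each name hits the
# database at most once. A condensed sketch of the same resolution order
# (exact match, then the country tool's normalized name, then a new
# unvalidated entity); `resolve_entity` is a hypothetical name and the
# module-level caches are assumed from the surrounding script:
import unidecode

def resolve_entity(countryname):
    if countryname in country_name_entity_ref:
        return country_name_entity_ref[countryname]
    if countryname.lower() in existing_entities_list:
        entity = Entity.objects.get(name=countryname)
    elif country_tool_names_dict.get(unidecode.unidecode(countryname.lower()), 0):
        entity = Entity.objects.get(
            name=country_tool_names_dict[
                unidecode.unidecode(countryname.lower())].owid_name)
    else:
        entity = Entity(name=countryname, validated=False)
        entity.save()
    country_name_entity_ref[countryname] = entity
    return entity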
    datasetId=datasets_ref_models[vardata['category']].pk)
try:
    with transaction.atomic():
        newsource.save()
except django.db.utils.IntegrityError:
    # a source with this name already exists; disambiguate the name and retry
    newsource.name = '%s via the Quality of Government dataset' % (
        qog_sources[source_name]['original_dataset'])
    newsource.save()
logger.info("Inserting a source %s." % newsource.name.encode('utf8'))
saved_sources[source_name] = {vardata['category']: newsource}
newvariable = Variable(
    name='%s - %s' % (vardata['name'], varcode),
    unit='',
    description=vardata['description'],
    code=varcode,
    timespan=vardata['timespan'],
    fk_dst_id=datasets_ref_models[vardata['category']],
    fk_var_type_id=VariableType.objects.get(pk=4),
    sourceId=newsource)
newvariable.save()
logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
vars_ref_models[varcode] = newvariable

# this is used for constructing the query for mass inserting to the data_values table
# (column names made consistent with the fk_* schema used by this script)
insert_string = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'
data_values_tuple_list = []

# now saving the data values
with open(qog_downloads_save_location + 'qog.csv') as csvfile:
    reader = csv.DictReader(csvfile)
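# The try/atomic/IntegrityError dance above is how these importers recover
# from a duplicate source name. A condensed sketch of the same pattern,
# assuming `transaction` from django.db and `django.db.utils` are imported
# as in the surrounding code; the helper name is hypothetical:
def save_source_or_rename(source, fallback_name):
    try:
        with transaction.atomic():
            source.save()
    except django.db.utils.IntegrityError:
        # the name presumably collided with an existing source;
        # retry under the disambiguated fallback name
        source.name = fallback_name
        source.save()
    return source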
        datasetId=dataset_name_to_object[subcategory_name].pk)
    newsource.save()
    source_name_to_object[varname] = newsource
else:
    newsource = Source.objects.get(
        name=varname,
        datasetId=dataset_name_to_object[subcategory_name].pk)
    newsource.description = json.dumps(source_description)
    newsource.save()
    source_name_to_object[varname] = newsource
if varname not in existing_variables_list:
    newvariable = Variable(
        name=varname,
        unit=varunit,
        fk_dst_id=dataset_name_to_object[subcategory_name],
        fk_var_type_id=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[varname])
    newvariable.save()
    variable_name_to_object[varname] = newvariable
    existing_variables_list.add(newvariable.name)
else:
    if varname not in variable_name_to_object:
        newvariable = Variable.objects.get(
            name=varname,
            fk_dst_id=dataset_name_to_object[subcategory_name])
        while DataValue.objects.filter(fk_var_id__pk=newvariable.pk).first():
            with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                c.execute(
if not global_cat[indicator_code]['saved']:
    source_description['additionalInfo'] = (
        "Definitions and characteristics of countries and other territories: " +
        "https://ourworldindata.org" + reverse("servehnpstatscountryinfo") + "\n")
    source_description['additionalInfo'] += (
        "Limitations and exceptions:\n" + global_cat[indicator_code]['limitations'] + "\n"
    ) if global_cat[indicator_code]['limitations'] else ''
    source_description['additionalInfo'] += (
        "Notes from original source:\n" + global_cat[indicator_code]['sourcenotes'] + "\n"
    ) if global_cat[indicator_code]['sourcenotes'] else ''
    source_description['additionalInfo'] += (
        "General comments:\n" + global_cat[indicator_code]['comments'] + "\n"
    ) if global_cat[indicator_code]['comments'] else ''
    source_description['additionalInfo'] += (
        "Statistical concept and methodology:\n" + global_cat[indicator_code]['concept'] + "\n"
    ) if global_cat[indicator_code]['concept'] else ''
    source_description['additionalInfo'] += (
        "Related source links:\n" + global_cat[indicator_code]['sourcelinks'] + "\n"
    ) if global_cat[indicator_code]['sourcelinks'] else ''
    source_description['additionalInfo'] += (
        "Other web links:\n" + global_cat[indicator_code]['weblinks'] + "\n"
    ) if global_cat[indicator_code]['weblinks'] else ''
    source_description['dataPublisherSource'] = global_cat[indicator_code]['source']
    newsource = Source(
        name='World Bank Health Nutrition and Population Statistics: ' + global_cat[indicator_code]['name'],
        description=json.dumps(source_description),
        datasetId=newdataset.pk)
    newsource.save()
    logger.info("Inserting a source %s." % newsource.name.encode('utf8'))
    s_unit = short_unit_extract(global_cat[indicator_code]['unitofmeasure'])
    newvariable = Variable(
        name=global_cat[indicator_code]['name'],
        unit=global_cat[indicator_code]['unitofmeasure'] if global_cat[indicator_code]['unitofmeasure'] else '',
        short_unit=s_unit,
        description=global_cat[indicator_code]['description'],
        code=indicator_code,
        timespan='1960-' + str(last_available_year),
        fk_dst_id=newdataset,
        fk_var_type_id=VariableType.objects.get(pk=4),
        sourceId=newsource)
    newvariable.save()
    logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
    global_cat[indicator_code]['variable_object'] = newvariable
    global_cat[indicator_code]['saved'] = True
else:
    newvariable = global_cat[indicator_code]['variable_object']
for i in range(0, len(data_values)):
    data_values_tuple_list.append(
        (data_values[i]['value'], data_values[i]['year'],
         country_name_entity_ref[country_code].pk, newvariable.pk))
if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
    with connection.cursor() as c:
        c.executemany(insert_string, data_values_tuple_list)
    logger.info("Dumping data values...")
    data_values_tuple_list = []
column_number = 0
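# The repeated "+= label + value if value else ''" lines above (here and in
# the Poverty and Equity block) can be collapsed into a loop; a sketch over
# the same global_cat fields, assuming additionalInfo was already seeded
# with the country-definitions line:
for label, key in (("Limitations and exceptions:", 'limitations'),
                   ("Notes from original source:", 'sourcenotes'),
                   ("General comments:", 'comments'),
                   ("Statistical concept and methodology:", 'concept'),
                   ("Related source links:", 'sourcelinks'),
                   ("Other web links:", 'weblinks')):
    if global_cat[indicator_code][key]:
        source_description['additionalInfo'] += '%s\n%s\n' % (
            label, global_cat[indicator_code][key])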
    )
elif row[each] == 'Proportion of teachers in upper secondary education who have received at least the minimum organized teacher training (e.g. pedagogical training) pre-service or in-service required for teaching at the relevant level in a given country':
    per_row_var.append(
        'Proportion of teachers in upper secondary education who have received pedagogical training required for teaching at the relevant level in a given country'
    )
else:
    per_row_var.append(row[each])

variable_name = ' - '.join(per_row_var)
if variable_name not in existing_variables_list:
    newvariable = Variable(
        name=variable_name,
        unit=row['Unit'],
        datasetId=dataset_name_to_object[subcategory_name],
        variableTypeId=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[subcategory_name])
    newvariable.save()
    variable_name_to_object[variable_name] = newvariable
    existing_variables_list.add(newvariable.name)
else:
    if variable_name not in variable_name_to_object:
        newvariable = Variable.objects.get(
            name=variable_name,
            datasetId=dataset_name_to_object[subcategory_name])
        while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
            with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                c.execute(
            unit_of_measure = global_cat[indicator_code]['unitofmeasure']
        else:
            unit_of_measure = ''
    else:
        unit_of_measure = ''
    s_unit = short_unit_extract(unit_of_measure)
    newvariable = Variable(
        name=global_cat[indicator_code]['name'],
        unit=unit_of_measure,
        short_unit=s_unit,
        description=global_cat[indicator_code]['description'],
        code=indicator_code,
        timespan='',
        fk_dst_id=newdataset,
        fk_var_type_id=VariableType.objects.get(pk=4),
        sourceId=newsource)
    newvariable.save()
    logger.info("Inserting a variable %s." % newvariable.name.encode('utf8'))
    global_cat[indicator_code]['variable_object'] = newvariable
    global_cat[indicator_code]['saved'] = True
else:
variable_name += ' - ' + row['Country - distribution'][row['Country - distribution'].rfind('-'):]
variable_name += ' - ' + file_name
variable_code = None
if 'Unit' in reader.fieldnames:
    if row['Unit']:
        varunit = row['Unit']
    else:
        varunit = ''
else:
    varunit = ''
if variable_name.lower() not in existing_variables_list:
    newvariable = Variable(
        name=variable_name,
        unit=varunit,
        code=variable_code,
        datasetId=newdataset,
        variableTypeId=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[source_name])
    newvariable.save()
    variable_name_to_object[variable_name.lower()] = newvariable
    existing_variables_list.add(newvariable.name.lower())
else:
    if variable_name.lower() not in variable_name_to_object:
        newvariable = Variable.objects.get(name=variable_name, datasetId=newdataset)
        while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
            with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
                c.execute('DELETE FROM %s WHERE variableId = %s LIMIT 10000;' %
                          (DataValue._meta.db_table, newvariable.pk))
        variable_name_to_object[variable_name.lower()] = newvariable
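# Several of these importers key their caches on variable_name.lower() so
# lookups are case-insensitive while the database keeps the original casing.
# A condensed sketch of that get-or-create flow over the same module-level
# caches (`get_or_create_variable` and `build` are hypothetical; the
# stale-value deletion step is omitted here):
def get_or_create_variable(name, build):
    key = name.lower()
    if key in variable_name_to_object:
        return variable_name_to_object[key]
    if key in existing_variables_list:
        variable = Variable.objects.get(name=name, datasetId=newdataset)
    else:
        variable = build()  # a callable returning an unsaved Variable
        variable.save()
        existing_variables_list.add(key)
    variable_name_to_object[key] = variable
    return variable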
newsource = Source(
    name=varname,
    description=source_template % (
        newdataset.name,
        filename_to_pagelink[one_file],
        filename_to_pagelink[one_file],
    ),
    datasetId=newdataset.pk)
newsource.save()
newvariable = Variable(
    name=varname,
    unit=varunit if varunit else '',
    short_unit=short_unit_extract(varunit),
    description='',
    code=None,
    timespan='',
    fk_dst_id=newdataset,
    fk_var_type_id=VariableType.objects.get(pk=4),
    sourceId=newsource)
newvariable.save()
if row_number == 3 and column_number > 6:
    try:
        column_to_year[column_number] = int(cell.value)
    except ValueError:
        pass
if row_number > 3:
    if column_number == 4 and cell.value is not None:
source_description['additionalInfo'] += "General comments:\n" + global_cat[indicator_code]['comments'] + "\n" if global_cat[indicator_code]['comments'] else '' source_description['additionalInfo'] += "Statistical concept and methodology:\n" + global_cat[indicator_code]['concept'] if global_cat[indicator_code]['concept'] else '' source_description['additionalInfo'] += "Related source links:\n" + global_cat[indicator_code]['sourcelinks'] + "\n" if global_cat[indicator_code]['sourcelinks'] else '' source_description['additionalInfo'] += "Other web links:\n" + global_cat[indicator_code]['weblinks'] + "\n" if global_cat[indicator_code]['weblinks'] else '' source_description['dataPublisherSource'] = global_cat[indicator_code]['source'] if 'iea.org' in json.dumps(source_description).lower() or 'iea stat' in json.dumps(source_description).lower() or 'iea 2014' in json.dumps(source_description).lower(): source_description['dataPublishedBy'] = 'International Energy Agency (IEA) via The World Bank' else: source_description['dataPublishedBy'] = 'World Bank – World Development Indicators' newsource = Source(name='World Bank – WDI: ' + global_cat[indicator_code]['name'], description=json.dumps(source_description), datasetId=newdataset) newsource.save() logger.info("Inserting a source %s." % newsource.name.encode('utf8')) s_unit = extract_short_unit(global_cat[indicator_code]['unitofmeasure']) newvariable = Variable(name=global_cat[indicator_code]['name'], unit=global_cat[indicator_code]['unitofmeasure'] if global_cat[indicator_code]['unitofmeasure'] else '', short_unit=s_unit, description=global_cat[indicator_code]['description'], code=indicator_code, timespan='1960-' + str(last_available_year), datasetId=newdataset, sourceId=newsource) # rewrite removed VariableType newvariable.save() logger.info("Inserting a variable %s." % newvariable.name.encode('utf8')) global_cat[indicator_code]['variable_object'] = newvariable global_cat[indicator_code]['saved'] = True else: newvariable = global_cat[indicator_code]['variable_object'] for i in range(0, len(data_values)): data_values_tuple_list.append((data_values[i]['value'], data_values[i]['year'], country_name_entity_ref[country_code].pk, newvariable.pk)) if len(data_values_tuple_list) > 3000: # insert when the length of the list goes over 3000 with connection.cursor() as c: c.executemany(insert_string, data_values_tuple_list) logger.info("Dumping data values...") data_values_tuple_list = [] column_number = 0
variable_code = None
if 'Unit' in reader.fieldnames:
    if row['Unit']:
        varunit = row['Unit']
    else:
        varunit = ''
else:
    varunit = ''
if variable_name.lower() not in existing_variables_list:
    newvariable = Variable(
        name=variable_name,
        unit=varunit,
        code=variable_code,
        fk_dst_id=dataset_name_to_object[metadata_dict[file_name]['category']],
        fk_var_type_id=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[source_name])
    newvariable.save()
    variable_name_to_object[variable_name.lower()] = newvariable
    existing_variables_list.add(newvariable.name.lower())
else:
    if variable_name.lower() not in variable_name_to_object:
        newvariable = Variable.objects.get(
            name=variable_name,
            fk_dst_id=dataset_name_to_object[
    description=json.dumps(source_description),
    datasetId=newdataset.pk)
newsource.save()
if not variables_saved:
    for columnnum, varname in var_to_add_dict.items():
        if '(' not in varname:
            unit_of_measure = ''
        else:
            unit_of_measure = varname[varname.index('('):varname.index(')') + 1].replace('(', '').replace(')', '')
        s_unit = short_unit_extract(unit_of_measure)
        newvariable = Variable(
            name=varname,
            unit=unit_of_measure,
            short_unit=s_unit,
            description='',
            code=None,
            timespan=timespan,
            fk_dst_id=newdataset,
            fk_var_type_id=VariableType.objects.get(pk=4),
            sourceId=newsource)
        newvariable.save()
        column_var_dict[columnnum] = newvariable
    variables_saved = True
if row_number > 17:
    if column_number == 5:
        country_code = cell.value
    if column_number == 6:
        year = cell.value
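# The index()-based slice above assumes a well-formed "(unit)" pair and
# raises ValueError when '(' is present but ')' is missing. A regex sketch
# of the same extraction that degrades to '' instead; `unit_from_varname`
# is a hypothetical helper, not part of the original script:
import re

def unit_from_varname(varname):
    """Return the text inside the first '(...)' pair, or '' if none."""
    match = re.search(r'\(([^()]*)\)', varname)
    return match.group(1) if match else ''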
# discarding values for subnational regions
if row.get('Subnational region'):
    continue
thevarname = []
for key in columns_to_process:
    if row[key]:
        thevarname.append("{}:{}".format(key, row[key]))
variable_name = ' - '.join(thevarname)
if variable_name.lower() not in existing_variables_list:
    newvariable = Variable(
        name=variable_name,
        unit='',
        datasetId=dataset_name_to_object[subcategory_name],
        variableTypeId=VariableType.objects.get(pk=4),
        sourceId=source_name_to_object[source_name.lower()])
    newvariable.save()
    variable_name_to_object[variable_name.lower()] = newvariable
    existing_variables_list.add(newvariable.name.lower())
else:
    if variable_name.lower() not in variable_name_to_object:
        newvariable = Variable.objects.get(
            name=variable_name,
            datasetId=dataset_name_to_object[subcategory_name])
        newvariable.sourceId = source_name_to_object[
for eachfile in glob.glob(unaids_downloads + '/*.csv'):
    print("Processing: {}".format(eachfile))
    with open(eachfile, mode='rt', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_number += 1
            variable_name = "{} - {}".format(row['Indicator'].strip(), row['Subgroup'].strip())
            if variable_name.lower() not in existing_variables_list:
                newvariable = Variable(
                    name=variable_name,
                    unit=row['Unit'],
                    code=None,
                    datasetId=newdataset,
                    variableTypeId=VariableType.objects.get(pk=4),
                    sourceId=source_name_to_object[source_name])
                newvariable.save()
                variable_name_to_object[variable_name.lower()] = newvariable
                existing_variables_list.add(newvariable.name.lower())
            else:
                if variable_name.lower() not in variable_name_to_object:
                    newvariable = Variable.objects.get(name=variable_name, datasetId=newdataset)
                    while DataValue.objects.filter(variableId__pk=newvariable.pk).first():
                        with connection.cursor() as c:  # if we don't limit the deleted values, the db might just hang
if not source_description['additionalInfo']:
    source_description['additionalInfo'] = None
newsource = Source(
    name='%s %s: %s' % ('ILOSTAT', sourcedata['category'], variable_name),
    description=json.dumps(source_description),
    datasetId=Dataset.objects.get(name=file_name_to_category[row['indicator']], namespace='ilostat').pk)
newsource.save()
if '(' in the_indicator_label and ')' in the_indicator_label:
    varunit = the_indicator_label[the_indicator_label.index('('):-1].replace('(', '').replace(')', '')
else:
    varunit = ''  # without this branch, varunit could be undefined below
newvariable = Variable(
    name=variable_name,
    unit=varunit if varunit else '',
    short_unit=short_unit_extract(varunit),
    description='See concepts and methods provided by ILOSTAT at http://www.ilo.org/ilostat/faces/ilostat-home/metadata',
    code=varcode_for_reference,
    timespan='',
    datasetId=Dataset.objects.get(name=file_name_to_category[row['indicator']], namespace='ilostat'),
    variableTypeId=VariableType.objects.get(pk=4),
    sourceId=newsource)
varcode_to_object[varcode_for_reference] = newvariable
newvariable.save()
variables = None

# this is used for constructing the query for mass inserting to the data_values table
insert_string = 'INSERT into data_values (value, year, entityId, variableId) VALUES (%s, %s, %s, %s)'
data_values_tuple_list = []

with open(file.replace('.gz', ''), 'r', encoding='utf8') as f:
    reader = csv.DictReader(f)
    for row in reader:  # actually importing the values
        row_number += 1
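# Note that the 3000-row threshold used in these loops only fires while
# rows keep arriving; whatever remains in data_values_tuple_list when the
# reader is exhausted still needs one final executemany. A sketch of that
# closing step, assuming the same insert_string and connection as above:
if data_values_tuple_list:
    with connection.cursor() as c:
        c.executemany(insert_string, data_values_tuple_list)
    data_values_tuple_list = []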