# Fragment of a UNAIDS CSV importer (Django ORM). Newlines were stripped in
# this chunk; formatting below is reconstructed — tokens are unchanged.
# NOTE(review): the opening `if` branch of this `else:` lies outside this
# chunk; the leading `append` call belongs to that off-screen branch.
    new_datasets_list.append(newdataset)
else:
    # Dataset already exists for this subcategory — fetch it instead of creating.
    newdataset = Dataset.objects.get(name=subcategory_name, categoryId=the_category)

# Upsert the single 'UNAIDS' Source record tied to the dataset above, caching
# the model instance in source_name_to_object either way.
source_name = 'UNAIDS'
if source_name not in source_name_to_object:
    # First encounter: create and persist a new Source row.
    newsource = Source(name=source_name,
                       description=json.dumps(source_description),
                       datasetId=newdataset.pk)
    newsource.save()
    source_name_to_object[source_name] = newsource
else:
    # Already seen: refresh the stored description on the existing row.
    newsource = Source.objects.get(name=source_name, datasetId=newdataset.pk)
    newsource.description = json.dumps(source_description)
    newsource.save()
    source_name_to_object[source_name] = newsource

# Walk every CSV file in the UNAIDS download directory and register one
# Variable per distinct "Indicator - Subgroup" pair.
for eachfile in glob.glob(unaids_downloads + '/*.csv'):
    print("Processing: {}".format(eachfile))
    # utf-8-sig strips a leading BOM that these downloaded CSVs may carry.
    with open(eachfile, mode='rt', encoding='utf-8-sig') as f:
        reader = csv.DictReader(f)
        for row in reader:
            row_number += 1
            variable_name = "{} - {}".format(row['Indicator'].strip(), row['Subgroup'].strip())
            # Case-insensitive dedup against variables already in the DB.
            # NOTE(review): presumably existing_variables_list holds
            # lower-cased names — built off-screen; verify against caller.
            if variable_name.lower() not in existing_variables_list:
                # Truncated here: the Variable(...) constructor call continues
                # beyond this chunk.
                newvariable = Variable(
# Fragment of a Clio-Infra importer (Django ORM). Newlines were stripped in
# this chunk; formatting below is reconstructed — tokens are unchanged.
# NOTE(review): the `try:` paired with this handler lies outside this chunk.
except Dataset.DoesNotExist:
    # Fallback lookup: match the dataset by name prefix within the
    # 'clioinfra' namespace when the exact lookup above (off-screen) misses.
    newdataset = Dataset.objects.get(
        name__startswith='Clio-Infra - %s' % the_subcategory.name,
        namespace='clioinfra')
if newdataset not in old_datasets_list:
    old_datasets_list.append(newdataset)
# The variable's code is the last path segment of the page link
# (everything after the final '/'); the same expression is reused below.
# NOTE(review): `.sourceId` here yields the related Source object (it is
# mutated and saved), so sourceId is presumably a ForeignKey — confirm in
# the model definition.
newsource = Variable.objects.get(
    code=filename_to_pagelink[one_file][filename_to_pagelink[one_file].rfind('/') + 1:],
    fk_dst_id__namespace='clioinfra').sourceId
# Rebuild the source description from the template and repoint the source
# at the (possibly re-fetched) dataset.
newsource.description = source_template % (
    newdataset.name,
    filename_to_pagelink[one_file],
    filename_to_pagelink[one_file],
)
newsource.datasetId = newdataset.pk
newsource.save()
# Refresh the variable's metadata: name, unit (empty string when the unit
# is falsy), derived short unit, and a cleared description.
newvariable = Variable.objects.get(
    code=filename_to_pagelink[one_file][filename_to_pagelink[one_file].rfind('/') + 1:],
    fk_dst_id__namespace='clioinfra')
newvariable.name = varname
newvariable.unit = varunit if varunit else ''
# short_unit_extract is a project helper defined off-screen.
newvariable.short_unit = short_unit_extract(varunit)
newvariable.description = ''
# Fragment of a UN WPP importer (Django ORM). Newlines were stripped in this
# chunk; formatting below is reconstructed — tokens are unchanged.
# NOTE(review): truncated at the start — this is the tail of a subscript
# assignment (presumably var_to_add_dict[column_number] = ...); the target's
# name lies outside this chunk.
column_number] = '%s: %s - %s' % (variant, main_var_name, cell.value)
# Row 18 appears to be the header row of the sheet: the dataset, source and
# per-column variables are all registered exactly once when it is reached.
# NOTE(review): the meaning of row 18 is inferred from the guard flags
# (dataset_saved / variables_saved) — confirm against the source workbook.
if row_number == 18:
    if not dataset_saved:
        # Attach the existing dataset to its category/subcategory and
        # refresh the matching source's description from the template.
        newdataset = Dataset.objects.get(
            name='UN WPP - %s' % dataset_name,
            namespace='unwpp')
        newdataset.fk_dst_cat_id = the_category
        newdataset.fk_dst_subcat_id = the_subcategory
        newdataset.save()
        dataset_saved = True
        newsource = Source.objects.get(name='UN WPP - %s' % dataset_name)
        newsource.description = source_template % (dataset_info['description'])
        newsource.save()
    if not variables_saved:
        for columnnum, varname in var_to_add_dict.items():
            if varname in vars_to_add:
                # Extract the unit of measure from a trailing parenthesised
                # suffix in the variable name, e.g. "Population (thousands)"
                # -> "thousands"; empty when no '(' is present.
                # NOTE(review): index(')') scans from the start, so a name
                # with multiple parenthesised groups keeps everything up to
                # the FIRST ')' — confirm this is intended for these names.
                if '(' not in varname:
                    unit_of_measure = ''
                else:
                    unit_of_measure = varname[
                        varname.index('('):varname.index(')') + 1
                    ].replace('(', '').replace(')', '')
# Fragment of a Quality of Government (QOG) importer (Django ORM). Newlines
# were stripped in this chunk; formatting below is reconstructed — tokens
# are unchanged.
vars_ref_models[each] = newvariable

# Second pass: refresh Source rows for variables that already exist
# (varcode not in vars_to_add). The source key is the variable-code prefix
# up to and including the first underscore, e.g. 'wdi_' from 'wdi_gdp'.
for varcode, vardata in qog_vars.items():
    if varcode not in vars_to_add:
        source_name = varcode[:varcode.index('_') + 1]
        category = qog_vars[varcode]['category']
        if source_name in existing_sources:
            if category in existing_sources[source_name]:
                # NOTE(review): suspected bug — this tests `category` against
                # up_to_date_sources, while the elif below indexes it by
                # source_name; the first test presumably should be
                # `source_name not in up_to_date_sources`. Not changed here
                # because the surrounding logic is outside this chunk.
                if category not in up_to_date_sources:
                    source = existing_sources[source_name][category]
                    source.name = '%s via the Quality of Government dataset' % (
                        qog_sources[source_name]['name'])
                    source.description = source_template % (
                        qog_sources[source_name]['description'],
                        qog_sources[source_name]['name'],
                        qog_sources[source_name]['original_dataset'],
                        qog_sources[source_name]['url'],
                        qog_sources[source_name]['url'])
                    source.datasetId = datasets_ref_models[category].pk
                    # Save inside an atomic block so a duplicate-name
                    # IntegrityError rolls back cleanly; on conflict, retry
                    # with the original dataset's name instead.
                    try:
                        with transaction.atomic():
                            source.save()
                    except django.db.utils.IntegrityError:
                        source.name = '%s via the Quality of Government dataset' % (
                            qog_sources[source_name]['original_dataset'])
                        source.save()
                    logger.info("Updating the source %s." % source.name.encode('utf8'))
                # Same source prefix seen before, but not yet refreshed for
                # this particular category.
                elif category not in up_to_date_sources[source_name]:
                    # Truncated here: the handling of this branch continues
                    # beyond this chunk.
                    source = existing_sources[source_name][category]