Example #1
0
        new_datasets_list.append(newdataset)
    else:
        newdataset = Dataset.objects.get(name=subcategory_name,
                                         categoryId=the_category)

    # Create or refresh the 'UNAIDS' Source record tied to the current dataset.
    source_name = 'UNAIDS'
    if source_name not in source_name_to_object:
        # Not yet in the in-memory mapping: build a new Source whose
        # description is the JSON-serialized source_description, then save it.
        newsource = Source(name=source_name,
                           description=json.dumps(source_description),
                           datasetId=newdataset.pk)
        newsource.save()
        source_name_to_object[source_name] = newsource
    else:
        # Already in the mapping: look the record up again by name and dataset,
        # overwrite its description and save.
        # NOTE(review): presumably a Django manager, in which case get() raises
        # if no row (or more than one) matches -- confirm the mapping and the
        # table cannot disagree.
        newsource = Source.objects.get(name=source_name,
                                       datasetId=newdataset.pk)
        newsource.description = json.dumps(source_description)
        newsource.save()
        source_name_to_object[source_name] = newsource

    for eachfile in glob.glob(unaids_downloads + '/*.csv'):
        print("Processing: {}".format(eachfile))
        with open(eachfile, mode='rt', encoding='utf-8-sig') as f:
            reader = csv.DictReader(f)

            for row in reader:
                row_number += 1

                variable_name = "{} - {}".format(row['Indicator'].strip(),
                                                 row['Subgroup'].strip())
                if variable_name.lower() not in existing_variables_list:
                    newvariable = Variable(
Example #2
0
                                    except Dataset.DoesNotExist:
                                        newdataset = Dataset.objects.get(
                                            name__startswith='Clio-Infra - %s'
                                            % the_subcategory.name,
                                            namespace='clioinfra')
                                    if newdataset not in old_datasets_list:
                                        old_datasets_list.append(newdataset)

                                # Find the variable whose code is the part of
                                # this file's page link after the last '/'
                                # (the whole link if it contains no '/'), in
                                # the 'clioinfra' namespace, and take the
                                # object behind its sourceId attribute.
                                # NOTE(review): sourceId is presumably a
                                # foreign key to Source -- confirm on the model.
                                newsource = Variable.objects.get(
                                    code=filename_to_pagelink[one_file]
                                    [filename_to_pagelink[one_file].
                                     rfind('/') + 1:],
                                    fk_dst_id__namespace='clioinfra').sourceId
                                # Rebuild the description from source_template
                                # using the dataset name and the page link
                                # (the link is passed twice).
                                newsource.description = source_template % (
                                    newdataset.name,
                                    filename_to_pagelink[one_file],
                                    filename_to_pagelink[one_file],
                                )
                                # Point the source at the dataset selected
                                # above and save it.
                                newsource.datasetId = newdataset.pk
                                newsource.save()

                                # Fetch the same variable again (same code and
                                # namespace filter) to update its own fields.
                                newvariable = Variable.objects.get(
                                    code=filename_to_pagelink[one_file]
                                    [filename_to_pagelink[one_file].
                                     rfind('/') + 1:],
                                    fk_dst_id__namespace='clioinfra')
                                newvariable.name = varname
                                # A falsy unit is stored as an empty string.
                                newvariable.unit = varunit if varunit else ''
                                # short_unit_extract receives the raw varunit,
                                # not the normalized value stored in .unit.
                                newvariable.short_unit = short_unit_extract(
                                    varunit)
                                newvariable.description = ''
Example #3
0
                                        column_number] = '%s: %s - %s' % (
                                            variant, main_var_name, cell.value)

                        if row_number == 18:
                            # Runs only while dataset_saved is still falsy;
                            # the flag is set to True below.
                            if not dataset_saved:
                                # Look up the 'UN WPP - <dataset_name>' dataset
                                # in the 'unwpp' namespace and attach the
                                # current category and subcategory to it.
                                newdataset = Dataset.objects.get(
                                    name='UN WPP - %s' % dataset_name,
                                    namespace='unwpp')
                                newdataset.fk_dst_cat_id = the_category
                                newdataset.fk_dst_subcat_id = the_subcategory
                                newdataset.save()
                                dataset_saved = True

                                # The source is looked up by name only (no
                                # dataset or namespace filter) and has its
                                # description rebuilt from source_template.
                                newsource = Source.objects.get(
                                    name='UN WPP - %s' % dataset_name)
                                # The parentheses are grouping only, not a
                                # tuple: the template is formatted with the
                                # single dataset_info['description'] value.
                                newsource.description = source_template % (
                                    dataset_info['description'])
                                newsource.save()

                            if not variables_saved:

                                for columnnum, varname in var_to_add_dict.items(
                                ):
                                    if varname in vars_to_add:
                                        if '(' not in varname:
                                            unit_of_measure = ''
                                        else:
                                            unit_of_measure = varname[
                                                varname.
                                                index('('):varname.index(')') +
                                                1].replace('(', '').replace(
                                                    ')', '')
Example #4
0
            vars_ref_models[each] = newvariable

        for varcode, vardata in qog_vars.items():
            if varcode not in vars_to_add:
                source_name = varcode[:varcode.index('_') + 1]
                category = qog_vars[varcode]['category']

                if source_name in existing_sources:
                    if category in existing_sources[source_name]:
                        # Refresh the existing source for this
                        # (source_name, category) pair from the qog_sources
                        # metadata.
                        # NOTE(review): this branch tests `category` against
                        # up_to_date_sources itself, while the elif that
                        # follows tests it against
                        # up_to_date_sources[source_name] -- confirm which key
                        # the top level of that mapping uses.
                        if category not in up_to_date_sources:
                            source = existing_sources[source_name][category]
                            source.name = '%s via the Quality of Government dataset' % (
                                qog_sources[source_name]['name'])
                            # Template values, in order: description, name,
                            # original_dataset, url, url.
                            source.description = source_template % (
                                qog_sources[source_name]['description'],
                                qog_sources[source_name]['name'],
                                qog_sources[source_name]['original_dataset'],
                                qog_sources[source_name]['url'],
                                qog_sources[source_name]['url'])
                            source.datasetId = datasets_ref_models[category].pk
                            # Save inside transaction.atomic(), presumably so
                            # a failed save is rolled back on its own and the
                            # retry below can still run.
                            try:
                                with transaction.atomic():
                                    source.save()
                            except django.db.utils.IntegrityError:
                                # Retry with a name built from
                                # 'original_dataset' instead of 'name'. This
                                # second save is not guarded, so another
                                # IntegrityError would propagate.
                                source.name = '%s via the Quality of Government dataset' % (
                                    qog_sources[source_name]
                                    ['original_dataset'])
                                source.save()
                            # NOTE(review): on Python 3, .encode('utf8') makes
                            # %s render the bytes repr (b'...') -- confirm the
                            # interpreter version this script targets.
                            logger.info("Updating the source %s." %
                                        source.name.encode('utf8'))
                        elif category not in up_to_date_sources[source_name]:
                            source = existing_sources[source_name][category]