except ValueError:
                    pass

                if len(
                        data_values_tuple_list
                ) > 3000:  # insert when the length of the list goes over 3000

                    with connection.cursor() as c:
                        c.executemany(insert_string, data_values_tuple_list)
                    data_values_tuple_list = []

    if len(data_values_tuple_list):  # insert any leftover data_values
        with connection.cursor() as c:
            c.executemany(insert_string, data_values_tuple_list)
        data_values_tuple_list = []

# for dataset in existing_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, dataset.name, 'gbd_cause_fetcher', '')
# for dataset in new_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, None, 'gbd_cause_fetcher', '')

# Persist a record of this import run so later invocations can see that a
# unaids import has already happened; the state message captures how many
# unaids variables exist after the import finished.
unaids_variable_count = Variable.objects.filter(
    datasetId__namespace='unaids').count()
newimport = ImportHistory(
    import_type='unaids',
    import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
    import_notes='A unaids import was performed',
    import_state='There are a total of %s unaids variables after the import' % unaids_variable_count)
newimport.save()

# Report total wall-clock runtime of the script.
print("--- %s seconds ---" % (time.time() - start_time))
# --- Exemple #2 (scraped-example separator; original listing showed a vote count of 0) ---
                                        with connection.cursor() as c:
                                            c.executemany(insert_string, data_values_tuple_list)
                                        logger.info("Dumping data values...")
                                        data_values_tuple_list = []

                column_number = 0
                if row_number % 10 == 0:
                    time.sleep(0.001)  # this is done in order to not keep the CPU busy all the time, the delay after each 10th row is 1 millisecond

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            logger.info("Dumping data values...")

        newimport = ImportHistory(import_type=DATASET_NAMESPACE, import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                                  import_notes='Initial import of WDI',
                                  import_state=json.dumps({'file_hash': file_checksum(WDI_DOWNLOADS_PATH + 'wdi.zip')}))
        newimport.save()
        # for dataset in datasets_list:
            # write_dataset_csv(dataset.pk, dataset.name, None, 'wdi_fetcher', '')
        logger.info("Import complete.")

    else:
        last_import = import_history.last()

        if json.loads(last_import.import_state)['file_hash'] == file_checksum(WDI_DOWNLOADS_PATH + 'wdi.zip'):
            logger.info('No updates available.')
            sys.exit(0)

        logger.info('New data is available.')
# --- Exemple #3 (scraped-example separator; original listing showed a vote count of 0) ---
                    column_number = 0

                    if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000
                        with connection.cursor() as c:
                            c.executemany(insert_string, data_values_tuple_list)
                        data_values_tuple_list = []

                if len(data_values_tuple_list):  # insert any leftover data_values
                    with connection.cursor() as c:
                        c.executemany(insert_string, data_values_tuple_list)

        newimport = ImportHistory(import_type='unwpp',
                                      import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                                      import_notes='Importing file %s' % file_to_parse,
                                      import_state=json.dumps(
                                      {'file_hash': file_checksum(os.path.join(wpp_downloads_save_location, file_to_parse)),
                                       'file_name': file_to_parse
                                       }))
        newimport.save()
        write_dataset_csv(newdataset.pk, newdataset.name, None, 'unwpp_fetcher', '')
    else:

        if imported_before_hash == file_checksum(os.path.join(wpp_downloads_save_location, file_to_parse)):
            sys.exit('No updates available.')

        country_name_entity_ref = process_entities(country_names_dict)

        existing_categories = DatasetCategory.objects.values('name')
        existing_categories_list = {item['name'] for item in existing_categories}
                    column_number = 0

                data_values_tuple_list = []
                for country, data_value in data_values_dict.items():
                    for year, value in data_value.items():
                        data_values_tuple_list.append(
                            (value, year, country, newvariable.pk))

                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)

                newimport = ImportHistory(
                    import_type='clioinfra',
                    import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                    import_notes='Importing file %s' % one_file,
                    import_state=json.dumps({
                        'file_hash': file_checksum(file),
                        'file_name': one_file
                    }))
                newimport.save()

        else:
            if imported_before_hash == file_checksum(file):
                print('No updates available for file %s.' % one_file)
            else:
                if 'historical' in one_file.lower():
                    print('Processing: %s' % one_file)
                    wb = load_workbook(file, read_only=True)

                    data_ws = wb['Data']
                                                   variable_name_to_object[variable_name].pk))

                    if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000

                        with connection.cursor() as c:
                            c.executemany(insert_string, data_values_tuple_list)
                        data_values_tuple_list = []

                if row_number % 100 == 0:
                    time.sleep(0.001)  # this is done in order to not keep the CPU busy all the time, the delay after each 100th row is 1 millisecond

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            data_values_tuple_list = []

        os.remove(csv_filename)

# for dataset in existing_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, dataset.name, 'gbd_risk_fetcher', '')
# for dataset in new_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, None, 'gbd_risk_fetcher', '')

# Record the gbd_risk import in the history table; the state message notes
# the post-import count of variables in the gbd_risk namespace.
gbd_risk_total = Variable.objects.filter(
    fk_dst_id__namespace='gbd_risk').count()
newimport = ImportHistory(
    import_type='gbd_risk',
    import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
    import_notes='A gbd import was performed',
    import_state='There are a total of %s gbd_risk variables after the import' % gbd_risk_total)
newimport.save()

# Report total wall-clock runtime of the script.
print("--- %s seconds ---" % (time.time() - start_time))
                                        with connection.cursor() as c:
                                            c.executemany(insert_string, data_values_tuple_list)
                                        logger.info("Dumping data values...")
                                        data_values_tuple_list = []

                column_number = 0
                if row_number % 10 == 0:
                    time.sleep(0.001)  # this is done in order to not keep the CPU busy all the time, the delay after each 10th row is 1 millisecond

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            logger.info("Dumping data values...")

        newimport = ImportHistory(import_type='povstats', import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
                                  import_notes='Initial import of POVSTATS datasets',
                                  import_state=json.dumps({'file_hash': file_checksum(povstats_downloads_save_location + 'povstats.zip')}))
        newimport.save()
        for dataset in datasets_list:
            write_dataset_csv(dataset.pk, dataset.name, None, 'povstats_fetcher', '')
        logger.info("Import complete.")

    else:
        last_import = import_history.last()
        deleted_indicators = {}  # This is used to keep track which variables' data values were already deleted before writing new values

        if json.loads(last_import.import_state)['file_hash'] == file_checksum(povstats_downloads_save_location + 'povstats.zip'):
            logger.info('No updates available.')
            sys.exit('No updates available.')

        logger.info('New data is available.')
                    data_values_tuple_list.append((str(float(row['val'])*100) if row['metric_name'] == 'Percent' else row['val'], int(row['year']),
                                                   c_name_entity_ref[row['location_name']].pk, variable_name_to_object[variable_name].pk))

                    if len(data_values_tuple_list) > 3000:  # insert when the length of the list goes over 3000

                        with connection.cursor() as c:
                            c.executemany(insert_string, data_values_tuple_list)
                        data_values_tuple_list = []

                if row_number % 100 == 0:
                    time.sleep(0.001)  # this is done in order to not keep the CPU busy all the time, the delay after each 100th row is 1 millisecond

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            data_values_tuple_list = []

        os.remove(csv_filename)

# for dataset in existing_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, dataset.name, 'gbd_prevalence_by_gender', '')
# for dataset in new_datasets_list:
#     write_dataset_csv(dataset.pk, dataset.name, None, 'gbd_prevalence_by_gender', '')

# Log this gbd_prevalence_by_gender import run; the state message records
# how many variables exist in that namespace once the import completed.
prevalence_total = Variable.objects.filter(
    datasetId__namespace='gbd_prevalence_by_gender').count()
newimport = ImportHistory(
    import_type='gbd_prevalence_by_gender',
    import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
    import_notes='A gbd import was performed',
    import_state='There are a total of %s gbd_prevalence_by_gender variables after the import' % prevalence_total)
newimport.save()

# Report total wall-clock runtime of the script.
print("--- %s seconds ---" % (time.time() - start_time))
                    time.sleep(
                        0.001
                    )  # this is done in order to not keep the CPU busy all the time, the delay after each 10th row is 1 millisecond

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as dbconnection:
                dbconnection.executemany(insert_string, data_values_tuple_list)
                logger.info("Dumping data values...")

        logger.info("Imported a total of %s data values." % total_data_values)

        newimport = ImportHistory(
            import_type='qog',
            import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
            import_notes='Initial import of QoG data. %s data values imported.'
            % total_data_values,
            import_state=json.dumps({
                'file_hash':
                file_checksum(qog_downloads_save_location + 'qog.csv')
            }))
        newimport.save()
        # now exporting csvs to the repo
        for category, dataset in datasets_ref_models.items():
            write_dataset_csv(dataset.pk, dataset.name, None, 'qog_fetcher',
                              '')

        logger.info("Import complete.")
    else:
        logger.info("Importing the QoG dataset.")
        last_import = import_history.last()
# --- Exemple #9 (scraped-example separator; original listing showed a vote count of 0) ---
                                        data_values_tuple_list = []

                column_number = 0
                if row_number % 10 == 0:
                    time.sleep(
                        0.001
                    )  # this is done in order to not keep the CPU busy all the time, the delay after each 10th row is 1 millisecond

        if len(data_values_tuple_list):  # insert any leftover data_values
            with connection.cursor() as c:
                c.executemany(insert_string, data_values_tuple_list)
            logger.info("Dumping data values...")

        newimport = ImportHistory(
            import_type='climatech',
            import_time=timezone.now().strftime('%Y-%m-%d %H:%M:%S'),
            import_notes='Initial import of climatech datasets',
            import_state=json.dumps(
                {'file_hash': file_checksum(excel_filename)}))
        newimport.save()
        for dataset in datasets_list:
            write_dataset_csv(dataset.pk, dataset.name, None,
                              'climatech_fetcher', '')
        logger.info("Import complete.")

    else:
        last_import = import_history.last()
        deleted_indicators = {
        }  # This is used to keep track which variables' data values were already deleted before writing new values

        if json.loads(last_import.import_state)['file_hash'] == file_checksum(
                excel_filename):
                for oneimport in import_history:
                    if json.loads(oneimport.import_state
                                  )['file_name'] == os.path.basename(eachfile):
                        file_imported_before = True
                        imported_before_hash = json.loads(
                            oneimport.import_state)['file_hash']
                if not file_imported_before:
                    process_csv_file_insert("/tmp/%s" % csv_filename,
                                            os.path.basename(eachfile))
                    os.remove("/tmp/%s" % csv_filename)
                    newimport = ImportHistory(
                        import_type='faostat',
                        import_time=timezone.now().strftime(
                            '%Y-%m-%d %H:%M:%S'),
                        import_notes='Importing file %s' %
                        os.path.basename(eachfile),
                        import_state=json.dumps({
                            'file_hash':
                            file_checksum(eachfile),
                            'file_name':
                            os.path.basename(eachfile)
                        }))
                    newimport.save()
                else:
                    if imported_before_hash == file_checksum(eachfile):
                        print('No updates available for file %s.' %
                              os.path.basename(eachfile))

    for eachfile in glob.glob(all_dataset_files_dir + "/*.csv"):
        if os.path.basename(eachfile) not in files_to_exclude:
            file_imported_before = False
            for oneimport in import_history: