Example #1
0
def process_entities(country_names_dictionary):
    """Map every country code to an ``Entity``, creating entities when needed.

    Each country name is resolved in this order:

    1. If its unidecoded, lower-cased form equals a lower-cased
       ``CountryName.country_name``, the entity whose name is the mapped
       ``owid_country.owid_name`` is fetched.
    2. If its lower-cased form equals a lower-cased existing ``Entity.name``,
       that entity is fetched with a case-insensitive match.
    3. Otherwise a new ``Entity`` with ``validated=False`` is saved.

    Args:
        country_names_dictionary: mapping of country code to country name.

    Returns:
        dict: the same keys as ``country_names_dictionary``, each mapped to
        the resolved ``Entity`` object (used later when saving the variables
        and their values).
    """
    known_entity_names = {
        name.lower()
        for name in Entity.objects.values_list('name', flat=True)
    }
    owid_country_by_name = {
        country.country_name.lower(): country.owid_country
        for country in CountryName.objects.all()
    }

    c_name_entity_ref = {}
    for c_code, country_name in country_names_dictionary.items():
        lowered_name = country_name.lower()
        # Normalise and look up once instead of repeating both per branch.
        owid_country = owid_country_by_name.get(
            unidecode.unidecode(lowered_name))
        if owid_country:
            entity = Entity.objects.get(name=owid_country.owid_name)
        elif lowered_name in known_entity_names:
            entity = Entity.objects.get(name__iexact=country_name)
        else:
            entity = Entity(name=country_name, validated=False)
            entity.save()
            # Remember the new name so that another code carrying the same
            # name reuses this row instead of inserting a second entity.
            known_entity_names.add(lowered_name)
        c_name_entity_ref[c_code] = entity

    return c_name_entity_ref
Example #2
0
 # Resolve and cache the Entity for a country label seen for the first time.
 if country_col not in c_name_entity_ref:
     if country_col == 'All countries':
         # The 'All countries' label is stored under the entity named 'World'.
         newentity = Entity.objects.get(name='World')
     elif country_col == 'Côte d\'Ivoire':
         # The accented spelling is looked up under the unaccented entity name.
         newentity = Entity.objects.get(name='Cote d\'Ivoire')
     elif country_tool_names_dict.get(
             unidecode.unidecode(country_col.lower()), 0):
         # The unidecoded, lower-cased label is a key of
         # country_tool_names_dict: fetch the entity named by the mapped
         # object's owid_name.
         newentity = Entity.objects.get(
             name=country_tool_names_dict[unidecode.unidecode(
                 country_col.lower())].owid_name)
     elif country_col.lower() in existing_entities_list:
         # The label matches an existing entity name ignoring case.
         newentity = Entity.objects.get(
             name__iexact=country_col)
     else:
         # Unknown label: create a new, not yet validated entity.
         newentity = Entity(name=country_col, validated=False)
         newentity.save()
     c_name_entity_ref[country_col] = newentity
 # NOTE(review): the except clause belonging to this try is outside the
 # visible excerpt.
 try:
     # Queue the value only if this (year, entity pk, variable pk) triple has
     # not been queued before.
     if (int(row['Time Period']),
             c_name_entity_ref[country_col].pk,
             variable_name_to_object[variable_name.lower()].pk
         ) not in duplicate_tracker:
         data_values_tuple_list.append((
             str(float(row['Data Value'])),
             int(row['Time Period']),
             c_name_entity_ref[country_col].pk,
             variable_name_to_object[variable_name.lower()].pk))
         # Record the triple so a repeated row is skipped next time.
         duplicate_tracker.add((
             int(row['Time Period']),
             c_name_entity_ref[country_col].pk,
             variable_name_to_object[variable_name.lower()].pk))
Example #3
0
def process_one_row(year, value, countryname, variablecode, variablename,
                    existing_fao_variables_dict, unit, source, dataset,
                    var_desc, data_values_tuple_list):
    """Queue one data value for bulk insertion into ``data_values``.

    The row is ignored when ``year`` or ``value`` is ``False`` or when the
    ``(countryname, variablecode)`` pair is already in the module-level
    ``unique_data_tracker``. Otherwise the country's ``Entity`` and the
    ``Variable`` are fetched or created and cached in
    ``country_name_entity_ref`` and ``existing_fao_variables_dict``. A
    ``(value, year, entity pk, variable pk)`` tuple is then appended to
    ``data_values_tuple_list``. Once that list holds more than 3000 tuples it
    is written with one ``executemany`` call and emptied in place.

    Args:
        year: year of the data point, or ``False`` when unavailable.
        value: the data value, or ``False`` when unavailable.
        countryname: country label used to resolve the ``Entity``.
        variablecode: code stored on a newly created ``Variable``.
        variablename: name of the variable; key of the variable cache.
        existing_fao_variables_dict: cache of variable name -> ``Variable``;
            updated in place when a variable is created.
        unit: unit text; an empty string is stored when it is falsy.
        source: passed as ``sourceId`` of a new ``Variable``.
        dataset: passed as ``fk_dst_id`` of a new ``Variable``.
        var_desc: description of a new ``Variable``.
        data_values_tuple_list: buffer of pending rows; mutated in place.

    Returns:
        None.
    """
    global unique_data_tracker
    global processed_values

    processed_values += 1
    if processed_values % 300 == 0:
        # this is done in order to not keep the CPU busy all the time
        time.sleep(0.001)

    # this is used for constructing the query for mass inserting to the
    # data_values table
    insert_string = 'INSERT into data_values (value, year, fk_ent_id, fk_var_id) VALUES (%s, %s, %s, %s)'

    if year is not False and value is not False:
        if (countryname, variablecode) not in unique_data_tracker:
            if countryname not in country_name_entity_ref:
                lowered_name = countryname.lower()
                owid_country = country_tool_names_dict.get(
                    unidecode.unidecode(lowered_name))
                if lowered_name in existing_entities_list:
                    # The membership test ignores case, so the fetch must
                    # too; an exact-case lookup could raise DoesNotExist for
                    # a name that differs only in capitalisation.
                    newentity = Entity.objects.get(name__iexact=countryname)
                elif owid_country:
                    newentity = Entity.objects.get(
                        name=owid_country.owid_name)
                else:
                    newentity = Entity(name=countryname, validated=False)
                    newentity.save()
                country_name_entity_ref[countryname] = newentity

            if variablename not in existing_fao_variables_dict:
                variable_fields = dict(
                    name=variablename,
                    unit=unit if unit else '',
                    short_unit=short_unit_extract(unit),
                    description=var_desc,
                    code=variablecode,
                    timespan='',
                    fk_dst_id=dataset,
                    fk_var_type_id=VariableType.objects.get(pk=4),
                    sourceId=source)
                newvariable = Variable(**variable_fields)
                try:
                    # The savepoint keeps a failed insert from breaking any
                    # enclosing transaction.
                    with transaction.atomic():
                        newvariable.save()
                except django.db.utils.IntegrityError:
                    # Retry the same variable without a code (presumably the
                    # code clashed with an existing row -- confirm against
                    # the Variable model's constraints).
                    variable_fields['code'] = None
                    newvariable = Variable(**variable_fields)
                    newvariable.save()
                existing_fao_variables_dict[variablename] = newvariable
            data_values_tuple_list.append(
                (str(value), int(year),
                 country_name_entity_ref[countryname].pk,
                 existing_fao_variables_dict[variablename].pk))
            # insert when the length of the list goes over 3000
            if len(data_values_tuple_list) > 3000:
                with connection.cursor() as c:
                    c.executemany(insert_string, data_values_tuple_list)
                del data_values_tuple_list[:]