del rdict['pop_50'] del rdict['pop_60'] rdict_50 = rdict.copy() rdict_60 = rdict.copy() rdict_50['begin_date'] = begin_date_50 rdict_50['end_date'] = end_date_50 rdict_50['population_value'] = pop_50 rdict_60['begin_date'] = begin_date_60 rdict_60['end_date'] = end_date_60 rdict_60['population_value'] = pop_60 try: entry_50 = MainDataEntry(**rdict_50) entry_50.save() num_mig += 1 entry_60 = MainDataEntry(**rdict_60) entry_60.save() num_mig += 1 sys.stdout.write("[%i] %s\n" % (i, rdict['location'])) except (ValueError, DatabaseError, ValidationError) as e: sys.stderr.write('[%i] Failed to save data row: %s\n' % (i, e)) num_err_rows += 1 infile = sys.argv[2] reader = csv.reader(migtools.UTF8Recoder(open(infile, "r"), migtools.STRING_ENCODING), delimiter='\t', quotechar = '"') last_region = None last_country = None
if rdict["value_unit"] == "Million": rdict["value_unit"] = "millions" else: sys.stderr.write("Unexpected value unit (row %i): %s\n" % (i, rdict["value_unit"])) num_err_rows += 1 continue if rdict["table_num"] == "1.2": del rdict["table_num"] rdict["source"] = src else: sys.stderr.write("Unexpected table number (row %i): %s\n" % (i, rdict["table_num"])) num_err_rows += 1 continue if len(rdict["population_value"].split('.')) > 2: rdict["population_value"] = rdict["population_value"].replace('.', '', 1) rdict["active"] = True rdict["submitted_by"] = migtools.mig_user try: entry = MainDataEntry(**rdict) entry.save() num_mig += 1 except (ValueError, DatabaseError, ValidationError) as e: sys.stderr.write('Failed to save data row (%i): %s\n' % (i, e)) num_err_rows += 1 print 'Migration complete. %i rows migrated, %i locations created, and %i row errors encountered and ignored' % (num_mig, num_locs, num_err_rows)
def add_row(rdict, num_err_rows): if rdict['old_combined_id']: cid_matches = re.match(r'([^\.-]+)[\.-]([^\.-]+)', rdict['old_combined_id']) if not cid_matches: sys.stderr.write('Failed to match combined id %s in row (%i)\n' % (rdict['old_combined_id'], i)) sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 source_id = cid_matches.group(1) table_id = cid_matches.group(2) #if source_id != rdict['old_source_id']: # sys.stderr.write('Mismatch of old source ID in row (%i)\n' % i) # sys.stderr.write('%s\n' % rdict) # return num_err_rows + 1 table = None try: table = Table.objects.get(old_id = table_id) except Table.DoesNotExist as e: sys.stderr.write('Source table does not exist in row (%i)\n' % i) sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 #nr = cid_matches.group(3) #if not nr and table.nr != nr: # sys.stderr.write('Table NR mismatch in row (%i)\n' % i) # sys.stderr.write('%s\n' % rdict) # return num_err_rows + 1 rdict['source'] = table else: source = None try: Source.objects.get(old_id = rdict['old_source_id']) except Source.DoesNotExist as e: sys.stderr.write('Source does not exist in row (%i)\n' % i) sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 rdict['source'] = source val_specified = False if rdict.has_key('individuals_population_value'): if len(rdict['individuals_population_value']) > 0 and rdict['individuals_population_value'] != 0: val_specified = True rdict['individ_fam'] = 0 rdict['population_value'] = rdict['individuals_population_value'] del rdict['individuals_population_value'] if rdict.has_key('families_population_value'): if len(rdict['families_population_value']) > 0 and rdict['families_population_value'] != 0: if val_specified: num_err_rows = add_row(rdict.copy(), num_err_rows) else: val_specified = True rdict['individ_fam'] = 1 rdict['population_value'] = rdict['families_population_value'] del rdict['families_population_value'] if rdict.has_key('male_population_value'): if len(rdict['male_population_value']) > 0 and rdict['male_population_value'] != 0: if val_specified: num_err_rows = add_row(rdict.copy(), num_err_rows) else: val_specified = True rdict['individ_fam'] = 0 rdict['population_value'] = rdict['male_population_value'] rdict['population_gender'] = 'm' del rdict['male_population_value'] if rdict.has_key('female_population_value'): if len(rdict['female_population_value']) > 0 and rdict['female_population_value'] != 0: if val_specified: num_err_rows = add_row(rdict.copy(), num_err_rows) else: val_specified = True rdict['individ_fam'] = 0 rdict['population_value'] = rdict['female_population_value'] rdict['population_gender'] = 'f' del rdict['female_population_value'] if not val_specified: #sys.stderr.write('Data entry with no data in row (%i)\n' % i) #sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 try: print i, rdict['place_origin'].decode(migtools.STRING_ENCODING), u", ", rdict['large1'].decode(migtools.STRING_ENCODING), u", ", rdict['large2'].decode(migtools.STRING_ENCODING), u", ", rdict['large3'].decode(migtools.STRING_ENCODING) except UnicodeEncodeError: # Windows decode error workaround print i, "<UnicodeEncodeError Encountered, ignoring for now>" try: rdict['location'] = migtools.get_or_add_location(unicode(rdict['place_origin'], migtools.STRING_ENCODING), mig_user, unicode(rdict['large1'], migtools.STRING_ENCODING), unicode(rdict['large2'], migtools.STRING_ENCODING), unicode(rdict['large3'], migtools.STRING_ENCODING)) except Location.DatabaseError as e: sys.stderr.write('Database error on getting or adding location in row (%i): %s\n' % (i, e)) sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 except migtools.LocationTooComplicated as e: sys.stderr.write('Location too complicated in row (%i): %s\n' % (i, e)) sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 #import pdb; pdb.set_trace() del rdict['place_origin'] del rdict['large1'] del rdict['large2'] del rdict['large3'] del rdict['link'] del rdict['place_english'] # No longer storing these del rdict['old_combined_id'] del rdict['old_source_id'] for k in rdict.keys(): if isinstance(rdict[k], basestring) and not rdict[k]: del rdict[k] for col_name, add_fun in { 'religion' : get_or_add_religion, 'race' : get_or_add_race, 'ethnicity' : get_or_add_ethnicity, 'ethnic_origin' : get_or_add_ethnic_origin, 'population_condition' : get_or_add_pop_cond }.iteritems(): if rdict.has_key(col_name): try: rdict[col_name] = add_fun(unicode(rdict[col_name], migtools.STRING_ENCODING)) except DatabaseError as e: sys.stderr.write("Error on get_or_add_%s in row (%i): %s\n" % (col_name, i, e)) sys.stderr.write("%s\n" % rdict) return num_err_rows + 1 if rdict.has_key('remarks'): rdict['remarks'] = rdict['remarks'].decode(migtools.STRING_ENCODING) if rdict.has_key('alternate_location_name'): rdict['alternate_location_name'] = rdict['alternate_location_name'].decode(migtools.STRING_ENCODING) try: if rdict.has_key('begin_date'): mon, day, year = [int(j) for j in rdict['begin_date'].split('/')] rdict['begin_date'] = datetime.date(year, mon, day) if rdict.has_key('end_date'): mon, day, year = [int(j) for j in rdict['end_date'].split('/')] rdict['end_date'] = datetime.date(year, mon, day) except ValueError as e: sys.stderr.write('Encountered error in date format at row (%i): %s\n' % (i, e)) sys.stderr.write('%s\n' % rdict) return num_err_rows + 1 for age_col in ('age_start', 'age_end'): if rdict.has_key(age_col): if rdict[age_col] in ('Unknown', 'Age unkown', 'Death', 'death'): # Yes, the typo is in the data to migrate del rdict[age_col] elif rdict[age_col] in ('Under 1', 'Total', 'Total all ages', 'All ages'): del rdict['age_start'] if rdict.has_key('age_end'): del rdict['age_end'] break elif rdict[age_col] in ('Not specified','Unspecified', 'Period not indicated'): del rdict[age_col] else: over_match = re.match(r'Over\s(\d+)', rdict[age_col]) if over_match: if rdict.has_key('age_end'): del rdict['age_end'] rdict['age_start'] = over_match.group(1) break under_match = re.match(r'Under\s(\d+)', rdict[age_col]) if under_match: if rdict.has_key('age_start'): del rdict['age_start'] rdict['age_end'] = under_match.group(1) break total_range_match = re.match(r'Total,\s(\d+)-(\d+)', rdict[age_col]) if total_range_match: rdict['age_start'] = total_range_match.group(1) rdict['age_end'] = total_range_match.group(2) break rdict['active'] = True rdict['submitted_by'] = mig_user try: entry = MainDataEntry(**rdict) entry.save() except (ValueError, DatabaseError, ValidationError) as e: sys.stderr.write('Failed to save data row (%i): %s\n' % (i, e)) sys.stderr.write('%s\n' % rdict) num_err_rows += 1 return num_err_rows