Ejemplo n.º 1
0
    del rdict['pop_50']
    del rdict['pop_60']

    rdict_50 = rdict.copy()
    rdict_60 = rdict.copy()

    rdict_50['begin_date'] = begin_date_50
    rdict_50['end_date'] = end_date_50
    rdict_50['population_value'] = pop_50
    rdict_60['begin_date'] = begin_date_60
    rdict_60['end_date'] = end_date_60
    rdict_60['population_value'] = pop_60

    try:
      entry_50 = MainDataEntry(**rdict_50)
      entry_50.save()
      num_mig += 1
      entry_60 = MainDataEntry(**rdict_60)
      entry_60.save()
      num_mig += 1
      sys.stdout.write("[%i] %s\n" % (i, rdict['location']))
    except (ValueError, DatabaseError, ValidationError) as e:
      sys.stderr.write('[%i] Failed to save data row: %s\n' % (i, e))
      num_err_rows += 1

  infile = sys.argv[2]
  reader = csv.reader(migtools.UTF8Recoder(open(infile, "r"), migtools.STRING_ENCODING), delimiter='\t', quotechar = '"')

  last_region = None
  last_country = None
Ejemplo n.º 2
0
    if rdict["value_unit"] == "Million":
      rdict["value_unit"] = "millions"
    else:
      sys.stderr.write("Unexpected value unit (row %i): %s\n" % (i, rdict["value_unit"])) 
      num_err_rows += 1 
      continue

    if rdict["table_num"] == "1.2":
      del rdict["table_num"]
      rdict["source"] = src
    else:
      sys.stderr.write("Unexpected table number (row %i): %s\n" % (i, rdict["table_num"])) 
      num_err_rows += 1 
      continue

    if len(rdict["population_value"].split('.')) > 2:
      rdict["population_value"] = rdict["population_value"].replace('.', '', 1)

    rdict["active"] = True
    rdict["submitted_by"] = migtools.mig_user
    
    try:
      entry = MainDataEntry(**rdict)
      entry.save()
      num_mig += 1
    except (ValueError, DatabaseError, ValidationError) as e:
      sys.stderr.write('Failed to save data row (%i): %s\n' % (i, e))
      num_err_rows += 1

  print 'Migration complete. %i rows migrated, %i locations created, and %i row errors encountered and ignored' % (num_mig, num_locs, num_err_rows)
Ejemplo n.º 3
0
# Population-value columns in the order add_row examines them. Each entry is
# (column name, value stored in 'individ_fam', value stored in
# 'population_gender' or None when the column does not set a gender).
_POPULATION_VALUE_COLUMNS = (
  ('individuals_population_value', 0, None),
  ('families_population_value', 1, None),
  ('male_population_value', 0, 'm'),
  ('female_population_value', 0, 'f'),
)


def _report_bad_row(message, rdict):
  """Write an error message followed by a dump of the row dict to stderr."""
  sys.stderr.write(message)
  sys.stderr.write('%s\n' % (rdict,))


def add_row(rdict, num_err_rows):
  """Convert one raw row dict into MainDataEntry record(s) and save them.

  The row is processed in stages: resolve its source (a Table when
  'old_combined_id' is set, otherwise a Source looked up by
  'old_source_id'), pick the population value, resolve the location,
  replace the lookup columns (religion, race, ...) with the objects returned
  by the get_or_add_* helpers, parse the dates, normalise the age columns,
  and finally build and save a MainDataEntry from what is left in the dict.

  A row may carry several population values (individuals, families, male,
  female). The first non-empty one is stored by this call; every further
  non-empty one is handed to a recursive add_row call on a copy of the row,
  so each value ends up in its own entry.

  Besides its parameters the function reads the module-level names `i`
  (used as the row number in messages) and `mig_user`.

  Args:
    rdict: dict for one row, keyed by column name. It is modified in place
      (columns are removed and replaced).
    num_err_rows: error count accumulated so far.

  Returns:
    The updated error count: incremented by one for every call (this one or
    a recursive one) that gave up on its row.
  """
  # --- Source ---------------------------------------------------------------
  if rdict['old_combined_id']:
    cid_matches = re.match(r'([^\.-]+)[\.-]([^\.-]+)', rdict['old_combined_id'])
    if not cid_matches:
      _report_bad_row('Failed to match combined id %s in row (%i)\n' % (rdict['old_combined_id'], i), rdict)
      return num_err_rows + 1

    # Only the second half of the combined id (the table id) is looked up.
    try:
      rdict['source'] = Table.objects.get(old_id=cid_matches.group(2))
    except Table.DoesNotExist:
      _report_bad_row('Source table does not exist in row (%i)\n' % i, rdict)
      return num_err_rows + 1
  else:
    try:
      # The looked-up object must be kept: discarding it stored None as the
      # row's source.
      rdict['source'] = Source.objects.get(old_id=rdict['old_source_id'])
    except Source.DoesNotExist:
      _report_bad_row('Source does not exist in row (%i)\n' % i, rdict)
      return num_err_rows + 1

  # --- Population value -----------------------------------------------------
  val_specified = False

  for position, (col_name, individ_fam, gender) in enumerate(_POPULATION_VALUE_COLUMNS):
    if col_name not in rdict:
      continue

    raw_value = rdict[col_name]
    if raw_value:
      if val_specified:
        # This call already owns a value, so this one goes into its own entry.
        # The copy keeps only this value column: if the later columns were
        # handed over as well, both the recursive call and this call would
        # recurse on them and the same value would be saved more than once.
        sibling = rdict.copy()
        for later_col, _individ_fam, _gender in _POPULATION_VALUE_COLUMNS[position + 1:]:
          sibling.pop(later_col, None)
        num_err_rows = add_row(sibling, num_err_rows)
      else:
        val_specified = True
        rdict['individ_fam'] = individ_fam
        rdict['population_value'] = raw_value
        if gender is not None:
          rdict['population_gender'] = gender

    del rdict[col_name]

  if not val_specified:
    # Counted as an error row, but deliberately not reported on stderr.
    return num_err_rows + 1

  # --- Location -------------------------------------------------------------
  place, large1, large2, large3 = [
    rdict[loc_col].decode(migtools.STRING_ENCODING)
    for loc_col in ('place_origin', 'large1', 'large2', 'large3')
  ]

  # Single-argument print so the line parses both as a print statement and as
  # a print() call; the text is the same as printing the items one by one.
  try:
    print(u"%i %s ,  %s ,  %s ,  %s" % (i, place, large1, large2, large3))
  except UnicodeEncodeError:
    # Windows decode error workaround
    print("%i <UnicodeEncodeError Encountered, ignoring for now>" % i)

  try:
    rdict['location'] = migtools.get_or_add_location(place, mig_user, large1, large2, large3)
  except DatabaseError as e:
    # NOTE(review): this handler used to name Location.DatabaseError, while
    # every other handler in this function catches DatabaseError. Django model
    # classes normally expose DoesNotExist/MultipleObjectsReturned only --
    # confirm Location defines no DatabaseError attribute of its own.
    _report_bad_row('Database error on getting or adding location in row (%i): %s\n' % (i, e), rdict)
    return num_err_rows + 1
  except migtools.LocationTooComplicated as e:
    _report_bad_row('Location too complicated in row (%i): %s\n' % (i, e), rdict)
    return num_err_rows + 1

  # --- Drop columns that are not passed to MainDataEntry ---------------------
  # The last two (old ids) are no longer stored.
  for col_name in ('place_origin', 'large1', 'large2', 'large3', 'link',
                   'place_english', 'old_combined_id', 'old_source_id'):
    del rdict[col_name]

  # Empty strings are removed so they are not passed to MainDataEntry at all.
  for key in list(rdict):
    if isinstance(rdict[key], basestring) and not rdict[key]:
      del rdict[key]

  # --- Lookup columns -------------------------------------------------------
  lookup_columns = (
    ('religion', get_or_add_religion),
    ('race', get_or_add_race),
    ('ethnicity', get_or_add_ethnicity),
    ('ethnic_origin', get_or_add_ethnic_origin),
    ('population_condition', get_or_add_pop_cond),
  )
  for col_name, add_fun in lookup_columns:
    if col_name in rdict:
      try:
        rdict[col_name] = add_fun(unicode(rdict[col_name], migtools.STRING_ENCODING))
      except DatabaseError as e:
        _report_bad_row("Error on get_or_add_%s in row (%i): %s\n" % (col_name, i, e), rdict)
        return num_err_rows + 1

  # --- Free-text columns ----------------------------------------------------
  for col_name in ('remarks', 'alternate_location_name'):
    if col_name in rdict:
      rdict[col_name] = rdict[col_name].decode(migtools.STRING_ENCODING)

  # --- Dates (month/day/year) -----------------------------------------------
  try:
    for col_name in ('begin_date', 'end_date'):
      if col_name in rdict:
        mon, day, year = [int(part) for part in rdict[col_name].split('/')]
        rdict[col_name] = datetime.date(year, mon, day)
  except ValueError as e:
    _report_bad_row('Encountered error in date format at row (%i): %s\n' % (i, e), rdict)
    return num_err_rows + 1

  # --- Ages -----------------------------------------------------------------
  for age_col in ('age_start', 'age_end'):
    if age_col not in rdict:
      continue

    age_text = rdict[age_col]

    if age_text in ('Unknown', 'Age unkown', 'Death', 'death'): # Yes, the typo is in the data to migrate
      del rdict[age_col]
    elif age_text in ('Under 1', 'Total', 'Total all ages', 'All ages'):
      # Both bounds are dropped. pop() is used because 'age_start' may be
      # absent when the label was found in 'age_end'.
      rdict.pop('age_start', None)
      rdict.pop('age_end', None)
      break
    elif age_text in ('Not specified', 'Unspecified', 'Period not indicated'):
      del rdict[age_col]
    else:
      over_match = re.match(r'Over\s(\d+)', age_text)
      if over_match:
        # "Over N": N becomes the lower bound, no upper bound.
        rdict.pop('age_end', None)
        rdict['age_start'] = over_match.group(1)
        break

      under_match = re.match(r'Under\s(\d+)', age_text)
      if under_match:
        # "Under N": N becomes the upper bound, no lower bound.
        rdict.pop('age_start', None)
        rdict['age_end'] = under_match.group(1)
        break

      total_range_match = re.match(r'Total,\s(\d+)-(\d+)', age_text)
      if total_range_match:
        rdict['age_start'] = total_range_match.group(1)
        rdict['age_end'] = total_range_match.group(2)
        break

  # --- Save -----------------------------------------------------------------
  rdict['active'] = True
  rdict['submitted_by'] = mig_user

  try:
    MainDataEntry(**rdict).save()
  except (ValueError, DatabaseError, ValidationError) as e:
    _report_bad_row('Failed to save data row (%i): %s\n' % (i, e), rdict)
    return num_err_rows + 1

  return num_err_rows