# NOTE(review): this line arrived whitespace-collapsed; reformatted reading below.
# Loop-body fragment — the enclosing `for row in ...` (CSV reader, presumably)
# is outside this view, hence the bare `continue`.
# For one data row: find the matching geography document, merge the row's
# values into geography['data'][YEAR] grouped by table, and persist.
row_dict = dict(zip(headers, row))
xref = utils.xref_from_row_dict(row_dict)
geography = utils.find_geography_by_xref(collection, xref, fields=['data'])
if not geography:
    # No matching geography for this row's xref — skip the row.
    continue
if YEAR not in geography['data']:
    geography['data'][YEAR] = {}
# Group the row's key/value pairs by their table name so they can be
# stored as nested dicts under the year.
tables = {}
for k, v in row_dict.items():
    # Format table names to match labels
    t = utils.parse_table_from_key(k)
    if t not in tables:
        tables[t] = {}
    tables[t][k] = v
for k, v in tables.items():
    geography['data'][YEAR][k] = v
# NOTE(review): legacy PyMongo API — update(..., safe=True) was deprecated
# in favor of update_one(...) with write concern; kept as-is.
collection.update({
    '_id': objectid.ObjectId(geography['_id'])
}, {
    '$set': {
        'data': geography['data']
    }
}, safe=True)
updates += 1
# NOTE(review): the summary prints below presumably sat AFTER the enclosing
# loop (dedented) in the original file — indentation was lost in the
# collapse; confirm against the original source. Python 2 print statements.
print "File: %s" % FILENAME
print ' Row count: %i' % row_count
print ' Updated: %i' % updates
# NOTE(review): this line arrived whitespace-collapsed; reformatted reading below.
# Fragment — the leading bare `continue` belongs to an unseen enclosing
# loop/conditional outside this view.
continue
# For each 2000-census field in the first crosswalked geography's table,
# map it to its equivalent 2010 field(s) and apportion the 2000 values
# across the 2010 geography using the crosswalk percentage.
for k, v in geography_2000s[0]['data']['2000'][table].items():
    try:
        keys_2010 = KEY_MAPPINGS[k]
    except KeyError:
        # Skip 2000 fields that don't exist in 2010
        continue
    # Skip 2000 fields that don't have an equivalent in 2010
    if not keys_2010:
        continue
    # Copy value to all 2010 fields which are comparable to this field in 2000
    for key_2010 in keys_2010:
        table_2010 = utils.parse_table_from_key(key_2010)
        if table_2010 not in data:
            data[table_2010] = {}
        # Weight each contributing 2000 geography's value by its crosswalk
        # percentage, then sum and truncate to int.
        parts = []
        for g in geography_2000s:
            value = float(g['data']['2000'][table][k])
            pct = geography['xwalk'][g['geoid']][crosswalk_field]
            parts.append(value * pct)
        data[table_2010][key_2010] = int(sum(parts))
# OTHER SUMLEVS - can be directly compared by geoid
# NOTE(review): this line arrived whitespace-collapsed; reformatted reading below.
# Loop-body fragment — near-duplicate of the earlier row-merge logic in this
# file (minus the summary prints); the enclosing loop that defines `row_dict`
# is outside this view, hence the bare `continue`.
# Find the geography matching this row's xref and merge the row's values
# into geography['data'][YEAR], grouped by table, then persist.
xref = utils.xref_from_row_dict(row_dict)
geography = utils.find_geography_by_xref(collection, xref, fields=['data'])
if not geography:
    # No matching geography — skip this row.
    continue
if YEAR not in geography['data']:
    geography['data'][YEAR] = {}
# Group key/value pairs by table name for nested storage under the year.
tables = {}
for k, v in row_dict.items():
    # Format table names to match labels
    t = utils.parse_table_from_key(k)
    if t not in tables:
        tables[t] = {}
    tables[t][k] = v
for k, v in tables.items():
    geography['data'][YEAR][k] = v
# NOTE(review): legacy PyMongo API — update(..., safe=True) is deprecated;
# modern equivalent is update_one with acknowledged write concern.
collection.update({'_id': objectid.ObjectId(geography['_id'])}, {'$set': { 'data': geography['data'] }}, safe=True)
updates += 1