def do_the_work(file_contents, field_defs, filename):
    """
    Read the header of an uploaded CSV and map each requested column to
    the mancer that provides it.

    field_defs looks like:
    {
      10: {
        'type': 'city_state',
        'append_columns': ['total_population', 'median_age']
      }
    }
    or like this:
    {
      10;2: {
        'type': 'city;state',
        'append_columns': ['total_population', 'median_age']
      }
    }
    where the semicolon separated values represent a multicolumn geography

    file_contents is a string containing the contents of the uploaded file.

    Only the first key of field_defs is used to determine the geography
    type; every entry's 'append_columns' is scanned when building the
    column-to-mancer mapping.

    NOTE(review): the body visible here ends once mancer_mapper is built and
    has no return statement. `filename`, `header`, `result`, `geo_ids`,
    `col_idxs`, `val_fmt` and `geo_name` are not used in the visible part
    and are kept untouched for whatever code follows -- confirm against the
    full file.
    """
    contents = StringIO(file_contents)
    reader = UnicodeCSVReader(contents)
    # Read the first row as the header. The builtin next() works on both
    # Python 2 and Python 3 iterators, unlike the py2-only .next() method.
    header = next(reader)
    result = None
    geo_ids = set()
    mancer_mapper = {}
    # list(d)[0] is equivalent to the py2-only d.keys()[0] and still raises
    # IndexError when field_defs is empty.
    fields_key = list(field_defs)[0]
    errors = []
    geo_type, col_idxs, val_fmt = find_geo_type(
        field_defs[fields_key]['type'], fields_key)
    geo_name = get_geo_types(geo_type=geo_type)[0][0]['info'].human_name

    for mancer in MANCERS:
        m = import_class(mancer)
        api_key = MANCER_KEYS.get(m.machine_name)
        try:
            m = m(api_key=api_key)
        except ImportError as e:
            # A mancer that cannot be instantiated is recorded and skipped
            # rather than aborting the whole job.
            errors.append(str(e))
            continue
        # Set membership keeps the per-column lookup below O(1).
        mancer_cols = set(c['table_id'] for c in m.get_metadata())
        for v in field_defs.values():
            for f in v['append_columns']:
                if f in mancer_cols:
                    # If several mancers offer the same table id, the last
                    # one in MANCERS wins (the entry is overwritten).
                    mancer_mapper[f] = {
                        'mancer': m,
                        'geo_id_map': {},
                        'geo_ids': set(),
                        'geo_type': geo_type,
                    }
def get_geo_types(geo_type=None):
    """
    Gather geography types and column metadata from every configured mancer.

    For each class path in MANCERS the class is imported and instantiated
    with its API key from MANCER_KEYS. Mancers whose instantiation raises
    ImportError are skipped and the error text is appended to `errors`.

    NOTE(review): the body visible here has no return statement, and
    neither the `geo_type` argument nor the `types` dict is used in the
    visible part -- they are presumably consumed by code that follows;
    confirm against the full file.
    """
    types = {}
    columns = []
    geo_types = []
    errors = []
    for mancer in MANCERS:
        m = import_class(mancer)
        api_key = MANCER_KEYS.get(m.machine_name)
        try:
            m = m(api_key=api_key)
        except ImportError as e:
            errors.append(str(e))
            continue
        # Fetch the metadata once and reuse it for both accumulators instead
        # of calling get_metadata() twice per mancer.
        metadata = m.get_metadata()
        for col in metadata:
            geo_types.extend(col['geo_types'])
        columns.extend(metadata)
def table_info():
    """
    Collect a summary of every table offered by the configured mancers.

    Builds an OrderedDict keyed by table id, in the order the mancers appear
    in MANCERS and the order each mancer reports its tables. Each value
    holds the table's 'table_id', 'human_name', 'columns' and 'source_url'
    plus the name of the mancer that provides it. Mancers whose
    instantiation raises ImportError are silently skipped.

    NOTE(review): the body visible here has no return statement; the
    original docstring ("Return a list of data sources") suggests `columns`
    is returned by code that follows -- confirm against the full file.
    """
    columns = OrderedDict()
    for mancer in MANCERS:
        m = import_class(mancer)
        api_key = MANCER_KEYS.get(m.machine_name)
        try:
            m = m(api_key=api_key)
        except ImportError:
            # Unavailable mancers simply contribute no tables.
            continue
        for col in m.get_metadata():
            # A table id reported by more than one mancer keeps its original
            # position but takes the values of the later mancer.
            columns[col['table_id']] = {
                'table_id': col['table_id'],
                'human_name': col['human_name'],
                'mancer': m.name,
                'columns': col['columns'],
                'source_url': col['source_url'],
            }
def get_data_sources(geo_type=None):
    """
    Describe each configured mancer and the data tables it offers.

    geo_type, when truthy, is compared against the `machine_name` of each
    table's geo types; only tables that support it are kept. When falsy,
    every table is kept.

    For each usable mancer a dict is built with its name, machine_name,
    base_url, info_url and description, plus "data_types": the kept tables
    sorted by 'human_name', each with its 'geo_types' sorted by
    `human_name`. Mancers with no kept tables are left out of
    `mancer_data`. Mancers whose instantiation raises ImportError are
    skipped and the error text is appended to `errors`.

    Note that sorting 'geo_types' assigns into the table dicts returned by
    get_metadata(), so those dicts are modified in place.

    NOTE(review): the body visible here has no return statement;
    `mancer_data` and `errors` are presumably returned by code that
    follows -- confirm against the full file.
    """
    mancer_data = []
    errors = []
    for mancer in MANCERS:
        m = import_class(mancer)
        api_key = MANCER_KEYS.get(m.machine_name)
        try:
            m = m(api_key=api_key)
        except ImportError as e:
            errors.append(str(e))
            continue
        mancer_obj = {
            "name": m.name,
            "machine_name": m.machine_name,
            "base_url": m.base_url,
            "info_url": m.info_url,
            "description": m.description,
            "data_types": {},
        }
        # Tables are first collected in a dict keyed by table id, then
        # replaced by a sorted list once the mancer has been scanned.
        data_types = mancer_obj["data_types"]
        for col in m.get_metadata():
            if geo_type:
                col_types = [i.machine_name for i in col['geo_types']]
                if geo_type in col_types:
                    data_types[col['table_id']] = col
            else:
                data_types[col['table_id']] = col
            try:
                data_types[col['table_id']]['geo_types'] = sorted(
                    data_types[col['table_id']]['geo_types'],
                    key=lambda x: x.human_name)
            except KeyError:
                # Deliberate best-effort: the table was filtered out above
                # (so it has no entry) or it lacks a required key; either
                # way there is nothing to sort.
                pass
        mancer_obj["data_types"] = sorted(
            data_types.values(), key=lambda x: x['human_name'])
        if mancer_obj['data_types']:
            mancer_data.append(mancer_obj)