def fetch_state(self, state):
    ''' Fetch data for a single state.

    Every query configured for the state is requested with the helper that
    matches its ``type``; the raw responses are then turned into attribute
    records and aggregated.

    Returns a tuple of (fetched_results, parsed_data):
      - fetched_results: list with one raw response per query, in query
        order. A query with an unsupported type contributes ``None``.
      - parsed_data: whatever ``self._aggregate_state_results`` returns.

    If there's no query for the state: return (None, {})

    Raises: any exception raised while fetching a query is logged with its
    traceback and re-raised unchanged.
    '''
    logging.debug("Fetching: %s", state)

    queries = self.sources.queries_for(state)
    if not queries:
        return None, {}

    results = []
    mapping = self.sources.mapping_for(state)
    for query in queries:
        # Start every query from a clean slate: without this an unsupported
        # type would silently re-append the previous query's response.
        res = None
        # TODO: make a better mapping here
        try:
            query_type = query['type']
            if query_type in ('arcgis', 'json', 'ckan', 'soda'):
                res = request_and_parse(query['url'], query['params'])
            elif query_type in ('csv',):
                res = request_csv(
                    query['url'], query['params'],
                    header=query.get('header', True),
                    encoding=query.get('encoding'))
            elif query_type in ('html',):
                res = request(query['url'], query['params'])
            elif query_type in ('html:soup',):
                res = request_soup(query['url'], query['params'])
            elif query_type in ('pandas', 'xls', 'xlsx'):
                res = request_pandas(query)
            else:
                logging.warning(
                    "%s: Unsupported query type %r for %s; nothing fetched",
                    state, query_type, query.get('url'))
            # Always append (even None) so that results[i] keeps lining up
            # with queries[i] in the processing step below.
            results.append(res)
        except Exception:
            logging.exception("%s: Failed to fetch %s", state, query['url'])
            raise

    if state in self.extras:
        # A state-specific handler takes over the whole extraction step.
        processed_results = self.extras[state](results, mapping)
    else:
        processed_results = []
        for query, result in zip(queries, results):
            if query.get('type') == 'arcgis':
                partial = extract_arcgis_attributes(result, mapping, state)
            else:
                # This is a guess; getting an unknown top level object
                partial = extract_attributes(
                    result, query.get('data_path', []), mapping, state)
            processed_results.append(partial)

    data = self._aggregate_state_results(state, processed_results, mapping)
    return results, data
def handle_ca(res, mapping, queries):
    ''' Build cumulative records for CA from per-query responses.

    ``queries`` and ``res`` are walked in lockstep (stopping at the shorter
    one). For each pair the response is extracted and mapped, loaded into a
    frame via ``prep_df``, and converted into running totals.

    Returns a flat list of record dicts covering all queries, in order.
    '''
    records = []
    for query, response in zip(queries, res):
        # extract_attributes also applies the mapping
        extracted = extract_attributes(
            response, query.data_path, mapping, 'CA')

        frame = prep_df(extracted, mapping)
        # Rows with a missing index sort first; TS is dropped so it is not
        # part of the running sum.
        frame = frame.sort_index(na_position='first')
        frame = frame.drop(columns=TS)
        totals = frame.cumsum()

        # Discard the rows whose index is missing once they have been summed.
        totals = totals.loc[totals.index.notna()]

        add_query_constants(totals, query)
        # Restore TS from the index now that the sums are done.
        totals[TS] = totals.index

        records.extend(totals.to_dict(orient='records'))
    return records
def process_source_responses(source, results):
    ''' Turn the raw responses fetched for a source into aggregated data.

    If the source defines ``extras``, that callable receives all results
    plus the source mapping and performs the extraction itself. Otherwise
    each result is extracted using the query at the same position in
    ``source.queries``.

    Returns whatever ``_aggregate_state_results`` produces for the source
    and the extracted partial results.
    '''
    if source.extras:
        extracted = source.extras(results, source.mapping)
        return _aggregate_state_results(source, extracted)

    extracted = []
    for position, response in enumerate(results):
        query = source.queries[position]
        if query.type != 'arcgis':
            # This is a guess; getting an unknown top level object
            piece = extract_attributes(
                response, query.data_path, source.mapping, source.name)
        else:
            piece = extract_arcgis_attributes(
                response, source.mapping, source.name)
        extracted.append(piece)

    return _aggregate_state_results(source, extracted)