def fetch_query(state, query):
    '''
    Fetch the raw result for a single source query.

    The request helper is selected from ``query.type``:

    * 'arcgis', 'json', 'ckan', 'soda' -> ``request_and_parse``
    * 'csv'                            -> ``request_csv``
    * 'html'                           -> ``request``
    * 'html:soup'                      -> ``request_soup``
    * 'pandas', 'xls', 'xlsx'          -> ``request_pandas``

    Any other type yields ``query.url`` untouched.

    ``state`` is only used to label the error log entry. Any exception
    raised while fetching is logged with its traceback and re-raised.
    '''
    # TODO: make a better mapping here
    try:
        kind = query.type
        if kind in ('arcgis', 'json', 'ckan', 'soda'):
            return request_and_parse(query.url, query.params)
        if kind == 'csv':
            return request_csv(
                query.url, query.params,
                header=query.header, encoding=query.encoding)
        if kind == 'html':
            return request(query.url, query.params, query.encoding)
        if kind == 'html:soup':
            return request_soup(query.url, query.params, query.encoding)
        if kind in ('pandas', 'xls', 'xlsx'):
            return request_pandas(query)
        # the default is to send the URL as is
        # TODO: It's used for something, but it's not great
        return query.url
    except Exception:
        failure = "{}: Failed to fetch {}".format(state, query.url)
        logging.error(failure, exc_info=True)
        raise
def fetch_state(self, state):
    '''
    Fetch data for a single state, returning a tuple of
    (fetched_result, parsed_data)

    If there's no query for the state: return (None, {})

    Otherwise ``fetched_result`` is a list holding one raw result per
    query (in query order) and ``parsed_data`` is whatever
    ``self._aggregate_state_results`` returns for the processed results.

    A query whose type is not recognised contributes its URL as the
    result (the same default ``fetch_query`` uses), so it can never
    silently repeat the result of the previous query.

    Any exception raised while fetching is logged with its traceback
    and re-raised.
    '''
    logging.debug("Fetching: %s", state)

    queries = self.sources.queries_for(state)
    if not queries:
        return None, {}

    results = []
    mapping = self.sources.mapping_for(state)
    for query in queries:
        # TODO: make a better mapping here
        try:
            query_type = query['type']
            if query_type in ('arcgis', 'json', 'ckan', 'soda'):
                res = request_and_parse(query['url'], query['params'])
            elif query_type == 'csv':
                res = request_csv(
                    query['url'], query['params'],
                    header=query.get('header', True),
                    encoding=query.get('encoding'))
            elif query_type == 'html':
                res = request(query['url'], query['params'])
            elif query_type == 'html:soup':
                res = request_soup(query['url'], query['params'])
            elif query_type in ('pandas', 'xls', 'xlsx'):
                res = request_pandas(query)
            else:
                # Unknown type: pass the URL through instead of re-using
                # whatever the previous iteration left behind.
                res = query['url']
            results.append(res)
        except Exception:
            # .get() so a query without a 'url' key cannot raise a second
            # error here and hide the original one.
            logging.exception(
                "%s: Failed to fetch %s", state, query.get('url'))
            raise

    if state in self.extras:
        # State-specific handler receives all raw results at once.
        processed_results = self.extras[state](results, mapping)
    else:
        processed_results = []
        # results has exactly one entry per query, so zip pairs them up.
        for query, result in zip(queries, results):
            if query.get('type') == 'arcgis':
                partial = extract_arcgis_attributes(result, mapping, state)
            else:
                # This is a guess; getting an unknown top level object
                partial = extract_attributes(
                    result, query.get('data_path', []), mapping, state)
            processed_results.append(partial)

    data = self._aggregate_state_results(state, processed_results, mapping)
    return results, data