Exemple #1
0
def fetch_query(state, query):
    """Fetch the raw payload for one query, picking the fetcher by type.

    The fetcher is selected from ``query.type``:

    * ``arcgis`` / ``json`` / ``ckan`` / ``soda`` -> ``request_and_parse``
    * ``csv`` -> ``request_csv`` (passing ``query.header`` and
      ``query.encoding``)
    * ``html`` -> ``request``
    * ``html:soup`` -> ``request_soup``
    * ``pandas`` / ``xls`` / ``xlsx`` -> ``request_pandas`` (receives the
      whole query object)

    Any other type yields ``query.url`` unchanged.

    ``state`` is only used to label the error log entry. Any exception
    raised while fetching is logged with its traceback and re-raised.
    """
    # TODO: make a better mapping here
    try:
        kind = query.type
        if kind in ('arcgis', 'json', 'ckan', 'soda'):
            return request_and_parse(query.url, query.params)
        if kind == 'csv':
            return request_csv(query.url,
                               query.params,
                               header=query.header,
                               encoding=query.encoding)
        if kind == 'html':
            return request(query.url, query.params, query.encoding)
        if kind == 'html:soup':
            return request_soup(query.url, query.params, query.encoding)
        if kind in ('pandas', 'xls', 'xlsx'):
            return request_pandas(query)
        # No dedicated fetcher: hand the URL back untouched.
        # TODO: It's used for something, but it's not great
        return query.url
    except Exception:
        message = "{}: Failed to fetch {}".format(state, query.url)
        logging.error(message, exc_info=True)
        raise
Exemple #2
0
    def fetch_state(self, state):
        ''' Fetch data for a single state, returning a tuple of
        (fetched_results, parsed_data)

        fetched_results is a list with one raw result per query, in query
        order. A query whose type is not recognised contributes None (and
        a warning is logged) rather than re-using an earlier query's
        result.

        parsed_data is whatever self._aggregate_state_results returns for
        the processed results.

        If there's no query for the state: return (None, {})

        Any exception raised while fetching a query is logged with its
        traceback and re-raised.
        '''
        logging.debug("Fetching: %s", state)

        queries = self.sources.queries_for(state)
        if not queries:
            return None, {}

        results = []
        mapping = self.sources.mapping_for(state)
        for query in queries:
            # Reset per query: otherwise an unrecognised type would silently
            # append the previous query's result a second time.
            res = None
            # TODO: make a better mapping here
            try:
                query_type = query['type']
                if query_type in ('arcgis', 'json', 'ckan', 'soda'):
                    res = request_and_parse(query['url'], query['params'])
                elif query_type == 'csv':
                    res = request_csv(query['url'],
                                      query['params'],
                                      header=query.get('header', True),
                                      encoding=query.get('encoding'))
                elif query_type == 'html':
                    res = request(query['url'], query['params'])
                elif query_type == 'html:soup':
                    res = request_soup(query['url'], query['params'])
                elif query_type in ('pandas', 'xls', 'xlsx'):
                    res = request_pandas(query)
                else:
                    logging.warning("%s: Unknown query type %r for %s",
                                    state, query_type, query.get('url'))
                # Always append so results stays aligned with queries.
                results.append(res)
            except Exception:
                # .get() keeps the handler itself from raising when the
                # original failure was a missing 'url' key.
                logging.error("%s: Failed to fetch %s",
                              state, query.get('url'),
                              exc_info=True)
                raise

        if state in self.extras:
            # State-specific hook processes all results at once.
            processed_results = self.extras[state](results, mapping)
        else:
            processed_results = []
            for query, result in zip(queries, results):
                if query.get('type') == 'arcgis':
                    partial = extract_arcgis_attributes(result, mapping, state)
                else:
                    # This is a guess; getting an unknown top level object
                    partial = extract_attributes(
                        result, query.get('data_path', []), mapping,
                        state)
                processed_results.append(partial)

        data = self._aggregate_state_results(state, processed_results, mapping)
        return results, data