Example #1
0
    def _expand_place_ids(self, terms):
        """ Look up place identifiers in the search index and map them to gvids.

        Args:
            terms (str or unicode): terms to look up.

        Returns:
            str or list: the original terms when no identifiers matched,
            otherwise a list of gvid strings, including the 'all region'
            gvids for the higher levels.
        """

        matched_vids = []
        best_type = None

        for hit in self.backend.identifier_index.search(terms):

            if not best_type:
                best_type = hit.type

            # Ignore results that aren't the same type as the best match.
            if hit.type == best_type:
                matched_vids.append(hit.vid)

        if not matched_vids:
            return terms

        # Add the 'all region' gvids for the higher level
        higher_level = set(itertools.chain.from_iterable(
            iallval(GVid.parse(v)) for v in matched_vids))
        matched_vids.extend(str(g) for g in higher_level)

        return matched_vids
Example #2
0
    def partition(self, pvid):
        """Render the partition detail page for *pvid*, resolving geo
        coverage gvids to human-readable names where possible."""
        from geoid.civick import GVid

        template = self.env.get_template('bundle/partition.html')

        part = self.doc_cache.partition(pvid)
        part['table'] = self.doc_cache.table(part['table_vid'])

        if 'geo_coverage' in part:

            idents = self.library.search.identifier_map
            names = part['geo_coverage']['names']

            for gvid in part['geo_coverage']['vids']:
                try:
                    names.append(idents[gvid])
                except KeyError:
                    # No direct name for this gvid; describe it via its
                    # promoted (parent) region instead.
                    parsed = GVid.parse(gvid)
                    try:
                        names.append("All {} in {} ".format(
                            parsed.level_plural.title(),
                            idents[str(parsed.promote())]))
                    except KeyError:
                        pass  # Parent region is unnamed too; skip this gvid.

        return self.render(template, p=part, **self.cc())
Example #3
0
    def expand_place_ids(self, terms):
        """ Lookup all of the place identifiers to get gvids

        :param terms: search terms naming one or more places.
        :return: the original terms if nothing matched, else a list of
            gvid strings including the higher-level 'all region' gvids.
        """
        from geoid.civick import GVid
        from geoid.util import iallval
        import itertools

        vids = []
        best_type = None

        for score, vid, t, name in self.search_identifiers(terms):

            if not best_type:
                best_type = t

            # Ignore ones that aren't the same type as the best match.
            if t == best_type:
                vids.append(vid)

        if not vids:
            return terms

        # Add the 'all region' gvids for the higher level
        all_vals = set(itertools.chain.from_iterable(
            iallval(GVid.parse(v)) for v in vids))
        vids.extend(str(x) for x in all_vals)

        return vids
Example #4
0
 def resum(g):
     """Summarize the gvid string *g*; return *g* unchanged when it cannot
     be parsed or summarized."""
     try:
         summarized = str(GVid.parse(g).summarize())
     except KeyError:
         return g
     except ValueError:
         # Log the bad gvid, but keep processing with the raw value.
         logger.debug("Failed to parse gvid '{}' from partition '{}' grain coverage"
                      .format(g, partition.identity.vname))
         return g
     return summarized
Example #5
0
 def name_to_gvid(self, row):
     """Map the state/county/city name in *row* to a GVid, or None for
     the 'balance of county' placeholder rows."""
     from geoid.civick import GVid
 
     name = row.state_county_city.lower()
     self.city_name = None
 
     if name == 'california':
         from geoid.civick import State
         return State(6)
 
     if name == 'balance of county':
         return None
 
     if self.non_null_row_count == 1:
         # County row: remember it for the city rows that follow.
         self.last_county = row.state_county_city
         return GVid.parse(self.county_map()[name])
 
     # Otherwise this is a city/place row.
     self.city_name = row.state_county_city
     return GVid.parse(self.place_map().get(name))
Example #6
0
    def build_modify_row(self, row_gen, p, source, row):
        """Called for every row to allow subclasses to modify rows. """
        from geoid.civick import GVid, Zip

        row['name'] = row['name'].decode('latin1')
        row['county_gvid'] = self.counties.get(row['county'].lower())
        row['year'] = self.year

        # Only attach a zip gvid when there is a zip and the county gvid
        # parses successfully.
        if row['zip'] and GVid.parse(row['county_gvid']):
            row['zip_gvid'] = str(Zip(int(row['zip'])))
Example #7
0
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID"""
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    first_error = ''

    # Try the civick form first.
    try:
        return GVid.parse(v)
    except ValueError as gvid_error:
        first_error = str(gvid_error)

    # Fall back to ACS, converting to a GVid; report both failures together.
    try:
        return AcsGeoid.parse(v).convert(GVid)
    except ValueError as e:
        raise ValueError("Failed to parse to either ACS or GVid: {}; {}".format(first_error, str(e)))
Example #8
0
def get_boundaries(gvid, sl):
    """
    Return a cached, static geojson file of boundaries for a region
    :param gvid:  The GVID of the region
    :param sl:  The summary level of the subdivisions of the region.
    :return: a Flask response serving the cached (or just built) geojson file.
    """

    # NOTE: 'Point' was imported but never used; removed.
    from geojson import Feature, FeatureCollection, dumps
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache('ui/geo')

    # Cache key combines the region and the subdivision summary level.
    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    if not exists(fn_path):

        p = aac.library.partition('census.gov-tiger-2015-counties')

        features = []

        # Plain iteration: the old enumerate() index was never used.
        for row in p:
            if row.statefp == 6:  # In dev, assume counties in California

                # Distinct name: the old code shadowed the 'gvid' parameter.
                row_gvid = GVid.parse(row.gvid)

                f = Feature(geometry=loads(row.geometry).simplify(0.01),
                            properties={
                                'gvid': row.gvid,
                                'state': row_gvid.state,
                                'county': row_gvid.county,
                                'count_name': row.name

                            })

                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, 'w') as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False, mimetype='application/vnd.geo+json')
Example #9
0
def get_boundaries(gvid, sl):
    """
    Return a cached, static geojson file of boundaries for a region
    :param gvid:  The GVID of the region
    :param sl:  The summary level of the subdivisions of the region.
    :return: a Flask response serving the cached (or just built) geojson file.
    """

    # NOTE: 'Point' was imported but never used; removed.
    from geojson import Feature, FeatureCollection, dumps
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache("ui/geo")

    # Cache key combines the region and the subdivision summary level.
    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    if not exists(fn_path):

        p = aac.library.partition("census.gov-tiger-2015-counties")

        features = []

        # Plain iteration: the old enumerate() index was never used.
        for row in p:
            if row.statefp == 6:  # In dev, assume counties in California

                # Distinct name: the old code shadowed the 'gvid' parameter.
                row_gvid = GVid.parse(row.gvid)

                f = Feature(
                    geometry=loads(row.geometry).simplify(0.01),
                    properties={"gvid": row.gvid, "state": row_gvid.state, "county": row_gvid.county, "count_name": row.name},
                )

                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, "w") as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False, mimetype="application/vnd.geo+json")
Example #10
0
def generate_all(sumlevel, d):
    """Generate a dict that includes all of the available geoid values, with keys
    for the most common names for those values.

    :param sumlevel: census summary level, int or int-like string.
    :param d: mapping of geoid component names to values; not mutated.
    :return: dict with 'gvid', 'geoid' and 'geoidt' string values, or {}
        when no GVid class exists for the summary level.
    """

    from geoid.civick import GVid
    from geoid.tiger import TigerGeoid
    from geoid.acs import AcsGeoid

    sumlevel = int(sumlevel)

    # Copy so the caller's dict is not mutated by the renames below.
    d = dict(d.items())

    # Map common name variants to the names the geoid classes expect.
    for variant, canonical in (('cousub', 'cosub'),
                               ('blkgrp', 'blockgroup'),
                               ('zcta5', 'zcta')):
        if variant in d:
            d[canonical] = d.pop(variant)

    gvid_class = GVid.resolve_summary_level(sumlevel)

    if not gvid_class:
        return {}

    geoidt_class = TigerGeoid.resolve_summary_level(sumlevel)
    geoid_class = AcsGeoid.resolve_summary_level(sumlevel)

    # The previous ``try: ... except: raise`` wrapper was a no-op and has
    # been removed; exceptions propagate unchanged either way.
    return dict(
        gvid=str(gvid_class(**d)),
        geoid=str(geoid_class(**d)),
        geoidt=str(geoidt_class(**d))
    )
Example #11
0
 def resum(g):
     """Summarize the gvid string *g*, falling back to the raw value when
     it cannot be parsed or summarized."""
     try:
         summarized = str(GVid.parse(g).summarize())
     except (KeyError, ValueError):
         return g
     return summarized
Example #12
0
    def bundle_search(self, terms):
        """Incremental search, search as you type.

        Parses *terms* into a query, searches datasets, attaches a summary
        of each dataset's first few partitions (resolving geo-coverage gvids
        to names where possible), collects year/source/state facets, and
        renders the results template.

        :param terms: raw search terms from the user.
        :return: the rendered 'search/results.html' response.
        """

        from geoid.civick import GVid

        parsed = self.library.search.make_query_from_terms(terms)

        final_results = []

        init_results  = self.library.search.search_datasets(parsed)

        # Only the first few partitions of each dataset are summarized.
        pvid_limit = 5

        # Maps gvid -> human-readable place name.
        all_idents = self.library.search.identifier_map

        # Highest-scoring datasets first.
        for result in sorted(init_results.values(), key=lambda e: e.score, reverse=True):

            d = self.doc_cache.dataset(result.vid)

            d['partition_count'] = len(result.partitions)
            d['partitions'] = {}

            for pvid in list(result.partitions)[:pvid_limit]:

                p = self.doc_cache.partition(pvid)

                p['table'] = self.doc_cache.table(p['table_vid'])

                if 'geo_coverage' in p:
                    for gvid in p['geo_coverage']['vids']:
                        try:

                            p['geo_coverage']['names'].append(all_idents[gvid])

                        except KeyError:
                            # No direct name for this gvid; describe it via
                            # its promoted (parent) region instead.
                            g = GVid.parse(gvid)
                            try:
                                phrase = "All {} in {} ".format(
                                    g.level_plural.title(), all_idents[str(g.promote())])
                                p['geo_coverage']['names'].append(phrase)
                            except KeyError:
                                # Parent region is unnamed too; skip the gvid.
                                pass

                d['partitions'][pvid] = p

            final_results.append(d)

        template = self.env.get_template('search/results.html')

        # Collect facets to display to the user, for additional sorting
        facets = {
            'years': set(),
            'sources': set(),
            'states': set()
        }

        for r in final_results:
            facets['sources'].add(r['source'])
            for p in r['partitions'].values():
                if 'time_coverage' in p and p['time_coverage']:
                    facets['years'] |= set(p['time_coverage']['years'])

                if 'geo_coverage' in p:
                    for gvid in p['geo_coverage']['vids']:
                        g = GVid.parse(gvid)

                        # Only concrete (non-summary) state gvids with a
                        # known name become state facets.
                        if g.level == 'state' and not g.is_summary:
                            #facets['states'].add( (gvid, all_idents[gvid]))
                            try:
                                facets['states'].add(all_idents[gvid])
                            except KeyError:
                                pass # TODO Should probably announce an error

        return self.render(
            template,
            query = parsed,
            results=final_results,
            facets=facets,
            **self.cc())
Example #13
0
 def resum(g):
     """Return the summarized form of gvid *g*, or *g* itself when the
     lookup raises KeyError."""
     try:
         result = str(GVid.parse(g).summarize())
     except KeyError:
         return g
     return result
Example #14
0
    def bundle_search(self, terms):
        """Incremental search, search as you type.

        Searches bundles and partitions for *terms*, attaches a summary of
        each bundle's first few partitions (resolving geo-coverage gvids to
        names where possible), collects year/source/state facets, and
        renders the results template.

        :param terms: dict of raw search term components from the user.
        :return: the rendered 'search/results.html' response.
        """

        from geoid.civick import GVid

        results = []

        (b_query, p_query, terms), bp_set = self.library.search.search_bundles(
            {k: v.strip() for k, v in terms.items()})

        # Only the first few partitions of each bundle are summarized.
        pvid_limit = 5

        # Maps gvid -> human-readable place name.
        all_idents = self.library.search.identifier_map

        for bvid, pvids in bp_set.items():

            d = self.doc_cache.dataset(bvid)

            d['partition_count'] = len(pvids)
            d['partitions'] = {}

            for pvid in pvids[:pvid_limit]:

                p = self.doc_cache.partition(pvid)

                p['table'] = self.doc_cache.table(p['table_vid'])

                if 'geo_coverage' in p:
                    for gvid in p['geo_coverage']['vids']:
                        try:

                            p['geo_coverage']['names'].append(all_idents[gvid])

                        except KeyError:
                            # No direct name for this gvid; describe it via
                            # its promoted (parent) region instead.
                            g = GVid.parse(gvid)
                            try:
                                phrase = "All {} in {} ".format(
                                    g.level_plural.title(), all_idents[str(g.promote())])
                                p['geo_coverage']['names'].append(phrase)
                            except KeyError:
                                # Parent region is unnamed too; skip the gvid.
                                pass

                d['partitions'][pvid] = p

            results.append(d)

        template = self.env.get_template('search/results.html')

        results = sorted(results, key=lambda x: x['vname'])

        # Collect facets to display to the user, for additional sorting
        facets = {
            'years': set(),
            'sources': set(),
            'states': set()
        }

        for r in results:
            facets['sources'].add(r['source'])
            for p in r['partitions'].values():
                if 'time_coverage' in p and p['time_coverage']:
                    facets['years'] |= set(p['time_coverage']['years'])

                if 'geo_coverage' in p:
                    for gvid in p['geo_coverage']['vids']:
                        g = GVid.parse(gvid)

                        if g.level == 'state' and not g.is_summary:
                            #facets['states'].add( (gvid, all_idents[gvid]))
                            # FIX: an unknown state gvid previously raised an
                            # unhandled KeyError and aborted the whole results
                            # page; skip unnamed states instead, matching the
                            # other bundle_search implementation.
                            try:
                                facets['states'].add(all_idents[gvid])
                            except KeyError:
                                pass

        return self.render(
            template,
            queries=dict(
                b_query=b_query,
                p_query=p_query,
                terms=terms),
            results=results,
            facets=facets,
            **self.cc())
Example #15
0
    def search_bundles(self, search, limit=None):
        """Search for datasets and partitions using a structured search object.

        :param search: a dict, with values for each of the search components.
        :param limit: unused; kept for interface compatibility.
        :return: ((d_term, p_term, search), mapping) where the first element
            holds the dataset and partition query strings actually used, and
            *mapping* maps each bundle vid to a list of its matching
            partition vids (empty lists when only datasets matched).

        """
        from ..identity import ObjectNumber
        from collections import defaultdict
        from geoid.civick import GVid

        if search.get('all', False):

            search = SearchTermParser().parse(search['all'])

        bvid_term = about_term = source_term = with_term = grain_term = years_term = in_term = ''

        if search.get('source', False):
            source_term = "source:" + search.get('source', '').strip()

        if search.get('about', False):
            about_term = "doc:({})".format(search.get('about', '').strip())

        d_term = ' AND '.join(x for x in [source_term, about_term] if bool(x))

        # This is the doc terms we'll move to the partition search if the partition search returns nothing
        # but only if the about term was the only one specified.
        dt_p_term = about_term if not source_term else None

        if search.get('in', False):
            place_vids = list(
                x[1] for x in self.search_identifiers(
                    search['in']))  # Convert generator to list

            if place_vids:

                # Add the 'all region' gvids for the higher level

                all_set = set(str(GVid.parse(x).allval()) for x in place_vids)

                place_vids += list(all_set)

                in_term = "coverage:({})".format(' OR '.join(place_vids))

        if search.get('by', False):
            grain_term = "coverage:" + search.get('by', '').strip()

        # The wackiness with the converts to int and str, and adding ' ', is because there
        # can't be a space between the 'TO' and the brackets in the time range
        # when one end is open

        # BUG FIX: both variables must be initialized before the try blocks.
        # Previously a non-numeric 'from'/'to' value hit the ValueError path,
        # left the variable unbound, and raised NameError below.
        from_year = ''
        to_year = ''

        if search.get('from', False):
            try:
                from_year = str(int(search.get('from', False))) + ' '
            except ValueError:
                pass

        if search.get('to', False):
            try:
                to_year = ' ' + str(int(search.get('to', False)))
            except ValueError:
                pass

        if bool(from_year) or bool(to_year):

            years_term = "coverage:[{}TO{}]".format(from_year, to_year)

        if search.get('with', False):
            with_term = 'schema:({})'.format(search.get('with', False))

        if bool(d_term):
            # list(...) : the return from search_datasets is a generator, so it
            # can only be read once.
            bvids = list(self.search_datasets(d_term))

        else:
            bvids = []

        p_term = ' AND '.join(
            x for x in [
                in_term,
                years_term,
                grain_term,
                with_term] if bool(x))

        if bool(p_term):
            if bvids:
                p_term += " AND bvid:({})".format(' OR '.join(bvids))
            elif dt_p_term:
                # In case the about term didn't generate any hits for the
                # bundle.
                p_term += " AND {}".format(dt_p_term)
        else:
            if not bvids and dt_p_term:
                p_term = dt_p_term

        if p_term:
            pvids = list(self.search_partitions(p_term))

            if pvids:
                # Group matching partitions by their owning bundle vid.
                bp_set = defaultdict(set)
                for p in pvids:
                    bvid = str(ObjectNumber.parse(p).as_dataset)
                    bp_set[bvid].add(p)

                rtrn = {b: list(p) for b, p in bp_set.items()}
            else:
                rtrn = {}

        else:

            # No partition query: return every matched bundle with no partitions.
            rtrn = {b: [] for b in bvids}

        return (d_term, p_term, search), rtrn