Example #1
0
    def expand_place_ids(self,terms):
        """ Look up place identifiers for the given terms and expand them to gvids.

        :param terms: search terms naming one or more places.
        :return: a list of gvid strings when identifiers match, otherwise
            the original ``terms`` unchanged.
        """
        from geoid.civick import GVid
        from geoid.util import iallval
        import itertools

        vids = []
        best_type = None

        for _score, vid, ident_type, _name in self.search_identifiers(terms):
            # The first (best-scoring) hit fixes the identifier type; hits of
            # any other type are ignored.
            if not best_type:
                best_type = ident_type

            if ident_type == best_type:
                vids.append(vid)

        if not vids:
            return terms

        # Add the 'all region' gvids for the higher level
        parents = itertools.chain.from_iterable(iallval(GVid.parse(v)) for v in vids)
        vids += [str(g) for g in set(parents)]

        return vids
Example #2
0
    def _expand_place_ids(self, terms):
        """ Looks up all of the place identifiers to get gvids

        Args:
            terms (str or unicode): terms to lookup

        Returns:
            str or list: given terms if no identifiers found, otherwise list of identifiers.
        """

        vids = []
        best_type = None

        for hit in self.backend.identifier_index.search(terms):
            # The first (best-scoring) hit fixes the identifier type; hits of
            # any other type are ignored.
            if not best_type:
                best_type = hit.type

            if hit.type == best_type:
                vids.append(hit.vid)

        if not vids:
            return terms

        # Add the 'all region' gvids for the higher level
        parents = itertools.chain.from_iterable(iallval(GVid.parse(v)) for v in vids)
        vids += [str(g) for g in set(parents)]

        return vids
Example #3
0
    def partition(self, pvid):
        """Render the partition detail page for the partition *pvid*.

        :param pvid: partition vid to look up in the doc cache.
        :return: the rendered 'bundle/partition.html' template.
        """
        from geoid.civick import GVid

        template = self.env.get_template('bundle/partition.html')

        # Cached partition record, augmented with its table record.
        p = self.doc_cache.partition(pvid)

        p['table'] = self.doc_cache.table(p['table_vid'])

        if 'geo_coverage' in p:

            all_idents = self.library.search.identifier_map

            # Resolve each coverage gvid to a display name.
            for gvid in p['geo_coverage']['vids']:
                try:
                    p['geo_coverage']['names'].append(all_idents[gvid])
                except KeyError:
                    # Not in the identifier map — presumably a summary-level
                    # gvid; describe it as "All <level> in <parent>" instead.
                    g = GVid.parse(gvid)
                    try:
                        phrase = "All {} in {} ".format(
                            g.level_plural.title(), all_idents[str(g.promote())])
                        p['geo_coverage']['names'].append(phrase)
                    except KeyError:
                        # Parent region is unknown as well; skip this gvid.
                        pass

        return self.render(template, p=p, **self.cc())
Example #4
0
 def resum(g):
     """Return the summarized form of the gvid string *g*; on failure return *g* unchanged.

     Relies on ``GVid``, ``logger`` and ``partition`` from the enclosing scope.
     """
     try:
         return str(GVid.parse(g).summarize())
     except KeyError:
         # Unknown component in the gvid; keep the original value.
         return g
     except ValueError:
         logger.debug("Failed to parse gvid '{}' from partition '{}' grain coverage"
                      .format(g, partition.identity.vname))
         return g
 def name_to_gvid(self, row):
     """Map a row's state/county/city name to a civick geoid.

     :param row: source row with a ``state_county_city`` column.
     :return: a geoid instance, or None for 'balance of county' rows.

     Side effects: resets ``self.city_name`` each call (re-set for city rows),
     and sets ``self.last_county`` for county rows.
     """
     from geoid.civick import GVid
 
     v = row.state_county_city.lower()
     self.city_name = None
     
     if v == 'california':
         from  geoid.civick import State
         return State(6)  # 6 is California's FIPS state code
     elif v == 'balance of county':
         return None
     elif self.non_null_row_count == 1:
         # NOTE(review): presumably the first non-null row of a group names the
         # county and later rows name cities — confirm against the source data.
         self.last_county = row.state_county_city
         return GVid.parse(self.county_map()[v])
         
     else:
         self.city_name = row.state_county_city
         return GVid.parse(self.place_map().get(v))
Example #6
0
    def build_modify_row(self, row_gen, p, source, row):
        """Called for every row to allow subclasses to modify rows. """
        from geoid.civick import GVid, Zip

        # The source 'name' column is Latin-1 encoded; decode it to unicode.
        row['name'] = row['name'].decode('latin1')
        row['county_gvid'] = self.counties.get(row['county'].lower())
        row['year'] = self.year
        if row['zip']:
            # Only attach a zip gvid when the county gvid parses; a missing or
            # unparseable county gvid leaves 'zip_gvid' unset on the row.
            cg = GVid.parse(row['county_gvid'])
            if cg:
                zg = Zip(int(row['zip']))
                row['zip_gvid'] = str(zg)
Example #7
0
def parse_to_gvid(v):
    """Parse an ACS Geoid or a GVID to a GVID.

    Tries the civick GVid format first, then falls back to the ACS format
    converted to a GVid.

    :param v: geoid string in either format.
    :raises ValueError: if *v* parses as neither format.
    """
    from geoid.civick import GVid
    from geoid.acs import AcsGeoid

    try:
        return GVid.parse(v)
    except ValueError as gvid_error:
        first_message = str(gvid_error)

    # Civick parse failed; try the ACS format and convert.
    try:
        return AcsGeoid.parse(v).convert(GVid)
    except ValueError as acs_error:
        raise ValueError("Failed to parse to either ACS or GVid: {}; {}".format(first_message, str(acs_error)))
Example #8
0
def get_boundaries(gvid, sl):
    """
    Return a cached, static geojson file of boundaries for a region
    :param gvid:  The GVID of the region
    :param sl:  The summary level of the subdivisions of the region.
    :return:
    """

    from geojson import Feature, Point, FeatureCollection, dumps
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache('ui/geo')

    # Cache file is keyed on the requested region gvid and summary level.
    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    if not exists(fn_path):

        p = aac.library.partition('census.gov-tiger-2015-counties')

        features = []

        for i, row in enumerate(p):
            if row.statefp == 6:  # In dev, assume counties in California

                # NOTE(review): this rebinds the 'gvid' parameter; the region
                # argument is never used to filter rows — confirm whether
                # filtering by the requested region is intended here.
                gvid = GVid.parse(row.gvid)

                # Simplify geometry to keep the cached geojson small.
                f = Feature(geometry=loads(row.geometry).simplify(0.01),
                            properties={
                                'gvid': row.gvid,
                                'state': gvid.state,
                                'county': gvid.county,
                                'count_name': row.name

                            })

                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, 'w') as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False, mimetype='application/vnd.geo+json')
Example #9
0
def get_boundaries(gvid, sl):
    """
    Return a cached, static geojson file of boundaries for a region
    :param gvid:  The GVID of the region
    :param sl:  The summary level of the subdivisions of the region.
    :return:
    """

    from geojson import Feature, Point, FeatureCollection, dumps
    from shapely.wkt import loads
    from geoid.civick import GVid
    from os.path import join, exists
    from flask import send_from_directory

    cache_dir = aac.library.filesystem.cache("ui/geo")

    # Cache file is keyed on the requested region gvid and summary level.
    fn = "{}-{}.geojson".format(str(gvid), sl)
    fn_path = join(cache_dir, fn)

    if not exists(fn_path):

        p = aac.library.partition("census.gov-tiger-2015-counties")

        features = []

        for i, row in enumerate(p):
            if row.statefp == 6:  # In dev, assume counties in California

                # NOTE(review): this rebinds the 'gvid' parameter; the region
                # argument is never used to filter rows — confirm whether
                # filtering by the requested region is intended here.
                gvid = GVid.parse(row.gvid)

                # Geometry is simplified to keep the cached geojson small.
                f = Feature(
                    geometry=loads(row.geometry).simplify(0.01),
                    properties={"gvid": row.gvid, "state": gvid.state, "county": gvid.county, "count_name": row.name},
                )

                features.append(f)

        fc = FeatureCollection(features)

        with open(fn_path, "w") as f:
            f.write(dumps(fc))

    return send_from_directory(cache_dir, fn, as_attachment=False, mimetype="application/vnd.geo+json")
Example #10
0
 def resum(g):
     """Return the summarized form of the gvid string *g*; on failure return *g* unchanged.

     Relies on ``GVid`` from the enclosing scope.
     """
     try:
         return str(GVid.parse(g).summarize())
     except (KeyError, ValueError):
         # Unparseable gvid; keep the original value.
         return g
Example #11
0
    def bundle_search(self, terms):
        """Incremental search, search as you type.

        :param terms: raw search terms from the UI.
        :return: the rendered 'search/results.html' template with results
            (best score first) and facet sets for years, sources and states.
        """

        from geoid.civick import GVid

        parsed = self.library.search.make_query_from_terms(terms)

        final_results = []

        init_results  = self.library.search.search_datasets(parsed)

        # Show at most this many partitions per dataset.
        pvid_limit = 5

        all_idents = self.library.search.identifier_map

        for result in sorted(init_results.values(), key=lambda e: e.score, reverse=True):

            d = self.doc_cache.dataset(result.vid)

            d['partition_count'] = len(result.partitions)
            d['partitions'] = {}

            for pvid in list(result.partitions)[:pvid_limit]:

                p = self.doc_cache.partition(pvid)

                p['table'] = self.doc_cache.table(p['table_vid'])

                if 'geo_coverage' in p:
                    # Resolve each coverage gvid to a display name.
                    for gvid in p['geo_coverage']['vids']:
                        try:

                            p['geo_coverage']['names'].append(all_idents[gvid])

                        except KeyError:
                            # Not in the identifier map — presumably a summary
                            # gvid; describe it as "All <level> in <parent>".
                            g = GVid.parse(gvid)
                            try:
                                phrase = "All {} in {} ".format(
                                    g.level_plural.title(), all_idents[str(g.promote())])
                                p['geo_coverage']['names'].append(phrase)
                            except KeyError:
                                # Parent region unknown as well; skip it.
                                pass

                d['partitions'][pvid] = p

            final_results.append(d)

        template = self.env.get_template('search/results.html')

        # Collect facets to display to the user, for additional sorting
        facets = {
            'years': set(),
            'sources': set(),
            'states': set()
        }

        for r in final_results:
            facets['sources'].add(r['source'])
            for p in r['partitions'].values():
                if 'time_coverage' in p and p['time_coverage']:
                    facets['years'] |= set(p['time_coverage']['years'])

                if 'geo_coverage' in p:
                    for gvid in p['geo_coverage']['vids']:
                        g = GVid.parse(gvid)

                        # Only concrete (non-summary) state gvids become facets.
                        if g.level == 'state' and not g.is_summary:
                            #facets['states'].add( (gvid, all_idents[gvid]))
                            try:
                                facets['states'].add(all_idents[gvid])
                            except KeyError:
                                pass # TODO Should probably announce an error

        return self.render(
            template,
            query = parsed,
            results=final_results,
            facets=facets,
            **self.cc())
Example #12
0
 def resum(g):
     """Return the summarized form of the gvid string *g*; on failure return *g* unchanged.

     Relies on ``GVid`` from the enclosing scope.
     """
     try:
         return str(GVid.parse(g).summarize())
     except KeyError:
         # Unknown component in the gvid; keep the original value.
         return g
Example #13
0
    def bundle_search(self, terms):
        """Incremental search, search as you type.

        :param terms: dict of raw search terms from the UI; values are
            stripped before searching.
        :return: the rendered 'search/results.html' template with results
            (sorted by vname) and facet sets for years, sources and states.
        """

        from geoid.civick import GVid

        results = []

        (b_query, p_query, terms), bp_set = self.library.search.search_bundles(
            {k: v.strip() for k, v in terms.items()})

        # Show at most this many partitions per bundle.
        pvid_limit = 5

        all_idents = self.library.search.identifier_map

        for bvid, pvids in bp_set.items():

            d = self.doc_cache.dataset(bvid)

            d['partition_count'] = len(pvids)
            d['partitions'] = {}

            for pvid in pvids[:pvid_limit]:

                p = self.doc_cache.partition(pvid)

                p['table'] = self.doc_cache.table(p['table_vid'])

                if 'geo_coverage' in p:
                    # Resolve each coverage gvid to a display name.
                    for gvid in p['geo_coverage']['vids']:
                        try:

                            p['geo_coverage']['names'].append(all_idents[gvid])

                        except KeyError:
                            # Not in the identifier map — presumably a summary
                            # gvid; describe it as "All <level> in <parent>".
                            g = GVid.parse(gvid)
                            try:
                                phrase = "All {} in {} ".format(
                                    g.level_plural.title(), all_idents[str(g.promote())])
                                p['geo_coverage']['names'].append(phrase)
                            except KeyError:
                                # Parent region unknown as well; skip it.
                                pass

                d['partitions'][pvid] = p

            results.append(d)

        template = self.env.get_template('search/results.html')

        results = sorted(results, key=lambda x: x['vname'])

        # Collect facets to display to the user, for additional sorting
        facets = {
            'years': set(),
            'sources': set(),
            'states': set()
        }

        for r in results:
            facets['sources'].add(r['source'])
            for p in r['partitions'].values():
                if 'time_coverage' in p and p['time_coverage']:
                    facets['years'] |= set(p['time_coverage']['years'])

                if 'geo_coverage' in p:
                    for gvid in p['geo_coverage']['vids']:
                        g = GVid.parse(gvid)

                        if g.level == 'state' and not g.is_summary:
                            #facets['states'].add( (gvid, all_idents[gvid]))
                            # FIX: a state gvid missing from the identifier map
                            # previously raised an uncaught KeyError and broke
                            # the whole results page; skip it instead, matching
                            # the other bundle_search implementation.
                            try:
                                facets['states'].add(all_idents[gvid])
                            except KeyError:
                                pass  # TODO Should probably announce an error

        return self.render(
            template,
            queries=dict(
                b_query=b_query,
                p_query=p_query,
                terms=terms),
            results=results,
            facets=facets,
            **self.cc())
Example #14
0
    def search_bundles(self, search, limit=None):
        """Search for datasets and partitions using a structured search object.

        :param search: a dict, with values for each of the search components
            ('all', 'source', 'about', 'in', 'by', 'from', 'to', 'with').
        :param limit: unused; kept for interface compatibility.
        :return: ``((d_term, p_term, search), rtrn)`` where the first element
            holds the dataset and partition query strings actually used, and
            ``rtrn`` maps bundle vids to lists of matching partition vids.
        """
        from ..identity import ObjectNumber
        from collections import defaultdict
        from geoid.civick import GVid

        if search.get('all', False):
            # A free-form 'all' query is re-parsed into structured components.
            search = SearchTermParser().parse(search['all'])

        about_term = source_term = with_term = grain_term = years_term = in_term = ''

        if search.get('source', False):
            source_term = "source:" + search.get('source', '').strip()

        if search.get('about', False):
            about_term = "doc:({})".format(search.get('about', '').strip())

        d_term = ' AND '.join(x for x in [source_term, about_term] if bool(x))

        # This is the doc terms we'll move to the partition search if the partition search returns nothing
        # but only if the about term was the only one specified.
        dt_p_term = about_term if not source_term else None

        if search.get('in', False):
            place_vids = list(
                x[1] for x in self.search_identifiers(
                    search['in']))  # Convert generator to list

            if place_vids:

                # Add the 'all region' gvids for the higher level

                all_set = set(str(GVid.parse(x).allval()) for x in place_vids)

                place_vids += list(all_set)

                in_term = "coverage:({})".format(' OR '.join(place_vids))

        if search.get('by', False):
            grain_term = "coverage:" + search.get('by', '').strip()

        # The wackiness with the converts to int and str, and adding ' ', is because there
        # can't be a space between the 'TO' and the brackets in the time range
        # when one end is open.
        #
        # FIX: from_year/to_year are initialized up front. Previously a truthy
        # but non-numeric 'from'/'to' value raised ValueError in int(), the
        # handler passed, and the name was left unbound — causing a NameError
        # in the bool() test below.
        from_year = ''
        to_year = ''

        if search.get('from', False):
            try:
                from_year = str(int(search.get('from', False))) + ' '
            except ValueError:
                pass

        if search.get('to', False):
            try:
                to_year = ' ' + str(int(search.get('to', False)))
            except ValueError:
                pass

        if bool(from_year) or bool(to_year):

            years_term = "coverage:[{}TO{}]".format(from_year, to_year)

        if search.get('with', False):
            with_term = 'schema:({})'.format(search.get('with', False))

        if bool(d_term):
            # list(...) : the return from search_datasets is a generator, so it
            # can only be read once.
            bvids = list(self.search_datasets(d_term))

        else:
            bvids = []

        p_term = ' AND '.join(
            x for x in [
                in_term,
                years_term,
                grain_term,
                with_term] if bool(x))

        if bool(p_term):
            if bvids:
                p_term += " AND bvid:({})".format(' OR '.join(bvids))
            elif dt_p_term:
                # In case the about term didn't generate any hits for the
                # bundle.
                p_term += " AND {}".format(dt_p_term)
        else:
            if not bvids and dt_p_term:
                p_term = dt_p_term

        if p_term:
            pvids = list(self.search_partitions(p_term))

            if pvids:
                # Group partition hits under their parent bundle vid.
                bp_set = defaultdict(set)
                for p in pvids:
                    bvid = str(ObjectNumber.parse(p).as_dataset)
                    bp_set[bvid].add(p)

                rtrn = {b: list(p) for b, p in bp_set.items()}
            else:
                rtrn = {}

        else:
            # No partition query: return every matched bundle with no
            # partitions attached.
            rtrn = {b: [] for b in bvids}

        return (d_term, p_term, search), rtrn