Example #1
0
    def prepare_geojson(self, geojson):
        """Derive an indexable properties dict from a GeoJSON feature.

        Works on (and mutates) ``geojson['properties']``: adds a stringified
        bounding box, drops a fixed list of ``ne:*`` keys, adds the placetype
        id and names, copies ``name:*`` values into ``name_<label>`` lists,
        records a concordances count and the geometry type, and finally
        passes the result through ``self.enstringify``.

        :param geojson: feature dict. ``properties`` (with ``wof:placetype``)
            and ``geometry['type']`` are required; ``bbox`` is optional.
        :returns: the value returned by ``self.enstringify`` for the updated
            properties.
        :raises KeyError: if one of the required keys above is missing.
        """

        props = geojson['properties']

        # Store a stringified bounding box so that tools like
        # the spelunker can zoom to extent and stuff like that
        # (20150730/thisisaaronland)

        props['geom:bbox'] = ",".join(str(c) for c in geojson.get('bbox', []))

        # We shouldn't have to do this but even with the enstringification
        # below ES tries to be too clever by half so in the interests of just
        # getting stuff done we're going to be ruthless about things...
        # (21050806/thisisaaronland)

        omgwtf = (
            u'ne:fips_10_',
            u'ne:gdp_md_est',
            u'ne:geou_dif',
            u'ne:pop_est',
            u'ne:su_dif',
            u'ne:adm0_dif',
            u'ne:level',
        )

        for bbq in omgwtf:
            # `in` replaces the deprecated dict.has_key()
            if bbq in props:
                logging.warning(
                    "remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS"
                    % bbq)
                del props[bbq]

        # alt placetype names/ID

        placetype = mapzen.whosonfirst.placetypes.placetype(props['wof:placetype'])

        props['wof:placetype_id'] = placetype.id()
        props['wof:placetype_names'] = [unicode(n) for n in placetype.names()]

        # Names
        #
        # please actually parse me using mz-wof-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/5
        # (20151030/thisisaaronland)
        #
        # The loop adds name_* keys to props, so iterate over a snapshot:
        # items() is already a list on Python 2, and list() keeps this safe
        # wherever items() is a live view.

        for k, v in list(props.items()):

            if not k.startswith("name:"):
                continue

            parts = k.split("_x_")

            # only keys of the form <something>_x_<label> are folded in
            if len(parts) == 2:
                n_label = "name_%s" % parts[1]
                props.setdefault(n_label, []).extend(v)

        # Misc counters
        #
        # Empty concordances have been seen encoded as [] instead of {} (and
        # may be null); `.items()` fails on those, so normalise falsy values
        # to an empty dict and count with len() directly.

        conc = props.get('wof:concordances') or {}
        props['wof:concordances_count'] = len(conc)

        props['geom:type'] = geojson['geometry']['type']

        # because ES suffers from E_EXCESSIVE_CLEVERNESS

        return self.enstringify(props)
    def prepare_geojson(self, geojson):
        """Decorate a GeoJSON feature's properties with derived index fields.

        Works on (and mutates) ``geojson['properties']``: adds a stringified
        bounding box, drops a fixed list of troublesome keys, adds the
        placetype id and names, rewrites the ``wof:``, ``mz:`` and ``sg:``
        category lists via ``enpathify_from_machinetag``, and adds
        concordance and language counters.

        :param geojson: feature dict. ``properties`` (with ``wof:placetype``)
            is required; ``bbox`` is optional.
        :returns: ``None`` in the code as it stands (see the review note at
            the end of the body).
        :raises KeyError: if ``properties`` or ``wof:placetype`` is missing.
        """

        props = geojson['properties']

        def enpathify_tags(key):
            # Run every valid machine tag listed under props[key] through
            # enpathify_from_machinetag; invalid tags are logged and skipped,
            # duplicates are dropped and first-seen order is kept.
            paths = []

            for tag in props.get(key, []):

                mt = mapzen.whosonfirst.machinetag.machinetag(tag)

                if not mt.is_machinetag():
                    logging.warning("%s is not a valid %s machine tag, skipping" % (tag, key))
                    continue

                enpathified = machinetag.elasticsearch.hierarchy.enpathify_from_machinetag(mt)

                if enpathified not in paths:
                    paths.append(enpathified)

            return paths

        # Store a stringified bounding box so that tools like
        # the spelunker can zoom to extent and stuff like that
        # (20150730/thisisaaronland)

        props['geom:bbox'] = ",".join(str(c) for c in geojson.get('bbox', []))

        # We shouldn't have to do this but even with the enstringification
        # ES tries to be too clever by half so in the interests of just
        # getting stuff done we're going to be ruthless about things...
        # (21050806/thisisaaronland)

        omgwtf = (
            u'ne:fips_10_',
            u'ne:gdp_md_est',
            u'ne:geou_dif',
            u'ne:pop_est',
            u'ne:su_dif',
            u'ne:adm0_dif',
            u'ne:level',
            u'fsgov:ajo_pvm',
        )

        for bbq in omgwtf:
            # `in` replaces the deprecated dict.has_key()
            if bbq in props:
                logging.debug("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
                del props[bbq]

        # alt placetype names/ID

        placetype = mapzen.whosonfirst.placetypes.placetype(props['wof:placetype'])

        props['wof:placetype_id'] = placetype.id()
        props['wof:placetype_names'] = [unicode(n) for n in placetype.names()]

        # Categories (wof, mz, simplegeo). Each warning now names the key it
        # is actually about; the mz loop used to report "wof:categories".

        props["wof:categories"] = enpathify_tags('wof:categories')
        props["mz:categories"] = enpathify_tags('mz:categories')

        sg_categories = enpathify_tags('sg:categories')

        # old historical stuff that we may ignore/purge in time... but
        # not today (20160613/thisisaaronland)

        stz = mapzen.whosonfirst.machinetag.sanitize()

        for cl in props.get('sg:classifiers', []):

            clean_type = stz.filter_namespace(cl.get('type', ''))
            clean_category = stz.filter_predicate(cl.get('category', ''))
            clean_subcategory = stz.filter_value(cl.get('subcategory', ''))

            # NOTE(review): the first tag is namespaced "sg:" while the second
            # uses the classifier type as its namespace; kept exactly as found.
            tags = ["sg:%s=%s" % (clean_type, clean_category)]

            if clean_subcategory != "":
                tags.append("%s:%s=%s" % (clean_type, clean_category, clean_subcategory))

            for t in tags:

                mt = mapzen.whosonfirst.machinetag.machinetag(t)

                if not mt.is_machinetag():
                    logging.warning("sg category fails machinetag test: '%s' (%s)" % (t, cl))
                    continue

                enpathified = machinetag.elasticsearch.hierarchy.enpathify_from_machinetag(mt)

                if enpathified not in sg_categories:
                    sg_categories.append(enpathified)

        props["sg:categories"] = sg_categories

        # Concordances

        conc = props.get('wof:concordances', {})

        # Because Boundary Issues was careless with how it encoded 'array()'
        # See: https://github.com/whosonfirst/whosonfirst-www-boundaryissues/commit/436607e41b51890080064515582240bbedda633f
        # (20161031/dphiffer)
        if conc == []:
            # .get() + %s so that a record without wof:id cannot turn this
            # warning into a KeyError
            logging.warning("FIX %s concordances encoded as []" % props.get('wof:id'))
            conc = {}

        # So this may go away if we can ever figure out a simple way to facet on the
        # set of unique keys for _all_ `wof:concordances` blobs but today we can't so
        # this is faster and easier than standing around in ES-quicksand...
        # (20160518/thisisaaronland)

        props['wof:concordances_sources'] = list(conc.keys())

        # Misc counters

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/13

        props['counts:concordances_total'] = len(conc)

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/14

        langs_official = props.get('wof:lang_x_official', [])
        langs_spoken = props.get('wof:lang_x_spoken', [])

        props['counts:languages_official'] = len(langs_official)
        props['counts:languages_spoken'] = len(langs_spoken)

        # total = official languages plus every spoken entry that is not
        # also listed as official
        props['counts:languages_total'] = len(langs_official) + sum(
            1 for lang in langs_spoken if lang not in langs_official)

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/15

        count_names_total = 0
        count_names_prefered = 0
        count_names_variant = 0
        count_names_colloquial = 0
        count_names_languages = 0

        name_langs = []

        for k, v in props.items():

            if not k.startswith("name:"):
                continue

            count_names = len(v)
            count_names_total += count_names

            # https://github.com/whosonfirst/whosonfirst-names/issues/3
            # Expect "<lang>_x_<qualifier>" once the "name:" prefix is gone;
            # anything else fails the two-way unpack and is logged.

            k = k.replace("name:", "")

            try:
                lang, qualifier = k.split("_x_")
            except ValueError as e:
                logging.error("failed to parse '%s', because %s" % (k, e))
                continue

            if lang not in name_langs:
                count_names_languages += 1
                name_langs.append(lang)

            if qualifier == 'prefered':
                count_names_prefered += count_names
            elif qualifier == 'variant':
                count_names_variant += count_names
            elif qualifier == 'colloquial':
                count_names_colloquial += count_names

        # NOTE(review): none of the count_names_* tallies above are written
        # to props and nothing is returned, unlike the concordance and
        # language counters. This looks like a truncated copy of the method;
        # confirm against the upstream source before relying on it.
    def prepare_geojson(self, geojson):
        """Decorate a GeoJSON feature's properties with derived index fields.

        Works on (and mutates) ``geojson['properties']``: adds a stringified
        bounding box, drops a fixed list of ``ne:*`` keys, adds the placetype
        id and names, copies ``name:*`` values into ``name_<label>`` lists,
        and adds concordance and language counters.

        :param geojson: feature dict. ``properties`` (with ``wof:placetype``)
            is required; ``bbox`` is optional.
        :returns: ``None`` in the code as it stands (see the review note at
            the end of the body).
        :raises KeyError: if ``properties`` or ``wof:placetype`` is missing.
        """

        props = geojson['properties']

        # Store a stringified bounding box so that tools like
        # the spelunker can zoom to extent and stuff like that
        # (20150730/thisisaaronland)

        props['geom:bbox'] = ",".join(str(c) for c in geojson.get('bbox', []))

        # We shouldn't have to do this but even with the enstringification
        # ES tries to be too clever by half so in the interests of just
        # getting stuff done we're going to be ruthless about things...
        # (21050806/thisisaaronland)

        omgwtf = (
            u'ne:fips_10_',
            u'ne:gdp_md_est',
            u'ne:geou_dif',
            u'ne:pop_est',
            u'ne:su_dif',
            u'ne:adm0_dif',
            u'ne:level',
        )

        for bbq in omgwtf:
            # `in` replaces the deprecated dict.has_key()
            if bbq in props:
                logging.warning("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
                del props[bbq]

        # alt placetype names/ID

        placetype = mapzen.whosonfirst.placetypes.placetype(props['wof:placetype'])

        props['wof:placetype_id'] = placetype.id()
        props['wof:placetype_names'] = [unicode(n) for n in placetype.names()]

        # Names
        #
        # please actually parse me using mz-wof-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/5
        # (20151030/thisisaaronland)
        #
        # The loop adds name_* keys to props, so iterate over a snapshot:
        # items() is already a list on Python 2, and list() keeps this safe
        # wherever items() is a live view.

        for k, v in list(props.items()):

            if not k.startswith("name:"):
                continue

            parts = k.split("_x_")

            # only keys of the form <something>_x_<label> are folded in
            if len(parts) == 2:
                n_label = "name_%s" % parts[1]
                props.setdefault(n_label, []).extend(v)

        # Concordances

        conc = props.get('wof:concordances', {})

        # An empty concordances blob encoded as [] rather than {} has no
        # .keys()/.items(); treat it as an empty dict and leave a trace so
        # the record can be repaired.
        if conc == []:
            logging.warning("FIX %s concordances encoded as []" % props.get('wof:id'))
            conc = {}

        # So this may go away if we can ever figure out a simple way to facet on the
        # set of unique keys for _all_ `wof:concordances` blobs but today we can't so
        # this is faster and easier than standing around in ES-quicksand...
        # (20160518/thisisaaronland)

        props['wof:concordances_sources'] = list(conc.keys())

        # Misc counters

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/13

        props['counts:concordances_total'] = len(conc)

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/14

        langs_official = props.get('wof:lang_x_official', [])
        langs_spoken = props.get('wof:lang_x_spoken', [])

        props['counts:languages_official'] = len(langs_official)
        props['counts:languages_spoken'] = len(langs_spoken)

        # total = official languages plus every spoken entry that is not
        # also listed as official
        props['counts:languages_total'] = len(langs_official) + sum(
            1 for lang in langs_spoken if lang not in langs_official)

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/15

        count_names_total = 0
        count_names_prefered = 0
        count_names_variant = 0
        count_names_colloquial = 0
        count_names_languages = 0

        name_langs = []

        for k, v in props.items():

            if not k.startswith("name:"):
                continue

            count_names = len(v)
            count_names_total += count_names

            # https://github.com/whosonfirst/whosonfirst-names/issues/3
            # Expect "<lang>_x_<qualifier>" once the "name:" prefix is gone;
            # anything else fails the two-way unpack and is logged.

            k = k.replace("name:", "")

            try:
                lang, qualifier = k.split("_x_")
            except ValueError as e:
                logging.error("failed to parse '%s', because %s" % (k, e))
                continue

            if lang not in name_langs:
                count_names_languages += 1
                name_langs.append(lang)

            if qualifier == 'prefered':
                count_names_prefered += count_names
            elif qualifier == 'variant':
                count_names_variant += count_names
            elif qualifier == 'colloquial':
                count_names_colloquial += count_names

        # NOTE(review): none of the count_names_* tallies above are written
        # to props and nothing is returned, unlike the concordance and
        # language counters. This looks like a truncated copy of the method;
        # confirm against the upstream source before relying on it.
Example #4
0
 def load(cls, filename):
     """ Build a collection from the GeoJSON metadata stored in `filename` """
     # Parse the whole file as one JSON document.
     with open(filename) as handle:
         metadata = json.load(handle)

     # One GBDXScene per entry of the required 'features' list.
     scenes = []
     for feature in metadata['features']:
         scenes.append(GBDXScene(feature))

     # Top-level 'properties' is optional and defaults to an empty dict.
     collection_properties = metadata.get('properties', {})
     return cls(scenes, properties=collection_properties)
    def prepare_geojson(self, geojson):
        """Derive an indexable properties dict from a GeoJSON feature.

        Works on (and mutates) ``geojson["properties"]``: adds a stringified
        bounding box, drops a fixed list of ``ne:*`` keys, adds the placetype
        id and names, copies ``name:*`` values into ``name_<label>`` lists,
        records a concordances count and the geometry type, and finally
        passes the result through ``self.enstringify``.

        :param geojson: feature dict. ``properties`` (with ``wof:placetype``)
            and ``geometry["type"]`` are required; ``bbox`` is optional.
        :returns: the value returned by ``self.enstringify`` for the updated
            properties.
        :raises KeyError: if one of the required keys above is missing.
        """

        props = geojson["properties"]

        # Store a stringified bounding box so that tools like
        # the spelunker can zoom to extent and stuff like that
        # (20150730/thisisaaronland)

        props["geom:bbox"] = ",".join(str(c) for c in geojson.get("bbox", []))

        # We shouldn't have to do this but even with the enstringification
        # below ES tries to be too clever by half so in the interests of just
        # getting stuff done we're going to be ruthless about things...
        # (21050806/thisisaaronland)

        omgwtf = (
            u"ne:fips_10_",
            u"ne:gdp_md_est",
            u"ne:geou_dif",
            u"ne:pop_est",
            u"ne:su_dif",
            u"ne:adm0_dif",
            u"ne:level",
        )

        for bbq in omgwtf:
            # `in` replaces the deprecated dict.has_key()
            if bbq in props:
                logging.warning("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
                del props[bbq]

        # alt placetype names/ID

        placetype = mapzen.whosonfirst.placetypes.placetype(props["wof:placetype"])

        props["wof:placetype_id"] = placetype.id()
        props["wof:placetype_names"] = [unicode(n) for n in placetype.names()]

        # Names
        #
        # please actually parse me using mz-wof-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/5
        # (20151030/thisisaaronland)
        #
        # The loop adds name_* keys to props, so iterate over a snapshot:
        # items() is already a list on Python 2, and list() keeps this safe
        # wherever items() is a live view.

        for k, v in list(props.items()):

            if not k.startswith("name:"):
                continue

            parts = k.split("_x_")

            # only keys of the form <something>_x_<label> are folded in
            if len(parts) == 2:
                n_label = "name_%s" % parts[1]
                props.setdefault(n_label, []).extend(v)

        # Misc counters
        #
        # Empty concordances have been seen encoded as [] instead of {} (and
        # may be null); `.items()` fails on those, so normalise falsy values
        # to an empty dict and count with len() directly.

        conc = props.get("wof:concordances") or {}
        props["wof:concordances_count"] = len(conc)

        props["geom:type"] = geojson["geometry"]["type"]

        # because ES suffers from E_EXCESSIVE_CLEVERNESS

        return self.enstringify(props)