def prepare_geojson(self, geojson):
    """Flatten a WOF GeoJSON feature's properties for indexing in
    Elasticsearch.

    Stringifies the bbox, strips ES-hostile tags, derives placetype
    IDs/names, buckets name:* properties into per-language name_*
    lists, records concordance counts and the geometry type, then
    enstringifies everything. Returns the (mutated) properties dict.
    """

    props = geojson['properties']

    # Store a stringified bounding box so that tools like
    # the spelunker can zoom to extent and stuff like that
    # (20150730/thisisaaronland)

    bbox = geojson.get('bbox', [])
    bbox = ",".join(map(str, bbox))  # oh python...
    props['geom:bbox'] = bbox

    # ggggggrgrgrgrgrhhnhnnnhnhnhnhnhhzzzzpphphtttt - we shouldn't
    # have to do this but even with the enstringification below
    # ES tries to be too clever by half so in the interests of just
    # getting stuff done we're going to be ruthless about things...
    # (21050806/thisisaaronland)

    omgwtf = (
        u'ne:fips_10_',
        u'ne:gdp_md_est',
        u'ne:geou_dif',
        u'ne:pop_est',
        u'ne:su_dif',
        u'ne:adm0_dif',
        u'ne:level',
    )

    for bbq in omgwtf:

        # 'in' rather than the py2-only dict.has_key()
        if bbq in props:
            logging.warning("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
            del props[bbq]

    # alt placetype names/ID

    placetype = props['wof:placetype']
    placetype = mapzen.whosonfirst.placetypes.placetype(placetype)

    placetype_id = placetype.id()
    placetype_names = [unicode(n) for n in placetype.names()]

    props['wof:placetype_id'] = placetype_id
    props['wof:placetype_names'] = placetype_names

    # Names
    # iterate over a snapshot because we add name_* keys to props
    # inside the loop

    for k, v in list(props.items()):

        # was k.startswith("name:", False) - the stray False was being
        # silently interpreted as a start offset of 0
        if not k.startswith("name:"):
            continue

        # please actually parse me using mz-wof-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-names
        # (20151030/thisisaaronland)

        parts = k.split("_x_")

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/5
        # see above (20151030/thisisaaronland)

        if len(parts) == 2:
            label = parts[1]
            n_label = "name_%s" % label

            names = props.get(n_label, [])
            names.extend(v)
            props[n_label] = names

    # Misc counters

    conc = props.get('wof:concordances', {})
    props['wof:concordances_count'] = len(conc)

    props['geom:type'] = geojson['geometry']['type']

    # because ES suffers from E_EXCESSIVE_CLEVERNESS
    props = self.enstringify(props)
    return props
def prepare_geojson(self, geojson):
    """Flatten a WOF GeoJSON feature's properties for indexing in
    Elasticsearch.

    Stringifies the bbox, strips ES-hostile tags, derives placetype
    IDs/names, normalizes wof:/mz:/sg: category machine tags into
    path-style facets, cleans up concordances and computes assorted
    counters.

    NOTE(review): in this version the name counters (count_names_*)
    are computed but never written back to props and there is no
    return statement - it looks truncated; confirm against the
    canonical copy of this function.
    """

    props = geojson['properties']

    # Store a stringified bounding box so that tools like
    # the spelunker can zoom to extent and stuff like that
    # (20150730/thisisaaronland)

    bbox = geojson.get('bbox', [])
    bbox = ",".join(map(str, bbox))  # oh python...
    props['geom:bbox'] = bbox

    # ggggggrgrgrgrgrhhnhnnnhnhnhnhnhhzzzzpphphtttt - we shouldn't
    # have to do this but even with the enstringification below
    # ES tries to be too clever by half so in the interests of just
    # getting stuff done we're going to be ruthless about things...
    # (21050806/thisisaaronland)

    omgwtf = (
        u'ne:fips_10_',
        u'ne:gdp_md_est',
        u'ne:geou_dif',
        u'ne:pop_est',
        u'ne:su_dif',
        u'ne:adm0_dif',
        u'ne:level',
        u'fsgov:ajo_pvm',
    )

    for bbq in omgwtf:

        # 'in' rather than the py2-only dict.has_key()
        if bbq in props:
            logging.debug("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
            del props[bbq]

    # alt placetype names/ID

    placetype = props['wof:placetype']
    placetype = mapzen.whosonfirst.placetypes.placetype(placetype)

    placetype_id = placetype.id()
    placetype_names = [unicode(n) for n in placetype.names()]

    props['wof:placetype_id'] = placetype_id
    props['wof:placetype_names'] = placetype_names

    # Categories

    # wof categories

    wof_categories = []

    for tag in props.get('wof:categories', []):

        mt = mapzen.whosonfirst.machinetag.machinetag(tag)

        if not mt.is_machinetag():
            logging.warning("%s is not a valid wof:categories machine tag, skipping" % tag)
            continue

        enpathified = machinetag.elasticsearch.hierarchy.enpathify_from_machinetag(mt)

        if enpathified not in wof_categories:
            wof_categories.append(enpathified)

    props["wof:categories"] = wof_categories

    # mz categories

    mz_categories = []

    for tag in props.get('mz:categories', []):

        mt = mapzen.whosonfirst.machinetag.machinetag(tag)

        if not mt.is_machinetag():
            # message previously said "wof:categories" (copy/paste)
            logging.warning("%s is not a valid mz:categories machine tag, skipping" % tag)
            continue

        enpathified = machinetag.elasticsearch.hierarchy.enpathify_from_machinetag(mt)

        if enpathified not in mz_categories:
            mz_categories.append(enpathified)

    props["mz:categories"] = mz_categories

    # simplegeo categories

    sg_categories = []

    for tag in props.get('sg:categories', []):

        mt = mapzen.whosonfirst.machinetag.machinetag(tag)

        if not mt.is_machinetag():
            logging.warning("%s is not a valid sg:categories machine tag, skipping" % tag)
            continue

        enpathified = machinetag.elasticsearch.hierarchy.enpathify_from_machinetag(mt)

        if enpathified not in sg_categories:
            sg_categories.append(enpathified)

    # old historical stuff that we may ignore/purge in time... but
    # not today (20160613/thisisaaronland)

    stz = mapzen.whosonfirst.machinetag.sanitize()

    for cl in props.get('sg:classifiers', []):

        sg_type = cl.get('type', '')
        sg_category = cl.get('category', '')
        sg_subcategory = cl.get('subcategory', '')

        clean_type = stz.filter_namespace(sg_type)
        clean_category = stz.filter_predicate(sg_category)
        clean_subcategory = stz.filter_value(sg_subcategory)

        tags = []

        mt = "sg:%s=%s" % (clean_type, clean_category)
        tags.append(mt)

        if clean_subcategory != "":
            mt = "%s:%s=%s" % (clean_type, clean_category, clean_subcategory)
            tags.append(mt)

        for t in tags:

            mt = mapzen.whosonfirst.machinetag.machinetag(t)

            if not mt.is_machinetag():
                logging.warning("sg category fails machinetag test: '%s' (%s)" % (t, cl))
                continue

            enpathified = machinetag.elasticsearch.hierarchy.enpathify_from_machinetag(mt)

            if enpathified not in sg_categories:
                sg_categories.append(enpathified)

    props["sg:categories"] = sg_categories

    # Concordances

    conc = props.get('wof:concordances', {})

    # Because Boundary Issues was careless with how it encoded 'array()'
    # See: https://github.com/whosonfirst/whosonfirst-www-boundaryissues/commit/436607e41b51890080064515582240bbedda633f
    # (20161031/dphiffer)

    if conc == []:
        logging.warning("FIX %d concordances encoded as []" % props['wof:id'])
        conc = {}

    # So this may go away if we can ever figure out a simple way to facet on the
    # set of unique keys for _all_ `wof:concordances` blobs but today we can't so
    # this is faster and easier than standing around in ES-quicksand...
    # (20160518/thisisaaronland)

    props['wof:concordances_sources'] = conc.keys()

    # Misc counters
    # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/13

    props['counts:concordances_total'] = len(conc)

    # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/14

    langs_official = props.get('wof:lang_x_official', [])
    langs_spoken = props.get('wof:lang_x_spoken', [])

    props['counts:languages_official'] = len(langs_official)
    props['counts:languages_spoken'] = len(langs_spoken)

    count_langs = len(langs_official)

    for lang in langs_spoken:
        if lang not in langs_official:
            count_langs += 1

    props['counts:languages_total'] = count_langs

    # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/15

    count_names_total = 0
    count_names_prefered = 0
    count_names_variant = 0
    count_names_colloquial = 0
    count_names_languages = 0

    name_langs = []

    for k, v in props.items():

        if not k.startswith("name:"):
            continue

        count_names = len(v)
        count_names_total += count_names

        # https://github.com/whosonfirst/whosonfirst-names/issues/3

        try:
            k = k.replace("name:", "")
            parts = k.split("_x_")
            lang, qualifier = parts
        except Exception as e:
            logging.error("failed to parse '%s', because %s" % (k, e))
            continue

        if lang not in name_langs:
            count_names_languages += 1
            name_langs.append(lang)

        if qualifier == 'prefered':
            count_names_prefered += count_names
        elif qualifier == 'variant':
            count_names_variant += count_names
        elif qualifier == 'colloquial':
            count_names_colloquial += count_names
        else:
            pass
def prepare_geojson(self, geojson):
    """Flatten a WOF GeoJSON feature's properties for indexing in
    Elasticsearch: stringified bbox, ES-hostile tags removed,
    placetype IDs/names, per-language name_* buckets, concordance
    sources and assorted counters.

    NOTE(review): in this version the name counters (count_names_*)
    are computed but never written back to props and there is no
    return statement - it looks truncated; confirm against the
    canonical copy of this function.
    """

    props = geojson['properties']

    # Store a stringified bounding box so that tools like
    # the spelunker can zoom to extent and stuff like that
    # (20150730/thisisaaronland)

    bbox = geojson.get('bbox', [])
    bbox = ",".join(map(str, bbox))  # oh python...
    props['geom:bbox'] = bbox

    # ggggggrgrgrgrgrhhnhnnnhnhnhnhnhhzzzzpphphtttt - we shouldn't
    # have to do this but even with the enstringification below
    # ES tries to be too clever by half so in the interests of just
    # getting stuff done we're going to be ruthless about things...
    # (21050806/thisisaaronland)

    omgwtf = (
        u'ne:fips_10_',
        u'ne:gdp_md_est',
        u'ne:geou_dif',
        u'ne:pop_est',
        u'ne:su_dif',
        u'ne:adm0_dif',
        u'ne:level',
    )

    for bbq in omgwtf:

        # 'in' rather than the py2-only dict.has_key()
        if bbq in props:
            logging.warning("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
            del props[bbq]

    # alt placetype names/ID

    placetype = props['wof:placetype']
    placetype = mapzen.whosonfirst.placetypes.placetype(placetype)

    placetype_id = placetype.id()
    placetype_names = [unicode(n) for n in placetype.names()]

    props['wof:placetype_id'] = placetype_id
    props['wof:placetype_names'] = placetype_names

    # Names
    # iterate over a snapshot because we add name_* keys to props
    # inside the loop

    for k, v in list(props.items()):

        # was k.startswith("name:", False) - the stray False was being
        # silently interpreted as a start offset of 0
        if not k.startswith("name:"):
            continue

        # please actually parse me using mz-wof-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-names
        # (20151030/thisisaaronland)

        parts = k.split("_x_")

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/5
        # see above (20151030/thisisaaronland)

        if len(parts) == 2:
            label = parts[1]
            n_label = "name_%s" % label

            names = props.get(n_label, [])
            names.extend(v)
            props[n_label] = names

    # Concordances

    conc = props.get('wof:concordances', {})

    # So this may go away if we can ever figure out a simple way to facet on the
    # set of unique keys for _all_ `wof:concordances` blobs but today we can't so
    # this is faster and easier than standing around in ES-quicksand...
    # (20160518/thisisaaronland)

    props['wof:concordances_sources'] = conc.keys()

    # Misc counters
    # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/13

    props['counts:concordances_total'] = len(conc)

    # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/14

    langs_official = props.get('wof:lang_x_official', [])
    langs_spoken = props.get('wof:lang_x_spoken', [])

    props['counts:languages_official'] = len(langs_official)
    props['counts:languages_spoken'] = len(langs_spoken)

    count_langs = len(langs_official)

    for lang in langs_spoken:
        if lang not in langs_official:
            count_langs += 1

    props['counts:languages_total'] = count_langs

    # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/15

    count_names_total = 0
    count_names_prefered = 0
    count_names_variant = 0
    count_names_colloquial = 0
    count_names_languages = 0

    name_langs = []

    for k, v in props.items():

        if not k.startswith("name:"):
            continue

        count_names = len(v)
        count_names_total += count_names

        # https://github.com/whosonfirst/whosonfirst-names/issues/3

        try:
            k = k.replace("name:", "")
            parts = k.split("_x_")
            lang, qualifier = parts
        except Exception as e:
            logging.error("failed to parse '%s', because %s" % (k, e))
            continue

        if lang not in name_langs:
            count_names_languages += 1
            name_langs.append(lang)

        if qualifier == 'prefered':
            count_names_prefered += count_names
        elif qualifier == 'variant':
            count_names_variant += count_names
        elif qualifier == 'colloquial':
            count_names_colloquial += count_names
        else:
            pass
def load(cls, filename):
    """
    Load a collections class from a GeoJSON file of metadata.

    Reads `filename` as GeoJSON, wraps each feature in a GBDXScene
    and returns a new instance of `cls` built from those scenes plus
    the file's top-level 'properties' (defaulting to {}).
    """

    with open(filename) as f:
        # json.load streams straight from the file handle instead of
        # reading the whole document into a string first
        geoj = json.load(f)

    scenes = [GBDXScene(feature) for feature in geoj['features']]
    return cls(scenes, properties=geoj.get('properties', {}))
def prepare_geojson(self, geojson):
    """Flatten a WOF GeoJSON feature's properties for indexing in
    Elasticsearch.

    Stringifies the bbox, strips ES-hostile tags, derives placetype
    IDs/names, buckets name:* properties into per-language name_*
    lists, records concordance counts and the geometry type, then
    enstringifies everything. Returns the (mutated) properties dict.
    """

    props = geojson["properties"]

    # Store a stringified bounding box so that tools like
    # the spelunker can zoom to extent and stuff like that
    # (20150730/thisisaaronland)

    bbox = geojson.get("bbox", [])
    bbox = ",".join(map(str, bbox))  # oh python...
    props["geom:bbox"] = bbox

    # ggggggrgrgrgrgrhhnhnnnhnhnhnhnhhzzzzpphphtttt - we shouldn't
    # have to do this but even with the enstringification below
    # ES tries to be too clever by half so in the interests of just
    # getting stuff done we're going to be ruthless about things...
    # (21050806/thisisaaronland)

    omgwtf = (
        u"ne:fips_10_",
        u"ne:gdp_md_est",
        u"ne:geou_dif",
        u"ne:pop_est",
        u"ne:su_dif",
        u"ne:adm0_dif",
        u"ne:level",
    )

    for bbq in omgwtf:

        # 'in' rather than the py2-only dict.has_key()
        if bbq in props:
            logging.warning("remove tag '%s' because ES suffers from E_EXCESSIVE_CLEVERNESS" % bbq)
            del props[bbq]

    # alt placetype names/ID

    placetype = props["wof:placetype"]
    placetype = mapzen.whosonfirst.placetypes.placetype(placetype)

    placetype_id = placetype.id()
    placetype_names = [unicode(n) for n in placetype.names()]

    props["wof:placetype_id"] = placetype_id
    props["wof:placetype_names"] = placetype_names

    # Names
    # iterate over a snapshot because we add name_* keys to props
    # inside the loop

    for k, v in list(props.items()):

        # was k.startswith("name:", False) - the stray False was being
        # silently interpreted as a start offset of 0
        if not k.startswith("name:"):
            continue

        # please actually parse me using mz-wof-names
        # https://github.com/whosonfirst/py-mapzen-whosonfirst-names
        # (20151030/thisisaaronland)

        parts = k.split("_x_")

        # https://github.com/whosonfirst/py-mapzen-whosonfirst-search/issues/5
        # see above (20151030/thisisaaronland)

        if len(parts) == 2:
            label = parts[1]
            n_label = "name_%s" % label

            names = props.get(n_label, [])
            names.extend(v)
            props[n_label] = names

    # Misc counters

    conc = props.get("wof:concordances", {})
    props["wof:concordances_count"] = len(conc)

    props["geom:type"] = geojson["geometry"]["type"]

    # because ES suffers from E_EXCESSIVE_CLEVERNESS
    props = self.enstringify(props)
    return props