def process(self, text, country_filter):
    """
    The main processing function that extracts place names from text, does
    the country-limited search, and returns the findings.

    Entities are taken from ``utilities.mitie_context`` (run with the
    module-level ``ner_model``). Entities whose text is in ``country_names``
    are skipped, as are entities not tagged "LOCATION"/"Location". Query
    results are memoised in ``self.place_cache``.

    Parameters
    ----------
    self: the Flask API
    text: A unicode string
    country_filter: A list containing an ISO 3 character country code

    Returns
    -------
    locations: a list of locations. Each location is a dictionary with keys
               lat, lon, searchterm, placename, countrycode, admin1.
               admin1 is the name of the state/region/governorate/province
               that the location is in.
    """
    locs = utilities.mitie_context(text, ner_model)
    locations = []
    for i in locs['entities']:
        if i['text'] in country_names:
            print(" (Country/blacklist. Skipping...)")
            continue
        if i['tag'] not in ("LOCATION", "Location"):
            continue
        # Best effort: a failure on one entity is printed and the remaining
        # entities are still processed.
        try:
            # put this in query_geonames?
            searchterm = re.sub(r"Governorate|District|Subdistrict|Airport",
                                "", i['text']).strip()
            searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
            feature_class = extract_feature_class(searchterm, i['text'],
                                                  i['context'])
            # The query below is restricted by country_filter, so the filter
            # has to be part of the cache key; otherwise a hit could return
            # results that were fetched for a different country.
            cache_term = '___'.join([searchterm,
                                     ''.join(feature_class),
                                     repr(country_filter)])
            try:
                t = self.place_cache[cache_term]
            except KeyError:
                t = utilities.query_geonames(es_conn, searchterm,
                                             country_filter)
                self.place_cache[cache_term] = t
            loc = self.pick_best_result(t, i['text'], i['context'])
            # loc is a nice format for debugging and looks like
            # [35.13179, 36.75783, 'searchterm', u'matchname',
            #  u'feature_class', u'country_code3', u'admin1']:
            if loc:
                formatted_loc = {"lat": loc[0],
                                 "lon": loc[1],
                                 "searchterm": loc[2],
                                 "placename": loc[3],
                                 "countrycode": loc[5],
                                 "admin1": loc[6]}
                print('Formatted loc: {}'.format(formatted_loc))
                locations.append(formatted_loc)
        except Exception as e:
            print(e)
    return locations
def process(self, text, country_filter):
    """
    The main processing function that extracts place names from text, does
    the country-limited search, and returns the findings.

    Entities are taken from ``utilities.mitie_context`` (run with
    ``self.ner_model``). Entities whose text is in ``country_names`` are
    skipped, as are entities not tagged "LOCATION"/"Location". Query results
    (fetched through ``self.es_conn``) are memoised in ``self.place_cache``.

    Parameters
    ----------
    self: the Flask API
    text: A unicode string
    country_filter: A list containing an ISO 3 character country code

    Returns
    -------
    locations: a list of locations. Each location is a dictionary with keys
               lat, lon, searchterm, placename, countrycode, admin1.
               admin1 is the name of the state/region/governorate/province
               that the location is in.
    """
    locs = utilities.mitie_context(text, self.ner_model)
    locations = []
    for i in locs['entities']:
        if i['text'] in country_names:
            print(" (Country/blacklist. Skipping...)")
            continue
        if i['tag'] not in ("LOCATION", "Location"):
            continue
        # Best effort: a failure on one entity is printed and the remaining
        # entities are still processed.
        try:
            # put this in query_geonames?
            searchterm = re.sub(r"Governorate|District|Subdistrict|Airport",
                                "", i['text']).strip()
            searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
            feature_class = extract_feature_class(searchterm, i['text'],
                                                  i['context'])
            # The query below is restricted by country_filter, so the filter
            # has to be part of the cache key; otherwise a hit could return
            # results that were fetched for a different country.
            cache_term = '___'.join([searchterm,
                                     ''.join(feature_class),
                                     repr(country_filter)])
            try:
                t = self.place_cache[cache_term]
            except KeyError:
                t = utilities.query_geonames(self.es_conn, searchterm,
                                             country_filter)
                self.place_cache[cache_term] = t
            loc = self.pick_best_result(t, i['text'], i['context'])
            # loc is a nice format for debugging and looks like
            # [35.13179, 36.75783, 'searchterm', u'matchname',
            #  u'feature_class', u'country_code3', u'admin1']:
            if loc:
                formatted_loc = {"lat": loc[0],
                                 "lon": loc[1],
                                 "searchterm": loc[2],
                                 "placename": loc[3],
                                 "countrycode": loc[5],
                                 "admin1": loc[6]}
                print('Formatted loc: {}'.format(formatted_loc))
                locations.append(formatted_loc)
        except Exception as e:
            print(e)
    return locations
def post(self):
    """
    Handle a POST request: parse the arguments and geolocate the text.

    Reads ``text`` and ``country`` from the parsed request arguments. When no
    country is supplied, one is obtained from ``CountryAPI().process(text)``.

    Returns
    -------
    The list of locations produced by ``self.process``; or, when
    ``CountryAPI().process`` raises ValueError, a JSON-encoded empty list.
    """
    args = self.reqparse.parse_args()
    text = args['text']
    country_filter = args['country']
    if not country_filter:
        try:
            country_filter = CountryAPI().process(text)
        except ValueError:
            # No locations can be resolved without a country; return an
            # empty result (the name `locations` is not defined here).
            # NOTE(review): this path returns a JSON string while the normal
            # path returns the list from process -- confirm callers expect it.
            return json.dumps([])
    # process() runs the entity extraction itself, so it must receive the
    # raw text rather than already-extracted entities.
    located = self.process(text, country_filter)
    return located
def process(self, text, country_filter):
    """
    Extract place names from text and resolve them with a country-limited
    search.

    Entities are taken from ``utilities.mitie_context`` (run with the
    module-level ``ner_model``). Entities whose text is in ``country_names``
    are skipped, as are entities not tagged "LOCATION"/"Location". Query
    results are memoised in ``self.place_cache`` and the match is chosen by
    ``pick_best_result2``.

    Parameters
    ----------
    text: the text to search for place names
    country_filter: passed through to ``utilities.query_geonames`` to limit
                    the search

    Returns
    -------
    locations: a list of dictionaries with keys lat, lon, searchterm,
               placename, countrycode.
    """
    locs = utilities.mitie_context(text, ner_model)
    locations = []
    for i in locs['entities']:
        if i['text'] in country_names:
            print(" (Country/blacklist. Skipping...)")
            continue
        if i['tag'] not in ("LOCATION", "Location"):
            continue
        # Best effort: a failure on one entity is printed and the remaining
        # entities are still processed.
        try:
            # put this in query_geonames?
            searchterm = re.sub(r"Governorate|District|Subdistrict|Airport",
                                "", i['text']).strip()
            searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
            feature_class = extract_feature_class(searchterm, i['text'],
                                                  i['context'])
            # The query below is restricted by country_filter, so the filter
            # has to be part of the cache key; otherwise a hit could return
            # results that were fetched for a different country.
            cache_term = '___'.join([searchterm,
                                     ''.join(feature_class),
                                     repr(country_filter)])
            try:
                t = self.place_cache[cache_term]
            except KeyError:
                t = utilities.query_geonames(es_conn, searchterm,
                                             country_filter)
                self.place_cache[cache_term] = t
            loc = pick_best_result2(t, i['text'], i['context'])
            # loc is a nice format for debugging and looks like
            # [35.13179, 36.75783, 'searchterm', u'matchname',
            #  u'feature_class', u'country_code3']:
            if loc:
                formatted_loc = {"lat": loc[0],
                                 "lon": loc[1],
                                 "searchterm": loc[2],
                                 "placename": loc[3],
                                 "countrycode": loc[5]}
                print('Formatted loc: {}'.format(formatted_loc))
                locations.append(formatted_loc)
        except Exception as e:
            print(e)
    return locations