Exemple #1
0
    def process(self, text, country_filter):
        """
        The main processing function that extracts place names from text, does the
        country-limited search, and returns the findings.

        Parameters
        ----------
        self: the Flask API

        text: A unicode string

        country_filter: A list containing an ISO 3 character country code

        Returns
        -------

        locations: a list of locations. Each location is a dictionary with keys
        lat, lon, searchterm, placename, countrycode, admin1.

        admin1 is the name of the state/region/governorate/province that the location is in.

        Notes
        -----
        Entities whose text appears in ``country_names`` are skipped, and only
        entities tagged "LOCATION" or "Location" are looked up. Any exception
        raised while handling a single entity is printed and that entity is
        dropped; the remaining entities are still processed.
        """

        locs = utilities.mitie_context(text, ner_model)
        locations = []
        for entity in locs['entities']:
            if entity['text'] in country_names:
                print(" (Country/blacklist. Skipping...)")
                continue
            if entity['tag'] not in ("LOCATION", "Location"):
                continue
            try:
                # put this in query_geonames?
                # Drop administrative/airport suffix words so that only the
                # bare place name is searched for.
                searchterm = re.sub(r"Governorate|District|Subdistrict|Airport",
                                    "", entity['text']).strip()
                searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
                feature_class = extract_feature_class(searchterm,
                                                      entity['text'],
                                                      entity['context'])
                # NOTE(review): the cache key is built from the search term
                # and feature class only, while the query below also depends
                # on country_filter -- confirm a cached hit cannot leak
                # results fetched under a different filter.
                cache_term = '___'.join([searchterm, ''.join(feature_class)])
                try:
                    results = self.place_cache[cache_term]
                except KeyError:
                    results = utilities.query_geonames(es_conn,
                                                       searchterm,
                                                       country_filter)
                    self.place_cache[cache_term] = results
                loc = self.pick_best_result(results, entity['text'],
                                            entity['context'])
                # loc is a nice format for debugging and looks like
                # [35.13179, 36.75783, 'searchterm', u'matchname',
                # u'feature_class', u'country_code3', u'admin1']:
                if loc:
                    formatted_loc = {"lat": loc[0], "lon": loc[1],
                                     "searchterm": loc[2],
                                     "placename": loc[3],
                                     "countrycode": loc[5],
                                     "admin1": loc[6]}
                    print('Formatted loc: {}'.format(formatted_loc))
                    locations.append(formatted_loc)
            except Exception as e:
                # Best effort: one bad entity must not abort the whole request.
                print(e)
        return locations
Exemple #2
0
    def process(self, text, country_filter):
        """
        The main processing function that extracts place names from text, does the
        country-limited search, and returns the findings.

        Parameters
        ----------
        self: the Flask API

        text: A unicode string

        country_filter: A list containing an ISO 3 character country code

        Returns
        -------

        locations: a list of locations. Each location is a dictionary with keys
        lat, lon, searchterm, placename, countrycode, admin1.

        admin1 is the name of the state/region/governorate/province that the location is in.

        Notes
        -----
        Entities whose text appears in ``country_names`` are skipped, and only
        entities tagged "LOCATION" or "Location" are looked up. Any exception
        raised while handling a single entity is printed and that entity is
        dropped; the remaining entities are still processed.
        """

        locs = utilities.mitie_context(text, self.ner_model)
        locations = []
        for entity in locs['entities']:
            if entity['text'] in country_names:
                print(" (Country/blacklist. Skipping...)")
                continue
            if entity['tag'] not in ("LOCATION", "Location"):
                continue
            try:
                # put this in query_geonames?
                # Drop administrative/airport suffix words so that only the
                # bare place name is searched for.
                searchterm = re.sub(r"Governorate|District|Subdistrict|Airport",
                                    "", entity['text']).strip()
                searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
                feature_class = extract_feature_class(searchterm,
                                                      entity['text'],
                                                      entity['context'])
                # NOTE(review): the cache key is built from the search term
                # and feature class only, while the query below also depends
                # on country_filter -- confirm a cached hit cannot leak
                # results fetched under a different filter.
                cache_term = '___'.join([searchterm, ''.join(feature_class)])
                try:
                    results = self.place_cache[cache_term]
                except KeyError:
                    results = utilities.query_geonames(self.es_conn,
                                                       searchterm,
                                                       country_filter)
                    self.place_cache[cache_term] = results
                loc = self.pick_best_result(results, entity['text'],
                                            entity['context'])
                # loc is a nice format for debugging and looks like
                # [35.13179, 36.75783, 'searchterm', u'matchname',
                # u'feature_class', u'country_code3', u'admin1']:
                if loc:
                    formatted_loc = {"lat": loc[0], "lon": loc[1],
                                     "searchterm": loc[2],
                                     "placename": loc[3],
                                     "countrycode": loc[5],
                                     "admin1": loc[6]}
                    print('Formatted loc: {}'.format(formatted_loc))
                    locations.append(formatted_loc)
            except Exception as e:
                # Best effort: one bad entity must not abort the whole request.
                print(e)
        return locations
Exemple #3
0
    def post(self):
        """
        Handle a POST request: parse the arguments and return located places.

        Reads ``text`` and ``country`` from the parsed request arguments. When
        no country is supplied, one is obtained from
        ``CountryAPI().process(text)``; if that call raises ValueError, a
        JSON-encoded empty list is returned instead of running the search.

        Returns
        -------
        The value returned by ``self.process`` on the normal path, or the JSON
        string ``"[]"`` when no country could be determined.
        """
        args = self.reqparse.parse_args()
        text = args['text']
        country_filter = args['country']
        if not country_filter:
            try:
                country_filter = CountryAPI().process(text)
            except ValueError:
                # No country could be determined, so there is nothing to
                # search. No `locations` name is bound in this method, so an
                # explicit empty list is serialized here.
                return json.dumps([])

        out = utilities.mitie_context(text, ner_model)

        # NOTE(review): `out` is the extraction result rather than the raw
        # text -- confirm this class's `process` expects that input.
        located = self.process(out, country_filter)

        return located
Exemple #4
0
 def process(self, text, country_filter):
     """
     Extract place names from text, look each one up, and return the matches.

     Parameters
     ----------
     text: passed to ``utilities.mitie_context`` for entity extraction

     country_filter: passed through to ``utilities.query_geonames``

     Returns
     -------
     locations: a list of dictionaries with keys lat, lon, searchterm,
     placename, countrycode.

     Notes
     -----
     Entities whose text appears in ``country_names`` are skipped, and only
     entities tagged "LOCATION" or "Location" are looked up. Any exception
     raised while handling a single entity is printed and that entity is
     dropped; the remaining entities are still processed.
     """
     locs = utilities.mitie_context(text, ner_model)
     locations = []
     for entity in locs['entities']:
         if entity['text'] in country_names:
             print(" (Country/blacklist. Skipping...)")
             continue
         if entity['tag'] not in ("LOCATION", "Location"):
             continue
         try:
             # put this in query_geonames?
             # Drop administrative/airport suffix words so that only the
             # bare place name is searched for.
             searchterm = re.sub(r"Governorate|District|Subdistrict|Airport",
                                 "", entity['text']).strip()
             searchterm = re.sub("Dar 'a", "Dar'a", searchterm)
             feature_class = extract_feature_class(searchterm,
                                                   entity['text'],
                                                   entity['context'])
             # NOTE(review): the cache key is built from the search term
             # and feature class only, while the query below also depends
             # on country_filter -- confirm a cached hit cannot leak
             # results fetched under a different filter.
             cache_term = '___'.join([searchterm, ''.join(feature_class)])
             try:
                 results = self.place_cache[cache_term]
             except KeyError:
                 results = utilities.query_geonames(es_conn,
                                                    searchterm,
                                                    country_filter)
                 self.place_cache[cache_term] = results
             loc = pick_best_result2(results, entity['text'],
                                     entity['context'])
             # loc is a nice format for debugging and looks like
             # [35.13179, 36.75783, 'searchterm', u'matchname',
             # u'feature_class', u'country_code3']:
             if loc:
                 formatted_loc = {"lat": loc[0], "lon": loc[1],
                                  "searchterm": loc[2],
                                  "placename": loc[3],
                                  "countrycode": loc[5]}
                 print('Formatted loc: {}'.format(formatted_loc))
                 locations.append(formatted_loc)
         except Exception as e:
             # Best effort: one bad entity must not abort the whole request.
             print(e)
     return locations