Example #1
0
    def __init__(self, terms, excep, canonize, weight=0):
        """Compile the matcher's regex from a template, terms and exceptions.

        Args:
            terms: value substituted for ``{term}`` in the pattern template.
            excep: value substituted for ``{excep}``; when truthy, a negative
                lookbehind on it is prepended to the template.
            canonize: when truthy, ``terms`` (and ``excep``, if given) are
                passed through ``canonical_form`` before substitution.
            weight: converted with ``int``; ``None`` is treated as 0.
        """
        self.weight = int(0 if weight is None else weight)
        self.canonize = canonize
        if self.canonize:
            terms = canonical_form(terms)

        # NOTE(review): ``self.pattern`` is read before this method assigns
        # it, so it is presumably a class-level template string containing
        # a ``{term}`` placeholder -- confirm against the class body.
        template = self.pattern
        if excep:
            template = r"(?<!{excep})\s*" + template
            if self.canonize:
                excep = canonical_form(excep)

        # The instance attribute ends up holding the compiled regex.
        self.pattern = re.compile(template.format(term=terms, excep=excep))
Example #2
0
def add_geos(table):
    """Return ``table`` with an extra ``geoentity`` column, one entity per row.

    Every string cell of a row is searched in the terms database, passing
    both the raw text and its canonical form. The terms found for the row
    are gathered into a single ``GeoEntity``, which is then geocoded.

    Args:
        table: pandas DataFrame to annotate (iterated with ``iterrows``).

    Returns:
        A new DataFrame made of ``table`` plus a ``geoentity`` column that
        holds each row's ``GeoEntity``.
    """
    geocoder = Geocoder()
    terms_db = TermsDB()
    table_coords = []
    total = len(table)
    try:
        # Count processed rows by position: index labels need not be
        # consecutive integers, so they are not a usable progress counter.
        for position, (_, row) in enumerate(table.iterrows(), start=1):
            row_terms = []
            for cell in row:
                if isinstance(cell, str):
                    terms = terms_db.search(cell, canonical_form(cell))
                    if terms:
                        row_terms.extend(terms)
            entity = GeoEntity(row_terms)
            entity.geocode(geocoder)
            table_coords.append(entity)
            sys.stdout.write("\r %s / %s" % (position, total))
            sys.stdout.flush()
        print("")
    finally:
        # Release the geocoder even if a lookup or geocoding step fails.
        geocoder.close()

    # Reuse the table's own index: with a default RangeIndex the concat
    # below would align on labels and produce NaN-padded rows for any table
    # whose index is not exactly 0..n-1.
    geoentities = pd.Series(table_coords, index=table.index, name="geoentity")
    return pd.concat([table, geoentities], axis=1)
Example #3
0
 def geocode_list(self, strings):
     """Build one geocoded GeoEntity from the terms found in ``strings``.

     Each string is searched in ``self.terms_db`` together with its
     canonical form. All matches are pooled, in input order, into a single
     ``GeoEntity``, which is geocoded with this object and returned.
     """
     pooled = []
     for text in strings:
         matches = self.terms_db.search(text, canonical_form(text))
         if matches:
             pooled.extend(matches)
     entity = GeoEntity(pooled)
     entity.geocode(self)
     return entity
Example #4
0
def load_plans(ensemble_paths):
    """Load pickled plan collections and canonicalize every plan.

    Args:
        ensemble_paths: mapping of ensemble key -> path of a pickle file
            containing an iterable of plans.

    Returns:
        dict with the same keys, each mapped to a list holding
        ``canonical_form(plan)`` for every plan in that file, in file order.
        No de-duplication is performed here.
    """
    def _canonical_plans(pickle_path):
        # NOTE(review): pickle.load can execute arbitrary code from the
        # file -- only point this at trusted paths.
        with open(pickle_path, 'rb') as handle:
            unpickled = pickle.load(handle)
        # Canonicalize after the file has been closed.
        return [canonical_form(entry) for entry in unpickled]

    return {
        name: _canonical_plans(pickle_path)
        for name, pickle_path in ensemble_paths.items()
    }