def map_breweries_for_country(db, rb_id, iso_code):
    """Pair the OSM breweries of one country with RateBeer breweries.

    Each OSM feature is matched by name against the RateBeer breweries that
    ``db.get_rb_breweries_for_country(rb_id)`` returns: the ``operator``
    property is tried first and ``name`` is the fallback.

    :param db: object exposing ``get_rb_breweries_for_country(rb_id)``.
    :param rb_id: country identifier handed to the database lookup.
    :param iso_code: country code handed to ``get_osm_breweries``.
    :returns: a list with one dict per OSM feature.  ``ratebeer_id`` is the
        ``id`` of the matched RateBeer brewery, or ``None`` when neither the
        operator nor the name matched; ``geom`` is the feature geometry
        serialised as a JSON string.
    :raises KeyError: if a feature lacks one of the copied properties.
    """
    # The OSM lookup used to be hard-coded to 'NO', which silently ignored
    # ``iso_code`` and compared every country against Norwegian OSM data.
    breweries_osm = get_osm_breweries(iso_code)
    breweries_rb = db.get_rb_breweries_for_country(rb_id)
    brewery_matcher = BreweryNameMatcher(breweries_rb)

    matches = []
    for brewery in breweries_osm['features']:
        props = brewery['properties']

        # NOTE(review): unicode(None) yields u'None', so a null operator or
        # name is matched as that literal text -- confirm whether the OSM
        # properties can be null here.
        match = brewery_matcher.match_name(unicode(props['operator']))
        if match is None:
            match = brewery_matcher.match_name(unicode(props['name']))

        add = {
            'osm_id': brewery['id'],
            'ratebeer_id': None,
            'website': props['website'],
            'amenity': props['amenity'],
            'housenumber': props['housenumber'],
            'city': props['city'],
            'postcode': props['postcode'],
            'street': props['street'],
            'country': props['country'],
            'name': props['name'],
            'operator': props['operator'],
            'geom': json.dumps(brewery['geometry']),
        }
        if match is not None:
            add['ratebeer_id'] = match['id']
        # Unmatched features are kept too, with ratebeer_id left as None.
        matches.append(add)
    return matches
def match_pol_breweries(breweries_pol, breweries_rb):
    """Group the entries of ``breweries_pol`` by matched RateBeer brewery id.

    :param breweries_pol: iterable of brewery names; each one is converted
        with ``unicode()`` before being handed to the matcher.
    :param breweries_rb: RateBeer breweries used to build the
        ``BreweryNameMatcher``.
    :returns: a ``defaultdict(list)`` mapping the ``id`` of each matched
        RateBeer brewery to the original (unconverted) entries that matched
        it.  Entries without a match are left out.
    """
    name_matcher = BreweryNameMatcher(breweries_rb)
    by_rb_id = defaultdict(list)
    for pol_name in breweries_pol:
        hit = name_matcher.match_name(unicode(pol_name))
        if hit is None:
            # Nothing on the RateBeer side resembles this name.
            continue
        by_rb_id[hit['id']].append(pol_name)
    return by_rb_id
def compare_breweries_geojson(breweries, breweries_rb):
    """Tag GeoJSON brewery features with the id of a matching RateBeer brewery.

    Only RateBeer breweries whose ``country`` equals 154 are considered.
    Every feature whose ``properties['name']`` matches one of them gets a
    ``ratebeer_id`` property; features without a match are left untouched.

    :param breweries: dict with a ``features`` list; it is modified in place.
    :param breweries_rb: iterable of RateBeer brewery dicts with a
        ``country`` key.
    :returns: the same ``breweries`` object that was passed in.
    """
    # NOTE(review): 154 is presumably RateBeer's country id for Norway --
    # confirm against the data source.
    candidates = [rb for rb in breweries_rb if rb['country'] == 154]
    name_matcher = BreweryNameMatcher(candidates)

    for feature in breweries['features']:
        properties = feature['properties']
        hit = name_matcher.match_name(properties['name'])
        if hit is None:
            continue
        properties['ratebeer_id'] = hit['id']
    return breweries
def compare_breweries(pol_data, breweries_rb):
    """Write brewery name matching results to two text files.

    The names come from ``get_breweries(pol_data, 'Produsent')``.  Each name
    that matches a RateBeer brewery is written to ``data/match.txt`` as
    ``<name>: <RateBeer name>``; every other name is written to
    ``data/nomatch.txt``.  Both files are opened in ``'w'`` mode, so earlier
    contents are replaced, and lines are UTF-8 encoded.

    :param pol_data: records passed to ``get_breweries``.
    :param breweries_rb: RateBeer breweries used to build the matcher.
    :returns: ``None``.
    """
    breweries_pol = get_breweries(pol_data, 'Produsent')
    # breweries_rb = wrap_breweries(get_breweries(rb_data, 'brewery'))
    name_matcher = BreweryNameMatcher(breweries_rb)

    with open('data/nomatch.txt', 'w') as unmatched_out:
        with open('data/match.txt', 'w') as matched_out:
            for pol_name in breweries_pol:
                hit = name_matcher.match_name(pol_name)
                if hit is not None:
                    pair = '%s: %s' % (pol_name, hit['name'])
                    matched_out.write(pair.encode('utf8') + '\n')
                    continue
                unmatched_out.write(pol_name.encode('utf8') + '\n')
def compare_beers(pol_data, rb_beers, breweries_rb):
    """Match beers against RateBeer beers and report the discrepancies.

    NOTE(review): a second ``compare_beers`` with the same logic is defined
    later in this file; being defined last, that one replaces this
    definition when the module is loaded.

    Steps:

    1. Load ``beer_compare_fasit.txt`` and ``err_polet.txt`` through
       ``get_textfile``.
    2. Group the ``'Produsent'`` names of ``pol_data`` by the id of the
       RateBeer brewery they match.
    3. For each group, match every beer (``'Varenavn'``, with the ABV parsed
       from ``'Alkohol'``) against that brewery's RateBeer beers.
    4. Print three counters and write the collected lines to
       ``data/beer_errors.txt`` (UTF-8).

    :param pol_data: beer records with ``Produsent``, ``Varenavn`` and
        ``Alkohol`` keys.
    :param rb_beers: RateBeer beer records with a ``brewery_id`` key.
    :param breweries_rb: RateBeer brewery records with ``id`` and ``name``.
    :returns: ``None``.
    """
    fasit = get_textfile('beer_compare_fasit.txt')
    known_errors = get_textfile('err_polet.txt')
    breweries_pol = get_breweries(pol_data, 'Produsent')

    brewery_matcher = BreweryNameMatcher(breweries_rb)
    pol_names_by_rb_id = defaultdict(list)
    for pol_name in breweries_pol:
        hit = brewery_matcher.match_name(pol_name)
        if hit is None:
            continue
        pol_names_by_rb_id[hit['id']].append(pol_name)

    mismatches = []        # matched, but the pairing is not listed in fasit
    unmatched = []         # no RateBeer beer found
    known_error_count = 0  # unmatched lines that are listed in err_polet.txt

    for rb_id, pol_names in pol_names_by_rb_id.iteritems():
        rb_name = find_in_list(breweries_rb, 'id', rb_id)['name']
        beer_matcher = BeerNameMatcher(
            rb_name,
            findall_in_list(rb_beers, 'brewery_id', rb_id),
            skip_retired=True)

        for pol_name in pol_names:
            for pol_beer in findall_in_list(pol_data, 'Produsent', pol_name):
                beer_name = pol_beer['Varenavn']
                result = beer_matcher.match_name(
                    beer_name, abv=parse_pol_abv(pol_beer['Alkohol']))

                # The matcher may return either the match itself or a tuple
                # whose first two items are the match and its score.
                if isinstance(result, tuple):
                    rb_beer, score = result[0], result[1]
                else:
                    rb_beer, score = result, None

                if rb_beer is None:
                    label = '%s - %s' % (pol_name, beer_name)
                    if label in known_errors:
                        known_error_count += 1
                    else:
                        # Use the line returned by find_in_fasit when it
                        # returns a truthy value, else the plain label.
                        unmatched.append(find_in_fasit(label, fasit) or label)
                    continue

                label = '%s - %s :: %s - %s' % (
                    pol_name, beer_name, rb_name, rb_beer['name'])
                if label in fasit:
                    continue
                if score is not None:
                    label = '%s (%s)' % (label, score)
                mismatches.append(label)

    # Parenthesised single-argument print emits the same text under the
    # Python 2 print statement.
    print('%s errors' % len(mismatches))
    print('%s nomatch' % len(unmatched))
    print('%s wrong from polet' % known_error_count)

    with codecs.open('data/beer_errors.txt', 'w', 'utf-8') as err_file:
        err_file.write('NO MATCH\n\n')
        for line in unmatched:
            err_file.write('%s\n' % line)
        err_file.write('\n\nERR\n\n')
        for line in mismatches:
            err_file.write('%s\n' % line)
def compare_beers(pol_data, rb_beers, breweries_rb):
    """Compare beers with RateBeer beers and write a discrepancy report.

    NOTE(review): an earlier ``compare_beers`` with the same logic precedes
    this one in the file; this later definition is the one left bound to the
    name after the module is loaded.

    Producers (``'Produsent'``) found in ``pol_data`` are grouped by the
    RateBeer brewery they match.  Every beer of a grouped producer is then
    matched by ``'Varenavn'`` and parsed ``'Alkohol'`` against that
    brewery's RateBeer beers, and the outcome lands in one of three buckets:

    * no match, line present in ``err_polet.txt``: only counted;
    * no match otherwise: reported under ``NO MATCH``;
    * match whose ``a - b :: c - d`` line is missing from
      ``beer_compare_fasit.txt``: reported under ``ERR``, with the score
      appended when the matcher supplied one.

    The three counts are printed and the report is written to
    ``data/beer_errors.txt`` as UTF-8.

    :param pol_data: beer records with ``Produsent``, ``Varenavn`` and
        ``Alkohol`` keys.
    :param rb_beers: RateBeer beer records with a ``brewery_id`` key.
    :param breweries_rb: RateBeer brewery records with ``id`` and ``name``.
    :returns: ``None``.
    """
    fasit = get_textfile('beer_compare_fasit.txt')
    known_errors = get_textfile('err_polet.txt')
    producers = get_breweries(pol_data, 'Produsent')

    producers_by_rb_id = defaultdict(list)
    brewery_matcher = BreweryNameMatcher(breweries_rb)
    for producer in producers:
        rb_hit = brewery_matcher.match_name(producer)
        if rb_hit is not None:
            producers_by_rb_id[rb_hit['id']].append(producer)

    errors, nomatches = [], []
    polet_errors = 0

    for rb_id, matched_producers in producers_by_rb_id.iteritems():
        rb_brewery = find_in_list(breweries_rb, 'id', rb_id)['name']
        candidates = findall_in_list(rb_beers, 'brewery_id', rb_id)
        beer_matcher = BeerNameMatcher(
            rb_brewery, candidates, skip_retired=True)

        for producer in matched_producers:
            producer_beers = findall_in_list(pol_data, 'Produsent', producer)
            for beer in producer_beers:
                beer_name = beer['Varenavn']
                abv = parse_pol_abv(beer['Alkohol'])
                matched = beer_matcher.match_name(beer_name, abv=abv)

                # A tuple result carries the match first and its score second.
                score = None
                if isinstance(matched, tuple):
                    matched, score = matched[0], matched[1]

                if matched is not None:
                    line = '%s - %s :: %s - %s' % (
                        producer, beer_name, rb_brewery, matched['name'])
                    if line not in fasit:
                        if score is not None:
                            line = '%s (%s)' % (line, score)
                        errors.append(line)
                else:
                    line = '%s - %s' % (producer, beer_name)
                    if line in known_errors:
                        polet_errors += 1
                    else:
                        fasit_entry = find_in_fasit(line, fasit)
                        nomatches.append(fasit_entry if fasit_entry else line)

    # Single-argument parenthesised print behaves like the Python 2 print
    # statement for these messages.
    print('%s errors' % len(errors))
    print('%s nomatch' % len(nomatches))
    print('%s wrong from polet' % polet_errors)

    with codecs.open('data/beer_errors.txt', 'w', 'utf-8') as report:
        report.write('NO MATCH\n\n')
        for entry in nomatches:
            report.write('%s\n' % entry)
        report.write('\n\nERR\n\n')
        for entry in errors:
            report.write('%s\n' % entry)