def map_breweries_for_country(db, rb_id, iso_code):
    """Pair every OSM brewery of a country with a RateBeer brewery, if any.

    The OSM breweries are fetched with ``get_osm_breweries(iso_code)`` and
    the RateBeer breweries with ``db.get_rb_breweries_for_country(rb_id)``.
    Each OSM feature is matched by name against the RateBeer breweries:
    first on its ``operator`` property, then on its ``name`` property.

    :param db: object exposing ``get_rb_breweries_for_country(rb_id)``.
    :param rb_id: country identifier handed to the ``db`` lookup.
    :param iso_code: country code handed to ``get_osm_breweries``.
    :return: a list with one dict per OSM feature. ``ratebeer_id`` is the
        ``id`` of the matched RateBeer brewery, or ``None`` when neither
        the operator nor the name matched. ``geom`` is the feature
        geometry serialised as a JSON string.
    """
    # Use the requested country; this used to be hard-coded to 'NO', which
    # silently ignored the ``iso_code`` argument.
    breweries_osm = get_osm_breweries(iso_code)
    breweries_rb = db.get_rb_breweries_for_country(rb_id)
    brewery_matcher = BreweryNameMatcher(breweries_rb)

    matches = []
    for brewery in breweries_osm['features']:
        props = brewery['properties']

        # Operator takes precedence over name; stop at the first hit.
        match = None
        for key in ('operator', 'name'):
            candidate = props[key]
            if candidate is None:
                # unicode(None) would yield the literal text u'None' and
                # be matched as if it were a brewery name, so skip it.
                # NOTE(review): assumes absent tags are stored as None.
                continue
            match = brewery_matcher.match_name(unicode(candidate))
            if match is not None:
                break

        matches.append({
            'osm_id': brewery['id'],
            'ratebeer_id': match['id'] if match is not None else None,
            'website': props['website'],
            'amenity': props['amenity'],
            'housenumber': props['housenumber'],
            'city': props['city'],
            'postcode': props['postcode'],
            'street': props['street'],
            'country': props['country'],
            'name': props['name'],
            'operator': props['operator'],
            'geom': json.dumps(brewery['geometry'])
        })

    return matches
Example #2
0
def match_pol_breweries(breweries_pol, breweries_rb):
    """Group Polet brewery names by the RateBeer brewery they match.

    :param breweries_pol: iterable of brewery names; each one is converted
        with ``unicode`` before being matched.
    :param breweries_rb: RateBeer breweries used to build the matcher.
    :return: a ``defaultdict(list)`` mapping the matched brewery's ``id``
        to the list of Polet names that matched it. Names without a match
        are left out.
    """
    by_rb_id = defaultdict(list)

    name_matcher = BreweryNameMatcher(breweries_rb)
    for pol_name in breweries_pol:
        hit = name_matcher.match_name(unicode(pol_name))
        if hit is None:
            # No RateBeer counterpart for this name.
            continue
        by_rb_id[hit['id']].append(pol_name)
    return by_rb_id
Example #3
0
def match_pol_breweries(breweries_pol, breweries_rb):
    """Bucket Polet brewery names under the id of their RateBeer match.

    :param breweries_pol: iterable of brewery names (passed through
        ``unicode`` before matching).
    :param breweries_rb: RateBeer breweries given to ``BreweryNameMatcher``.
    :return: ``defaultdict(list)`` keyed by matched brewery ``id``; the
        values are the original Polet names. Unmatched names are dropped.
    """
    buckets = defaultdict(list)

    matcher = BreweryNameMatcher(breweries_rb)
    # Lazily pair each name with its match so names are still processed
    # one at a time, in input order.
    paired = (
        (name, matcher.match_name(unicode(name))) for name in breweries_pol
    )
    for name, found in paired:
        if found is not None:
            buckets[found['id']].append(name)
    return buckets
def compare_breweries_geojson(breweries, breweries_rb, country=154):
    """Annotate GeoJSON brewery features with their RateBeer brewery id.

    Only RateBeer breweries whose ``country`` equals ``country`` take part
    in the matching. Every feature whose ``name`` property matches one of
    them gets a ``ratebeer_id`` property set to the match's ``id``;
    features without a match are left untouched.

    :param breweries: mapping with a ``features`` list; each feature has a
        ``properties`` dict containing ``name``. Modified in place.
    :param breweries_rb: iterable of RateBeer brewery dicts with at least
        a ``country`` key.
    :param country: value a RateBeer brewery's ``country`` must equal to
        be considered. Defaults to 154, the value that was previously
        hard-coded (NOTE(review): presumably a RateBeer country id -
        confirm which country it denotes).
    :return: the same ``breweries`` object that was passed in.
    """
    candidates = [b for b in breweries_rb if b['country'] == country]
    matcher = BreweryNameMatcher(candidates)

    for brewery in breweries['features']:
        props = brewery['properties']
        match = matcher.match_name(props['name'])
        if match is not None:
            props['ratebeer_id'] = match['id']
    return breweries
Example #5
0
def compare_breweries(pol_data, breweries_rb):
    """Write matched and unmatched Polet producers to two text files.

    The producers come from ``get_breweries(pol_data, 'Produsent')`` and
    are matched by name against ``breweries_rb``.

    * ``data/match.txt`` receives one ``<producer>: <matched name>`` line
      per producer that matched.
    * ``data/nomatch.txt`` receives one line per producer that did not.

    Both files are truncated on every call and written UTF-8 encoded.

    :param pol_data: Polet data handed to ``get_breweries``.
    :param breweries_rb: RateBeer breweries used to build the matcher.
    :return: None.
    """
    producers = get_breweries(pol_data, 'Produsent')
    name_matcher = BreweryNameMatcher(breweries_rb)

    with open('data/nomatch.txt', 'w') as unmatched_out, \
            open('data/match.txt', 'w') as matched_out:
        for producer in producers:
            hit = name_matcher.match_name(producer)
            if hit is not None:
                line = '%s: %s' % (producer, hit['name'])
                matched_out.write(line.encode('utf8') + '\n')
                continue
            unmatched_out.write(producer.encode('utf8') + '\n')
def compare_beers(pol_data, rb_beers, breweries_rb):
    """Match Polet beers against RateBeer beers and report the deviations.

    Polet producers are first grouped by the RateBeer brewery they match.
    Every beer of those producers is then matched by name (and ABV)
    against the non-retired beers of that RateBeer brewery. The outcome is
    compared with two text files loaded through ``get_textfile``:
    ``beer_compare_fasit.txt`` (the fasit) and ``err_polet.txt`` (known
    errors).

    * A beer without a match is counted if its ``<producer> - <beer>``
      line is among the known errors; otherwise it is reported under
      "NO MATCH", using the ``find_in_fasit`` result when that is truthy
      and the plain line when it is not.
    * A matched beer whose ``<producer> - <beer> :: <rb brewery> - <rb
      beer>`` line is not in the fasit is reported under "ERR", with the
      match score appended when the matcher supplied one.

    Three summary lines are printed and the report is written to
    ``data/beer_errors.txt`` (UTF-8).

    :param pol_data: Polet records; the keys ``Produsent``, ``Varenavn``
        and ``Alkohol`` are read.
    :param rb_beers: RateBeer beers, selected per brewery on
        ``brewery_id``.
    :param breweries_rb: RateBeer breweries, looked up on ``id``.
    :return: None.
    """
    fasit = get_textfile('beer_compare_fasit.txt')
    known_errors = get_textfile('err_polet.txt')

    producers = get_breweries(pol_data, 'Produsent')

    # RateBeer brewery id -> Polet producer names that matched it.
    producers_by_rb_id = defaultdict(list)
    brewery_matcher = BreweryNameMatcher(breweries_rb)
    for producer in producers:
        hit = brewery_matcher.match_name(producer)
        if hit is not None:
            producers_by_rb_id[hit['id']].append(producer)

    mismatches = []
    unmatched = []
    known_error_count = 0
    for rb_id, matched_producers in producers_by_rb_id.iteritems():
        rb_brewery = find_in_list(breweries_rb, 'id', rb_id)['name']
        brewery_beers = findall_in_list(rb_beers, 'brewery_id', rb_id)
        beer_matcher = BeerNameMatcher(
            rb_brewery, brewery_beers, skip_retired=True)

        for producer in matched_producers:
            for pol_beer in findall_in_list(pol_data, 'Produsent', producer):
                beer_name = pol_beer['Varenavn']
                abv = parse_pol_abv(pol_beer['Alkohol'])
                result = beer_matcher.match_name(beer_name, abv=abv)

                # The matcher may hand back a (match, score) tuple.
                if isinstance(result, tuple):
                    result, score = result[0], result[1]
                else:
                    score = None

                if result is None:
                    line = '%s - %s' % (producer, beer_name)
                    if line in known_errors:
                        known_error_count += 1
                    else:
                        unmatched.append(find_in_fasit(line, fasit) or line)
                    continue

                line = '%s - %s :: %s - %s' % (
                    producer, beer_name, rb_brewery, result['name'])
                if line in fasit:
                    continue
                if score is not None:
                    line = '%s (%s)' % (line, score)
                mismatches.append(line)

    print('%s errors' % len(mismatches))
    print('%s nomatch' % len(unmatched))
    print('%s wrong from polet' % known_error_count)

    sections = (
        ('NO MATCH\n\n', unmatched),
        ('\n\nERR\n\n', mismatches),
    )
    with codecs.open('data/beer_errors.txt', 'w', 'utf-8') as err_file:
        for heading, lines in sections:
            err_file.write(heading)
            for line in lines:
                err_file.write('%s\n' % line)
def compare_beers(pol_data, rb_beers, breweries_rb):
    """Report Polet beers whose RateBeer match is missing or unexpected.

    Steps, in order:

    1. Load the fasit (``beer_compare_fasit.txt``) and the known errors
       (``err_polet.txt``) through ``get_textfile``.
    2. Group the Polet producers by the ``id`` of the RateBeer brewery
       their name matches; producers without a match are ignored.
    3. For every beer of every grouped producer, match its ``Varenavn``
       (with the ABV parsed from ``Alkohol``) against the non-retired
       RateBeer beers of that brewery.
    4. Beers with no match are either counted (when listed in the known
       errors) or collected as "no match" entries; matched beers whose
       comparison line is not in the fasit are collected as errors,
       including the score when the matcher returned one.
    5. Print the three counters and write both collections to
       ``data/beer_errors.txt`` as UTF-8.

    :param pol_data: Polet records (``Produsent``, ``Varenavn`` and
        ``Alkohol`` are read from each record).
    :param rb_beers: RateBeer beers, filtered on ``brewery_id``.
    :param breweries_rb: RateBeer breweries, looked up on ``id``.
    :return: None.
    """
    fasit = get_textfile('beer_compare_fasit.txt')
    known_errors = get_textfile('err_polet.txt')

    pol_producers = get_breweries(pol_data, 'Produsent')

    grouped = defaultdict(list)
    brewery_matcher = BreweryNameMatcher(breweries_rb)
    for pol_producer in pol_producers:
        rb_match = brewery_matcher.match_name(pol_producer)
        if rb_match is None:
            continue
        grouped[rb_match['id']].append(pol_producer)

    wrong_matches = []
    missing_matches = []
    polet_error_total = 0
    for brewery_id, pol_names in grouped.iteritems():
        rb_brewery = find_in_list(breweries_rb, 'id', brewery_id)['name']
        candidates = findall_in_list(rb_beers, 'brewery_id', brewery_id)
        beer_matcher = BeerNameMatcher(rb_brewery,
                                       candidates,
                                       skip_retired=True)

        for pol_name in pol_names:
            beers = findall_in_list(pol_data, 'Produsent', pol_name)
            for beer in beers:
                varenavn = beer['Varenavn']
                abv = parse_pol_abv(beer['Alkohol'])
                found = beer_matcher.match_name(varenavn, abv=abv)

                # A tuple result carries the match first, its score second.
                scored = isinstance(found, tuple)
                score = found[1] if scored else None
                rb_beer = found[0] if scored else found

                if rb_beer is not None:
                    entry = '%s - %s :: %s - %s' % (
                        pol_name, varenavn, rb_brewery, rb_beer['name'])
                    if entry not in fasit:
                        if score is not None:
                            entry = '%s (%s)' % (entry, score)
                        wrong_matches.append(entry)
                    continue

                entry = '%s - %s' % (pol_name, varenavn)
                if entry in known_errors:
                    polet_error_total += 1
                    continue
                from_fasit = find_in_fasit(entry, fasit)
                missing_matches.append(from_fasit if from_fasit else entry)

    print('%s errors' % len(wrong_matches))
    print('%s nomatch' % len(missing_matches))
    print('%s wrong from polet' % polet_error_total)

    with codecs.open('data/beer_errors.txt', 'w', 'utf-8') as report:
        report.write('NO MATCH\n\n')
        for entry in missing_matches:
            report.write('%s\n' % entry)

        report.write('\n\nERR\n\n')
        for entry in wrong_matches:
            report.write('%s\n' % entry)