Ejemplo n.º 1
0
        found_albums = []
        albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))])
        albums_to_ignore = set()
        for album in albums:
            if mangle_name(name) in mangle_name(album):
                albums_to_ignore.add(album)
        albums -= albums_to_ignore
        if not albums:
            continue
        for album in albums:
            mangled_album = mangle_name(album)
            if len(mangled_album) > 4 and mangled_album in page:
                found_albums.append(album)
        ratio = len(found_albums) * 1.0 / len(albums)
        print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums))
        min_ratio = 0.2
        if len(found_albums) < 2:
            continue
        #if ratio < min_ratio:
        #    continue
        url = 'http://ko.wikipedia.org/wiki/%s' % (quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),)
        print ' * linking to %s' % (url,)
        print ' * edit note: %s' % (text,)
        mb.add_url("artist", gid, 179, url, text)
        break
    db.execute("INSERT INTO bot_wp_artist_ko (gid) VALUES (%s)", (gid,))

print processed, skipped

Ejemplo n.º 2
0
        if "disambiguationpages" in page:
            print " * disambiguation or album page, skipping"
            continue
        if "recordlabels" not in page:
            print " * not a record label page, skipping"
            continue
        page_title = pages[0]["title"]
        print ' * trying article "%s"' % (page_title,)
        artists = set([r[0] for r in db.execute(query_label_artists, (id,))])
        if name in artists:
            artists.remove(name)
        if not artists:
            continue
        found_artists = []
        for artist in artists:
            mangled_artist = mangle_name(artist)
            if len(mangled_artist) > 5 and mangled_artist in page:
                found_artists.append(artist)
        ratio = len(found_artists) * 1.0 / len(artists)
        print " * ratio: %s, has artists: %s, found artists: %s" % (ratio, len(artists), len(found_artists))
        if len(found_artists) < 2:
            continue
        url = "https://en.wikipedia.org/wiki/%s" % (quote_page_title(page_title),)
        text = "Matched based on the name. The page mentions %s." % (join_names("artist", found_artists),)
        print " * linking to %s" % (url,)
        print " * edit note: %s" % (text,)
        time.sleep(60)
        mb.add_url("label", gid, 216, url, text)
        break
    db.execute("INSERT INTO bot_wp_label (gid) VALUES (%s)", (gid,))
Ejemplo n.º 3
0
def main(verbose=False):
    download_if_modified(bbc_sitemap_url, bbc_sitemap)

    db = db_connect()

    release_redirects = dict(get_release_redirects(db))
    release_groups = dict(get_release_groups(db))
    releases = dict(get_releases(db))
    bbc_reviews_set = set((gid, url) for gid, url in db.execute("""SELECT gid, url FROM bot_bbc_reviews_set"""))

    review_urls = defaultdict(set)
    for rg, url in get_review_urls(db):
        review_urls[rg].add(url)

    cleanup_review_urls = set()
    for cleanup_url in cleanup_urls:
        f = urllib.urlopen(cleanup_url)
        cleanup_review_urls |= set(re.findall(ur"http://www.bbc.co.uk/music/reviews/[0-9a-z]+", f.read()))

    editor_id = db.execute("""SELECT id FROM editor WHERE name = %s""", cfg.MB_USERNAME).first()[0]
    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE, editor_id=editor_id)

    normal_edits_left, edits_left = mb.edits_left()

    bbc_reviews = list(load_bbc_reviews(bbc_sitemap))
    count = len(bbc_reviews)
    for i, (review_url, release_url, title) in enumerate(bbc_reviews):
        if normal_edits_left <= 0:
            break
        if verbose:
            out(u"%d/%d - %.2f%%" % (i + 1, count, (i + 1) * 100.0 / count))
            out(u"%s %s" % (title, review_url))
            out(release_url)
        if review_url in cleanup_review_urls:
            continue
        release_gid = utils.extract_mbid(release_url, "release")
        row = release_redirects.get(release_gid)
        if not row:
            row = releases.get(release_gid)
        if not row:
            if verbose:
                out("  non-existant release in review %s" % review_url)
            continue
        rg, ac, release_name = row
        gid, name = release_groups[rg]
        if review_url in review_urls[rg]:
            continue
        if (gid, review_url) in bbc_reviews_set:
            if verbose:
                out(u"  already linked earlier (probably got removed by some editor!")
            continue
        mb_title = "%s - %s" % (artist_credit(db, ac), release_name)
        if not are_similar(title, mb_title):
            if verbose:
                out(u"  similarity too small: %s <-> %s" % (title, mb_title))
                # out(u'|-\n| [%s %s]\n| [[ReleaseGroup:%s|%s]]\n| [[Release:%s|%s]]' % (review_url, bbc_name, gid, name, release_gid, release_name))
            continue
        text = (
            u"Review is in BBC mapping [1], and review name “%s” is"
            " similar to the release name. If this is wrong,"
            " please note it here and put the correct mapping in"
            " the wiki [2].\n\n[1] %s\n[2] %s" % (title, bbc_sitemap_url, cleanup_urls[0])
        )
        text += "\n\n%s" % prog
        try:
            out(u"http://musicbrainz.org/release-group/%s  ->  %s" % (gid, review_url))
            mb.add_url("release_group", gid, 94, review_url, text, auto=False)
            db.execute("INSERT INTO bot_bbc_reviews_set (gid,url) VALUES (%s,%s)", (gid, review_url))
            bbc_reviews_set.add((gid, review_url))
            normal_edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out(e)
Ejemplo n.º 4
0
        if (found_artists):
            reasons.append(join_names('related artist', found_artists))
            out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists)))

        # Determine if artist matches
        if not found_albums and not found_works and not found_artists and not found_urls:
            continue

        # Check if wikipedia lang is compatible with artist country
        if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang:
            if wp_lang not in acceptable_countries_for_lang:
                continue
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang))
                continue

        wp_url = 'https://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),)
        wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper()
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % (wp_url, ', '.join(reasons))
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url,))
        out(' * edit note: %s' % (text,))
        time.sleep(60)
        mb.add_url("artist", artist['gid'], 352, wd_url, text)
        break

    if artist['processed'] is None:
        db.execute("INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang))
    else:
        db.execute("UPDATE bot_wp_artist_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], wp_lang))
Ejemplo n.º 5
0
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(
                    bcolors.HEADER,
                    ' * artist country (%s) not compatible with wiki language (%s)'
                    % (country, wp_lang))
                continue

        wp_url = 'http://%s.wikipedia.org/wiki/%s' % (
            wp_lang,
            quote_page_title(page_title),
        )
        wd_url = 'http://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper(
        )
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % (
            wp_url, ', '.join(reasons))
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url, ))
        out(' * edit note: %s' % (text, ))
        time.sleep(60)
        mb.add_url("artist", artist['gid'], 352, wd_url, text)
        break

    if artist['processed'] is None:
        db.execute(
            "INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)",
            (artist['gid'], wp_lang))
    else:
        db.execute(
            "UPDATE bot_wp_artist_link SET processed = now() WHERE (gid, lang) = (%s, %s)",
            (artist['gid'], wp_lang))
Ejemplo n.º 6
0
            continue
        found_tracks = []
        tracks = set([r[0] for r in db.execute(query_album_tracks, (rg_id,))])
        tracks_to_ignore = set()
        for track in tracks:
            mangled_track = mangle_name(track)
            if len(mangled_track) <= 4 or mangle_name(rg_name) in mangle_name(track):
                tracks_to_ignore.add(track)
        tracks -= tracks_to_ignore
        if len(tracks) < 5:
            continue
        for track in tracks:
            mangled_track = mangle_name(track)
            if len(mangled_track) > 4 and mangled_track in page:
                found_tracks.append(track)
            else:
                out(" * track %s not found" % (track,))
        ratio = len(found_tracks) * 1.0 / len(tracks)
        out(' * ratio: %s, has tracks: %s, found tracks: %s' % (ratio, len(tracks), len(found_tracks)))
        min_ratio = 0.8 if len(rg_name) > 4 else 1.0
        if ratio < min_ratio:
            continue
        text = 'Matched based on the name. The page mentions artist "%s" and %s.' % (ac_name, join_names('track', found_tracks),)
        out(' * linking to %s' % (url,))
        out(' * edit note: %s' % (text,))
        time.sleep(30)
        mb.add_url("release_group", rg_gid, 89, url, text)
        break
    db.execute("INSERT INTO bot_wp_rg (gid) VALUES (%s)", (rg_gid,))

Ejemplo n.º 7
0
        if 'recordlabels' not in page:
            print ' * not a record label page, skipping'
            continue
        page_title = pages[0]['title']
        print ' * trying article "%s"' % (page_title, )
        artists = set([r[0] for r in db.execute(query_label_artists, (id, ))])
        if name in artists:
            artists.remove(name)
        if not artists:
            continue
        found_artists = []
        for artist in artists:
            mangled_artist = mangle_name(artist)
            if len(mangled_artist) > 5 and mangled_artist in page:
                found_artists.append(artist)
        ratio = len(found_artists) * 1.0 / len(artists)
        print ' * ratio: %s, has artists: %s, found artists: %s' % (
            ratio, len(artists), len(found_artists))
        if len(found_artists) < 2:
            continue
        url = 'https://en.wikipedia.org/wiki/%s' % (
            quote_page_title(page_title), )
        text = 'Matched based on the name. The page mentions %s.' % (
            join_names('artist', found_artists), )
        print ' * linking to %s' % (url, )
        print ' * edit note: %s' % (text, )
        time.sleep(60)
        mb.add_url("label", gid, 216, url, text)
        break
    db.execute("INSERT INTO bot_wp_label (gid) VALUES (%s)", (gid, ))
        found_albums = []
        albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))])
        albums_to_ignore = set()
        for album in albums:
            if mangle_name(name) in mangle_name(album):
                albums_to_ignore.add(album)
        albums -= albums_to_ignore
        if not albums:
            continue
        for album in albums:
            mangled_album = mangle_name(album)
            if len(mangled_album) > 4 and mangled_album in page:
                found_albums.append(album)
        ratio = len(found_albums) * 1.0 / len(albums)
        print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums))
        min_ratio = 0.2
        if len(found_albums) < 2:
            continue
        #if ratio < min_ratio:
        #    continue
        url = 'http://ko.wikipedia.org/wiki/%s' % (quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),)
        print ' * linking to %s' % (url,)
        print ' * edit note: %s' % (text,)
        mb.add_url("artist", gid, 179, url, text)
        break
    db.execute("INSERT INTO bot_wp_artist_ko (gid) VALUES (%s)", (gid,))

print processed, skipped

Ejemplo n.º 9
0
            if wp_lang not in acceptable_countries_for_lang:
                continue
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(
                    bcolors.HEADER,
                    ' * artist country (%s) not compatible with wiki language (%s)'
                    % (country, wp_lang))
                continue

        url = 'http://%s.wikipedia.org/wiki/%s' % (
            wp_lang,
            quote_page_title(page_title),
        )
        text = 'Matched based on the name. The page mentions %s.' % (
            ', '.join(reasons), )
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (url, ))
        out(' * edit note: %s' % (text, ))
        time.sleep(60)
        mb.add_url("artist", artist['gid'], 179, url, text)
        break

    if artist['processed'] is None:
        db.execute(
            "INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)",
            (artist['gid'], wp_lang))
    else:
        db.execute(
            "UPDATE bot_wp_artist_link SET processed = now() WHERE (gid, lang) = (%s, %s)",
            (artist['gid'], wp_lang))
# For every release group, look at the distinct Discogs release URLs attached
# to its releases; when they all resolve to one Discogs master whose name is
# close enough to the release group name, link the release group to it.
for i, (rg, gid, name) in enumerate(itertools.chain(*rg_grouped)):
    discogs_rows = db.execute(query_rg_release_discogs, rg)
    urls = set(row[0] for row in discogs_rows)
    # Fewer than two distinct Discogs links: skip this group.
    if len(urls) < 2:
        continue

    progress = i * 100.0 / count
    out(u'%d/%d - %.2f%%' % (i, count, progress))
    out(u'%s http://musicbrainz.org/release-group/%s' % (name, gid))

    masters = list(discogs_get_master(urls))

    # Work out why this group has to be skipped, if it does.
    abort_message = None
    if not masters:
        abort_message = u'  aborting, no Discogs master!'
    elif len(set(masters)) > 1:
        abort_message = u'  aborting, releases with different Discogs master in one group!'
    elif len(masters) != len(urls):
        abort_message = u'  aborting, releases without Discogs master in group!'
    if abort_message is not None:
        out(abort_message)
        continue

    master_name, master_id, master_artists = masters[0]
    ratio = Levenshtein.ratio(master_name.lower(), name.lower())
    if ratio < 0.8:
        out(u'  Similarity ratio too small: %.2f' % ratio)
        continue

    master_url = 'http://www.discogs.com/master/%d' % master_id
    # Edit note, assembled from its three sentences.
    text = u''.join([
        u'There are %d distinct Discogs links in this release group, and all point to this master URL.\n' % len(urls),
        u'The name of the Discogs master is “%s” (similarity: %.0f%%)' % (master_name, ratio * 100),
        u' by %s.' % master_artists,
    ])
    out(u'  %s\n  %s' % (master_url, text))
    mb.add_url('release_group', gid, 90, master_url, text)
Ejemplo n.º 11
0
            mangled_track = mangle_name(track)
            if len(mangled_track) > 4 and mangled_track in page:
                found_tracks.append(track)
        ratio = len(found_tracks) * 1.0 / len(tracks)
        out(' * ratio: %s, has tracks: %s, found tracks: %s' %
            (ratio, len(tracks), len(found_tracks)))
        min_ratio = 0.7 if len(rg_name) > 4 else 1.0
        if ratio < min_ratio:
            colored_out(bcolors.WARNING,
                        '  => ratio too low (min = %s)' % min_ratio)
            continue
        auto = ratio > 0.75 and (rg_sec_types is None or
                                 ('Compilation' not in rg_sec_types
                                  and 'Soundtrack' not in rg_sec_types))
        text = 'Matched based on the name. The page mentions artist "%s" and %s.' % (
            ac_name,
            join_names('track', found_tracks),
        )
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (url, ))
        out(' * edit note: %s' % (text, ))
        time.sleep(5)
        mb.add_url("release_group", rg_gid, 89, url, text, auto=auto)
        break
    if processed is None:
        db.execute("INSERT INTO bot_wp_rg_link (gid, lang) VALUES (%s, %s)",
                   (rg_gid, wp_lang))
    else:
        db.execute(
            "UPDATE bot_wp_rg_link SET processed = now() WHERE (gid, lang) = (%s, %s)",
            (rg_gid, wp_lang))
Ejemplo n.º 12
0
            except ValueError:
                pass
            except urllib2.HTTPError:
                pass
    for shs_artist in shs_artists:
        shs_artist_name = mangle_name(re.sub(' \[\d+\]$', '', shs_artist['commonName']))
        mb_artist_name = mangle_name(artist['name'])
        if shs_artist_name == mb_artist_name:
            artist_uri = shs_artist['uri']
            break
        elif similarity2(to_unicode(shs_artist_name), to_unicode(mb_artist_name)) > 0.85:
            print " * '%s' has a similarity of %.2f" % (shs_artist['commonName'], similarity2(to_unicode(shs_artist_name), to_unicode(mb_artist_name)))
            artist_uri = shs_artist['uri']
            break

    if artist_uri:
        matched_artists.add(artist['gid'])
        colored_out(bcolors.HEADER, ' * using %s, found artist SHS URL: %s' % (artist['shs_url'], artist_uri))
        edit_note = 'Guessing artist SecondHandSongs URL from work https://musicbrainz.org/work/%s linked to %s' % (artist['work_gid'], artist['shs_url'])
        out(' * edit note: %s' % (edit_note,))
        
        mb.add_url('artist', artist['gid'], str(307), artist_uri, edit_note)
    else:
        colored_out(bcolors.NONE, ' * using %s, no artist SHS URL has been found' % (artist['shs_url'],))

    if artist['processed'] is None and artist['gid'] not in seen_artists:
        db.execute("INSERT INTO bot_shs_link_artist (artist) VALUES (%s)", (artist['gid'],))
    else:
        db.execute("UPDATE bot_shs_link_artist SET processed = now() WHERE artist = %s", (artist['gid'],))
    seen_artists.add(artist['gid'])
Ejemplo n.º 13
0
    artist_key = 'cdbaby:' + album['artist_cdbaby_id']
    if 'type' not in album:
        album['type'] = 'album'
    album_url = 'http://www.cdbaby.com/cd/' + album['_id'].split(':')[1]
    print "adding", album_url
    if 'artist_mbid' not in album:
        artist = db.artists.find_one(artist_key)
        if not artist:
            artist_url = 'http://www.cdbaby.com/Artist/' + album['artist_cdbaby_id']
            mbid = mb.add_artist({'name': album['artist']}, artist_url)
            artist = {'_id': artist_key, 'mbid': mbid}
            db.artists.save(artist)
            print 'added artist', mbid
        album['artist_mbid'] = artist['mbid']
    #pprint.pprint(album)
    edit_note = album_url
    mbid = mb.add_release(album, edit_note)
    mb.add_url('release', mbid, 78, album_url)
    album['mbid'] = mbid
    album['status']['imported'] = True
    db.albums.save(album)
    print 'added release', mbid

    #form = album_to_form(album)
    #print '<form action="http://musicbrainz.org/release/add" method="post">'
    #for name, value in form.iteritems():
    #    print '<input type="hidden" name="%s" value="%s" />' % (html_escape(name), html_escape(unicode(value)))
    #print '<input type="submit" value="Add Release">'
    #print '</form>'

Ejemplo n.º 14
0
    artist_key = 'cdbaby:' + album['artist_cdbaby_id']
    if 'type' not in album:
        album['type'] = 'album'
    album_url = 'http://www.cdbaby.com/cd/' + album['_id'].split(':')[1]
    print "adding", album_url
    if 'artist_mbid' not in album:
        artist = db.artists.find_one(artist_key)
        if not artist:
            artist_url = 'http://www.cdbaby.com/Artist/' + album['artist_cdbaby_id']
            mbid = mb.add_artist({'name': album['artist']}, artist_url)
            artist = {'_id': artist_key, 'mbid': mbid}
            db.artists.save(artist)
            print 'added artist', mbid
        album['artist_mbid'] = artist['mbid']
    #pprint.pprint(album)
    edit_note = album_url
    mbid = mb.add_release(album, edit_note)
    mb.add_url('release', mbid, 78, album_url)
    album['mbid'] = mbid
    album['status']['imported'] = True
    db.albums.save(album)
    print 'added release', mbid

    #form = album_to_form(album)
    #print '<form action="https://musicbrainz.org/release/add" method="post">'
    #for name, value in form.iteritems():
    #    print '<input type="hidden" name="%s" value="%s" />' % (html_escape(name), html_escape(unicode(value)))
    #print '<input type="submit" value="Add Release">'
    #print '</form>'

Ejemplo n.º 15
0
            mangled_track = mangle_name(track)
            if len(mangled_track) <= 4 or mangle_name(rg_name) in mangle_name(track):
                tracks_to_ignore.add(track)
        tracks -= tracks_to_ignore
        if len(tracks) < 5:
            continue
        for track in tracks:
            mangled_track = mangle_name(track)
            if len(mangled_track) > 4 and mangled_track in page:
                found_tracks.append(track)
        ratio = len(found_tracks) * 1.0 / len(tracks)
        out(' * ratio: %s, has tracks: %s, found tracks: %s' % (ratio, len(tracks), len(found_tracks)))
        min_ratio = 0.7 if len(rg_name) > 4 else 1.0
        if ratio < min_ratio:
            colored_out(bcolors.WARNING, '  => ratio too low (min = %s)' % min_ratio)
            continue
        auto = ratio > 0.75 and (rg_sec_types is None or ('Compilation' not in rg_sec_types and 'Soundtrack' not in rg_sec_types))

        wp_url = 'https://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),)
        wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper()
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions artist "%s" and %s.' % (wp_url, ac_name, join_names('track', found_tracks),)
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url,))
        out(' * edit note: %s' % (text,))
        time.sleep(5)
        mb.add_url("release_group", rg_gid, 353, wd_url, text, auto=auto)
        break
    if processed is None:
        db.execute("INSERT INTO bot_wp_rg_link (gid, lang) VALUES (%s, %s)", (rg_gid, wp_lang))
    else:
        db.execute("UPDATE bot_wp_rg_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (rg_gid, wp_lang))
Ejemplo n.º 16
0
                found_artists.append(rel_artist)
        if (found_artists):
            reasons.append(join_names('related artist', found_artists))
            out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists)))

        # Determine if artist matches
        if not found_albums and not found_works and not found_artists and not found_urls:
            continue

        # Check if wikipedia lang is compatible with artist country
        if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang:
            if wp_lang not in acceptable_countries_for_lang:
                continue
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang))
                continue

        url = 'http://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (', '.join(reasons),)
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (url,))
        out(' * edit note: %s' % (text,))
        time.sleep(60)
        mb.add_url("artist", artist['gid'], 179, url, text)
        break

    if artist['processed'] is None:
        db.execute("INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang))
    else:
        db.execute("UPDATE bot_wp_artist_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], wp_lang))
            except ValueError:
                pass
            except urllib2.HTTPError:
                pass
    for shs_artist in shs_artists:
        shs_artist_name = mangle_name(re.sub(' \[\d+\]$', '', shs_artist['commonName']))
        mb_artist_name = mangle_name(artist['name'])
        if shs_artist_name == mb_artist_name:
            artist_uri = shs_artist['uri']
            break
        elif similarity2(to_unicode(shs_artist_name), to_unicode(mb_artist_name)) > 0.85:
            print "%s => similarity = %.2f" % (shs_artist['commonName'], similarity2(to_unicode(shs_artist_name), to_unicode(mb_artist_name)))
            artist_uri = shs_artist['uri']
            break

    if artist_uri:
        matched_artists.add(artist['gid'])
        colored_out(bcolors.HEADER, ' * using %s, found artist SHS URL: %s' % (artist['shs_url'], artist_uri))
        edit_note = 'Guessing artist SecondHandSongs URL from work http://musicbrainz.org/work/%s linked to %s' % (artist['work_gid'], artist['shs_url'])
        out(' * edit note: %s' % (edit_note,))
        
        mb.add_url('artist', artist['gid'], str(307), artist_uri, edit_note)
    else:
        colored_out(bcolors.NONE, ' * using %s, no artist SHS URL has been found' % (artist['shs_url'],))

    if artist['processed'] is None and artist['gid'] not in seen_artists:
        db.execute("INSERT INTO bot_shs_link_artist (artist) VALUES (%s)", (artist['gid'],))
    else:
        db.execute("UPDATE bot_shs_link_artist SET processed = now() WHERE artist = %s", (artist['gid'],))
    seen_artists.add(artist['gid'])
# Visit the per-artist-credit groups of release groups in random order.
rg_grouped = rg_by_ac.values()
random.shuffle(rg_grouped)

for i, (rg, gid, name) in enumerate(itertools.chain(*rg_grouped)):
    # Distinct Discogs URLs found for this release group's releases.
    urls = set()
    for discogs_row in db.execute(query_rg_release_discogs, rg):
        urls.add(discogs_row[0])
    if len(urls) < 2:
        continue

    out(u'%d/%d - %.2f%%' % (i, count, i * 100.0 / count))
    out(u'%s http://musicbrainz.org/release-group/%s' % (name, gid))

    masters = list(discogs_get_master(urls))
    master_total = len(masters)
    if master_total == 0:
        out(u'  aborting, no Discogs master!')
        continue
    if len(set(masters)) > 1:
        out(u'  aborting, releases with different Discogs master in one group!')
        continue
    if master_total != len(urls):
        out(u'  aborting, releases without Discogs master in group!')
        continue

    # All URLs agree on one master: compare its name with the release group name.
    master_name, master_id, master_artists = masters[0]
    ratio = Levenshtein.ratio(master_name.lower(), name.lower())
    if not ratio >= 0.8:
        out(u'  Similarity ratio too small: %.2f' % ratio)
        continue

    master_url = 'http://www.discogs.com/master/%d' % master_id
    link_sentence = u'There are %d distinct Discogs links in this release group, and all point to this master URL.\n' % len(urls)
    name_sentence = u'The name of the Discogs master is “%s” (similarity: %.0f%%)' % (master_name, ratio * 100)
    artist_sentence = u' by %s.' % master_artists
    text = link_sentence + name_sentence + artist_sentence
    out(u'  %s\n  %s' % (master_url, text))
    mb.add_url('release_group', gid, 90, master_url, text)