コード例 #1
0
        # Excerpt from inside a loop whose header is not shown here: decide
        # whether the current candidate page mentions enough of this entity's
        # albums to justify linking it.
        found_albums = []
        # Album titles are the first column of every row returned by
        # query_artist_albums.
        albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))])
        # Discard albums whose mangled title contains the mangled `name`;
        # presumably such titles would match any page that mentions the name
        # itself and so prove nothing -- TODO confirm intent.
        albums_to_ignore = set()
        for album in albums:
            if mangle_name(name) in mangle_name(album):
                albums_to_ignore.add(album)
        albums -= albums_to_ignore
        # Nothing left to compare; this guard also keeps the ratio division
        # below from dividing by zero.
        if not albums:
            continue
        for album in albums:
            mangled_album = mangle_name(album)
            # Only titles longer than 4 mangled characters count as evidence.
            # `page` is assumed to hold the page text in a comparable mangled
            # form -- verify where it is assigned.
            if len(mangled_album) > 4 and mangled_album in page:
                found_albums.append(album)
        # Share of the remaining albums that appear on the page; the `* 1.0`
        # forces float division.
        ratio = len(found_albums) * 1.0 / len(albums)
        print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums))
        # NOTE(review): min_ratio is assigned but never read in this excerpt,
        # because the ratio check below is commented out.
        min_ratio = 0.2
        # Require at least two matching albums before linking.
        if len(found_albums) < 2:
            continue
        #if ratio < min_ratio:
        #    continue
        url = 'http://ko.wikipedia.org/wiki/%s' % (quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),)
        print ' * linking to %s' % (url,)
        print ' * edit note: %s' % (text,)
        # 179 is presumably the link-type id expected by mb.add_url for this
        # kind of URL -- confirm against the mb client.
        mb.add_url("artist", gid, 179, url, text)
        # Stop at the first page that was linked.
        break
    # One indentation level out from the code above: record this gid in
    # bot_wp_artist_ko (presumably so it is not examined again -- confirm
    # against the selecting query).
    db.execute("INSERT INTO bot_wp_artist_ko (gid) VALUES (%s)", (gid,))

# Final counters, printed with a Python 2 print statement.
print processed, skipped

コード例 #2
0
ファイル: wp_links_rgs.py プロジェクト: Freso/musicbrainz-bot
            # Excerpt begins inside a loop over `track` whose header is not
            # shown. Tracks whose mangled title has 4 characters or fewer, or
            # contains the mangled release group name, are marked to be
            # ignored.
            mangled_track = mangle_name(track)
            if len(mangled_track) <= 4 or mangle_name(rg_name) in mangle_name(track):
                tracks_to_ignore.add(track)
        tracks -= tracks_to_ignore
        # Need at least 5 usable tracks to attempt a match; this also means
        # the ratio division below cannot divide by zero.
        if len(tracks) < 5:
            continue
        for track in tracks:
            mangled_track = mangle_name(track)
            # `page` is assumed to hold the page text in a comparable mangled
            # form -- verify where it is assigned.
            if len(mangled_track) > 4 and mangled_track in page:
                found_tracks.append(track)
        # Share of the usable tracks that appear on the page.
        ratio = len(found_tracks) * 1.0 / len(tracks)
        out(' * ratio: %s, has tracks: %s, found tracks: %s' % (ratio, len(tracks), len(found_tracks)))
        # Release group names of 4 characters or fewer require every track to
        # be found; longer names require 70%.
        min_ratio = 0.7 if len(rg_name) > 4 else 1.0
        if ratio < min_ratio:
            colored_out(bcolors.WARNING, '  => ratio too low (min = %s)' % min_ratio)
            continue
        # `auto` is true only above a 0.75 ratio and when the release group is
        # not typed Compilation or Soundtrack; it is passed on to mb.add_url.
        auto = ratio > 0.75 and (rg_sec_types is None or ('Compilation' not in rg_sec_types and 'Soundtrack' not in rg_sec_types))

        # The Wikipedia URL is only quoted in the edit note; the URL actually
        # submitted is the Wikidata one.
        wp_url = 'https://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),)
        wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper()
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions artist "%s" and %s.' % (wp_url, ac_name, join_names('track', found_tracks),)
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url,))
        out(' * edit note: %s' % (text,))
        # Pause 5 seconds before submitting (presumably rate limiting).
        time.sleep(5)
        # 353 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("release_group", rg_gid, 353, wd_url, text, auto=auto)
        # Stop at the first page that was linked.
        break
    # One indentation level out: insert a tracking row when `processed` is
    # None, otherwise refresh the existing row's processed timestamp.
    if processed is None:
        db.execute("INSERT INTO bot_wp_rg_link (gid, lang) VALUES (%s, %s)", (rg_gid, wp_lang))
    else:
        db.execute("UPDATE bot_wp_rg_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (rg_gid, wp_lang))
コード例 #3
0
        # Check that the Wikipedia language is compatible with the artist's
        # country. English is checked only if it has an entry in
        # acceptable_countries_for_lang; any other language without an entry
        # is skipped outright.
        if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang:
            if wp_lang not in acceptable_countries_for_lang:
                continue
            # determine_country returns a pair; country_reasons is not used in
            # this excerpt.
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(
                    bcolors.HEADER,
                    ' * artist country (%s) not compatible with wiki language (%s)'
                    % (country, wp_lang))
                continue

        # The Wikipedia URL is only quoted in the edit note; the URL actually
        # submitted is the Wikidata one.
        wp_url = 'http://%s.wikipedia.org/wiki/%s' % (
            wp_lang,
            quote_page_title(page_title),
        )
        wd_url = 'http://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper(
        )
        # `reasons` is built outside this excerpt.
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % (
            wp_url, ', '.join(reasons))
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url, ))
        out(' * edit note: %s' % (text, ))
        # Pause 60 seconds before submitting (presumably rate limiting).
        time.sleep(60)
        # 352 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("artist", artist['gid'], 352, wd_url, text)
        # Stop at the first page that was linked.
        break

    # One indentation level out: insert a tracking row for an artist that has
    # no processed value yet.
    if artist['processed'] is None:
        db.execute(
            "INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)",
            (artist['gid'], wp_lang))
コード例 #4
0
        # Excerpt from inside a loop whose header is not shown: filter the
        # candidate page by marker strings, then check whether it mentions
        # enough of the label's artists.
        # `page` is assumed to hold the page text in a mangled form in which
        # these marker strings can occur -- verify where it is assigned.
        if "disambiguationpages" in page:
            print " * disambiguation or album page, skipping"
            continue
        if "recordlabels" not in page:
            print " * not a record label page, skipping"
            continue
        page_title = pages[0]["title"]
        print ' * trying article "%s"' % (page_title,)
        # Artist names are the first column of every row returned by
        # query_label_artists.
        artists = set([r[0] for r in db.execute(query_label_artists, (id,))])
        # An artist with exactly the same name as `name` is not evidence.
        if name in artists:
            artists.remove(name)
        # Nothing left to compare; this guard also keeps the ratio division
        # below from dividing by zero.
        if not artists:
            continue
        found_artists = []
        for artist in artists:
            mangled_artist = mangle_name(artist)
            # Only names longer than 5 mangled characters count as evidence.
            if len(mangled_artist) > 5 and mangled_artist in page:
                found_artists.append(artist)
        # The ratio is only printed; the decision below uses the absolute
        # count of found artists.
        ratio = len(found_artists) * 1.0 / len(artists)
        print " * ratio: %s, has artists: %s, found artists: %s" % (ratio, len(artists), len(found_artists))
        # Require at least two matching artists before linking.
        if len(found_artists) < 2:
            continue
        url = "https://en.wikipedia.org/wiki/%s" % (quote_page_title(page_title),)
        text = "Matched based on the name. The page mentions %s." % (join_names("artist", found_artists),)
        print " * linking to %s" % (url,)
        print " * edit note: %s" % (text,)
        # Pause 60 seconds before submitting (presumably rate limiting).
        time.sleep(60)
        # 216 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("label", gid, 216, url, text)
        # Stop at the first page that was linked.
        break
    # One indentation level out: record this gid in bot_wp_label (presumably
    # so it is not examined again -- confirm against the selecting query).
    db.execute("INSERT INTO bot_wp_label (gid) VALUES (%s)", (gid,))
コード例 #5
0
        # Excerpt from inside a loop whose header is not shown: decide whether
        # the first page in `pages` mentions enough of this entity's albums to
        # justify linking it.
        page_title = pages[0]["title"]
        found_albums = []
        # Album titles are the first column of every row returned by
        # query_artist_albums.
        albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))])
        # Discard albums whose mangled title contains the mangled `name`;
        # presumably such titles would match any page that mentions the name
        # itself and so prove nothing -- TODO confirm intent.
        albums_to_ignore = set()
        for album in albums:
            if mangle_name(name) in mangle_name(album):
                albums_to_ignore.add(album)
        albums -= albums_to_ignore
        # Nothing left to compare; this guard also keeps the ratio division
        # below from dividing by zero.
        if not albums:
            continue
        for album in albums:
            mangled_album = mangle_name(album)
            # Only titles longer than 4 mangled characters count as evidence.
            # `page` is assumed to hold the page text in a comparable mangled
            # form -- verify where it is assigned.
            if len(mangled_album) > 4 and mangled_album in page:
                found_albums.append(album)
        # Share of the remaining albums that appear on the page.
        ratio = len(found_albums) * 1.0 / len(albums)
        print " * ratio: %s, has albums: %s, found albums: %s" % (ratio, len(albums), len(found_albums))
        # Linking requires both at least two matching albums and at least 20%
        # of the remaining albums to be mentioned.
        min_ratio = 0.2
        if len(found_albums) < 2:
            continue
        if ratio < min_ratio:
            continue
        url = "http://ja.wikipedia.org/wiki/%s" % (quote_page_title(page_title),)
        text = "Matched based on the name. The page mentions %s." % (join_names("album", found_albums),)
        print " * linking to %s" % (url,)
        print " * edit note: %s" % (text,)
        # 179 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("artist", gid, 179, url, text)
        # Stop at the first page that was linked.
        break
    # One indentation level out: record this gid in bot_wp_artist_ja
    # (presumably so it is not examined again -- confirm against the
    # selecting query).
    db.execute("INSERT INTO bot_wp_artist_ja (gid) VALUES (%s)", (gid,))

# Final counters, printed with a Python 2 print statement.
print processed, skipped
コード例 #6
0
                # Excerpt begins inside a loop over related artists whose
                # header and match condition are not shown.
                found_artists.append(rel_artist)
        # Related artists found on the page become one more reason quoted in
        # the edit note.
        if (found_artists):
            reasons.append(join_names('related artist', found_artists))
            out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists)))

        # Determine if artist matches: skip the page when none of the four
        # evidence lists (albums, works, related artists, URLs) has an entry.
        if not found_albums and not found_works and not found_artists and not found_urls:
            continue

        # Check that the Wikipedia language is compatible with the artist's
        # country. English is checked only if it has an entry in
        # acceptable_countries_for_lang; any other language without an entry
        # is skipped outright.
        if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang:
            if wp_lang not in acceptable_countries_for_lang:
                continue
            # determine_country returns a pair; country_reasons is not used in
            # this excerpt.
            country, country_reasons = determine_country(wikipage)
            if (country not in acceptable_countries_for_lang[wp_lang]):
                colored_out(bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang))
                continue

        url = 'http://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (', '.join(reasons),)
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (url,))
        out(' * edit note: %s' % (text,))
        # Pause 60 seconds before submitting (presumably rate limiting).
        time.sleep(60)
        # 179 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("artist", artist['gid'], 179, url, text)
        # Stop at the first page that was linked.
        break

    # One indentation level out: insert a tracking row when the artist has no
    # processed value yet, otherwise refresh the row's processed timestamp.
    if artist['processed'] is None:
        db.execute("INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang))
    else:
        db.execute("UPDATE bot_wp_artist_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], wp_lang))
コード例 #7
0
        # Excerpt from inside a loop whose header is not shown here: decide
        # whether the current candidate page mentions enough of this entity's
        # albums to justify linking it.
        found_albums = []
        # Album titles are the first column of every row returned by
        # query_artist_albums.
        albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))])
        # Discard albums whose mangled title contains the mangled `name`;
        # presumably such titles would match any page that mentions the name
        # itself and so prove nothing -- TODO confirm intent.
        albums_to_ignore = set()
        for album in albums:
            if mangle_name(name) in mangle_name(album):
                albums_to_ignore.add(album)
        albums -= albums_to_ignore
        # Nothing left to compare; this guard also keeps the ratio division
        # below from dividing by zero.
        if not albums:
            continue
        for album in albums:
            mangled_album = mangle_name(album)
            # Only titles longer than 4 mangled characters count as evidence.
            # `page` is assumed to hold the page text in a comparable mangled
            # form -- verify where it is assigned.
            if len(mangled_album) > 4 and mangled_album in page:
                found_albums.append(album)
        # Share of the remaining albums that appear on the page; the `* 1.0`
        # forces float division.
        ratio = len(found_albums) * 1.0 / len(albums)
        print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums))
        # NOTE(review): min_ratio is assigned but never read in this excerpt,
        # because the ratio check below is commented out.
        min_ratio = 0.2
        # Require at least two matching albums before linking.
        if len(found_albums) < 2:
            continue
        #if ratio < min_ratio:
        #    continue
        url = 'http://ko.wikipedia.org/wiki/%s' % (quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),)
        print ' * linking to %s' % (url,)
        print ' * edit note: %s' % (text,)
        # 179 is presumably the link-type id expected by mb.add_url for this
        # kind of URL -- confirm against the mb client.
        mb.add_url("artist", gid, 179, url, text)
        # Stop at the first page that was linked.
        break
    # One indentation level out from the code above: record this gid in
    # bot_wp_artist_ko (presumably so it is not examined again -- confirm
    # against the selecting query).
    db.execute("INSERT INTO bot_wp_artist_ko (gid) VALUES (%s)", (gid,))

# Final counters, printed with a Python 2 print statement.
print processed, skipped

コード例 #8
0
        # Excerpt from inside a loop whose header is not shown: decide whether
        # the first page in `pages` mentions enough of this entity's albums to
        # justify linking it.
        page_title = pages[0]['title']
        found_albums = []
        # Album titles are the first column of every row returned by
        # query_artist_albums.
        albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))])
        # Discard albums whose mangled title contains the mangled `name`;
        # presumably such titles would match any page that mentions the name
        # itself and so prove nothing -- TODO confirm intent.
        albums_to_ignore = set()
        for album in albums:
            if mangle_name(name) in mangle_name(album):
                albums_to_ignore.add(album)
        albums -= albums_to_ignore
        # Nothing left to compare; this guard also keeps the ratio division
        # below from dividing by zero.
        if not albums:
            continue
        for album in albums:
            mangled_album = mangle_name(album)
            # Only titles longer than 4 mangled characters count as evidence.
            # `page` is assumed to hold the page text in a comparable mangled
            # form -- verify where it is assigned.
            if len(mangled_album) > 4 and mangled_album in page:
                found_albums.append(album)
        # Share of the remaining albums that appear on the page.
        ratio = len(found_albums) * 1.0 / len(albums)
        print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums))
        # Linking requires both at least two matching albums and at least 20%
        # of the remaining albums to be mentioned.
        min_ratio = 0.2
        if len(found_albums) < 2:
            continue
        if ratio < min_ratio:
            continue
        url = 'https://ja.wikipedia.org/wiki/%s' % (quote_page_title(page_title),)
        text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),)
        print ' * linking to %s' % (url,)
        print ' * edit note: %s' % (text,)
        # 179 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("artist", gid, 179, url, text)
        # Stop at the first page that was linked.
        break
    # One indentation level out: record this gid in bot_wp_artist_ja
    # (presumably so it is not examined again -- confirm against the
    # selecting query).
    db.execute("INSERT INTO bot_wp_artist_ja (gid) VALUES (%s)", (gid,))

# Final counters, printed with a Python 2 print statement.
print processed, skipped
コード例 #9
0
                # Excerpt begins inside a loop over tracks whose header and
                # match condition are not shown.
                found_tracks.append(track)
        # Share of the tracks that were found. NOTE(review): no guard against
        # an empty `tracks` is visible in this excerpt -- confirm that one
        # exists earlier.
        ratio = len(found_tracks) * 1.0 / len(tracks)
        out(' * ratio: %s, has tracks: %s, found tracks: %s' %
            (ratio, len(tracks), len(found_tracks)))
        # Release group names of 4 characters or fewer require every track to
        # be found; longer names require 70%.
        min_ratio = 0.7 if len(rg_name) > 4 else 1.0
        if ratio < min_ratio:
            colored_out(bcolors.WARNING,
                        '  => ratio too low (min = %s)' % min_ratio)
            continue
        # `auto` is true only above a 0.75 ratio and when the release group is
        # not typed Compilation or Soundtrack; it is passed on to mb.add_url.
        auto = ratio > 0.75 and (rg_sec_types is None or
                                 ('Compilation' not in rg_sec_types
                                  and 'Soundtrack' not in rg_sec_types))

        # The Wikipedia URL is only quoted in the edit note; the URL actually
        # submitted is the Wikidata one.
        wp_url = 'https://%s.wikipedia.org/wiki/%s' % (
            wp_lang,
            quote_page_title(page_title),
        )
        wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper(
        )
        text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions artist "%s" and %s.' % (
            wp_url,
            ac_name,
            join_names('track', found_tracks),
        )
        colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url, ))
        out(' * edit note: %s' % (text, ))
        # Pause 5 seconds before submitting (presumably rate limiting).
        time.sleep(5)
        # 353 is presumably the link-type id expected by mb.add_url -- confirm.
        mb.add_url("release_group", rg_gid, 353, wd_url, text, auto=auto)
        # Stop at the first page that was linked.
        break
    if processed is None:
        db.execute("INSERT INTO bot_wp_rg_link (gid, lang) VALUES (%s, %s)",