# NOTE(review): whitespace-collapsed fragment of a loop body. The loop(s) that the
# `continue`/`break` statements belong to start outside this view, and the original
# nesting of the statements cannot be recovered from this line alone.
# NOTE(review): as collapsed, the inline `#if ratio < min_ratio:` turns everything
# after it on this line into comment text -- restore the line breaks before running.
# Visible steps, in order:
#   1. Build a set from the first column of the query_artist_albums rows.
#   2. Remove every album for which mangle_name(name) occurs in mangle_name(album).
#   3. `continue` when no albums remain.
#   4. Collect albums whose mangled name is longer than 4 and occurs in `page`.
#   5. Print found/total ratio; `continue` when fewer than 2 albums were found.
#      min_ratio = 0.2 is assigned, but the ratio check is commented out.
#   6. Build a ko.wikipedia.org URL and an edit note, print both, call
#      mb.add_url("artist", gid, 179, url, text), then `break`.
#   7. Insert `gid` into bot_wp_artist_ko; print `processed, skipped`.
found_albums = [] albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))]) albums_to_ignore = set() for album in albums: if mangle_name(name) in mangle_name(album): albums_to_ignore.add(album) albums -= albums_to_ignore if not albums: continue for album in albums: mangled_album = mangle_name(album) if len(mangled_album) > 4 and mangled_album in page: found_albums.append(album) ratio = len(found_albums) * 1.0 / len(albums) print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums)) min_ratio = 0.2 if len(found_albums) < 2: continue #if ratio < min_ratio: # continue url = 'http://ko.wikipedia.org/wiki/%s' % (quote_page_title(page_title),) text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),) print ' * linking to %s' % (url,) print ' * edit note: %s' % (text,) mb.add_url("artist", gid, 179, url, text) break db.execute("INSERT INTO bot_wp_artist_ko (gid) VALUES (%s)", (gid,)) print processed, skipped
# NOTE(review): whitespace-collapsed fragment that begins inside a loop body. The
# loop header(s) for the leading statements and for `continue`/`break` are outside
# this view, and the original nesting cannot be recovered from this line alone.
# Visible steps, in order:
#   1. Add `track` to tracks_to_ignore when its mangled name has length <= 4 or
#      when mangle_name(rg_name) occurs in mangle_name(track); subtract that set
#      from `tracks`.
#   2. `continue` when fewer than 5 tracks remain.
#   3. Collect tracks whose mangled name is longer than 4 and occurs in `page`.
#   4. Log the found/total ratio; min_ratio is 0.7 when len(rg_name) > 4, else 1.0.
#      Below min_ratio: log a warning and `continue`.
#   5. `auto` is true when ratio > 0.75 and rg_sec_types is None or contains
#      neither 'Compilation' nor 'Soundtrack'.
#   6. Build the Wikipedia URL, the Wikidata URL (upper-cased wikidata_id) and the
#      edit note, log them, sleep 5 seconds, call
#      mb.add_url("release_group", rg_gid, 353, wd_url, text, auto=auto), `break`.
#   7. Insert (rg_gid, wp_lang) into bot_wp_rg_link when `processed` is None,
#      otherwise set processed = now() on the matching row.
mangled_track = mangle_name(track) if len(mangled_track) <= 4 or mangle_name(rg_name) in mangle_name(track): tracks_to_ignore.add(track) tracks -= tracks_to_ignore if len(tracks) < 5: continue for track in tracks: mangled_track = mangle_name(track) if len(mangled_track) > 4 and mangled_track in page: found_tracks.append(track) ratio = len(found_tracks) * 1.0 / len(tracks) out(' * ratio: %s, has tracks: %s, found tracks: %s' % (ratio, len(tracks), len(found_tracks))) min_ratio = 0.7 if len(rg_name) > 4 else 1.0 if ratio < min_ratio: colored_out(bcolors.WARNING, ' => ratio too low (min = %s)' % min_ratio) continue auto = ratio > 0.75 and (rg_sec_types is None or ('Compilation' not in rg_sec_types and 'Soundtrack' not in rg_sec_types)) wp_url = 'https://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),) wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper() text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions artist "%s" and %s.' % (wp_url, ac_name, join_names('track', found_tracks),) colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url,)) out(' * edit note: %s' % (text,)) time.sleep(5) mb.add_url("release_group", rg_gid, 353, wd_url, text, auto=auto) break if processed is None: db.execute("INSERT INTO bot_wp_rg_link (gid, lang) VALUES (%s, %s)", (rg_gid, wp_lang)) else: db.execute("UPDATE bot_wp_rg_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (rg_gid, wp_lang))
# NOTE(review): whitespace-collapsed fragment of a loop body. Because the line
# starts with `#`, the whole collapsed line is currently a comment -- restore the
# line breaks before running. The loops targeted by `continue`/`break` are outside
# this view, and the original nesting cannot be recovered from this line alone.
# Visible steps, in order:
#   1. When wp_lang != 'en' or wp_lang is a key of acceptable_countries_for_lang:
#      `continue` if wp_lang is not in that mapping; otherwise call
#      determine_country(wikipage) and, if the country is not in
#      acceptable_countries_for_lang[wp_lang], log it and `continue`.
#   2. Build the Wikipedia URL, the Wikidata URL (upper-cased wikidata_id) and an
#      edit note listing `reasons`; log them, sleep 60 seconds, call
#      mb.add_url("artist", artist['gid'], 352, wd_url, text), then `break`.
#   3. When artist['processed'] is None, insert (gid, lang) into bot_wp_artist_link.
# Check if wikipedia lang is compatible with artist country if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang: if wp_lang not in acceptable_countries_for_lang: continue country, country_reasons = determine_country(wikipage) if (country not in acceptable_countries_for_lang[wp_lang]): colored_out( bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang)) continue wp_url = 'http://%s.wikipedia.org/wiki/%s' % ( wp_lang, quote_page_title(page_title), ) wd_url = 'http://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper( ) text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions %s.' % ( wp_url, ', '.join(reasons)) colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url, )) out(' * edit note: %s' % (text, )) time.sleep(60) mb.add_url("artist", artist['gid'], 352, wd_url, text) break if artist['processed'] is None: db.execute( "INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang))
# NOTE(review): whitespace-collapsed fragment of a loop body. The loop(s) that the
# `continue`/`break` statements belong to start outside this view, and the original
# nesting of the statements cannot be recovered from this line alone.
# Visible steps, in order:
#   1. Skip (`continue`) when "disambiguationpages" occurs in `page`, or when
#      "recordlabels" does not occur in `page`; each case prints a message.
#   2. Take the title of pages[0] and print it.
#   3. Build a set from the first column of the query_label_artists rows, remove
#      `name` from it if present, and `continue` when nothing remains.
#   4. Collect artists whose mangled name is longer than 5 and occurs in `page`.
#   5. Print found/total ratio; `continue` when fewer than 2 artists were found.
#   6. Build an en.wikipedia.org URL and an edit note, print both, sleep 60
#      seconds, call mb.add_url("label", gid, 216, url, text), then `break`.
#   7. Insert `gid` into bot_wp_label.
if "disambiguationpages" in page: print " * disambiguation or album page, skipping" continue if "recordlabels" not in page: print " * not a record label page, skipping" continue page_title = pages[0]["title"] print ' * trying article "%s"' % (page_title,) artists = set([r[0] for r in db.execute(query_label_artists, (id,))]) if name in artists: artists.remove(name) if not artists: continue found_artists = [] for artist in artists: mangled_artist = mangle_name(artist) if len(mangled_artist) > 5 and mangled_artist in page: found_artists.append(artist) ratio = len(found_artists) * 1.0 / len(artists) print " * ratio: %s, has artists: %s, found artists: %s" % (ratio, len(artists), len(found_artists)) if len(found_artists) < 2: continue url = "https://en.wikipedia.org/wiki/%s" % (quote_page_title(page_title),) text = "Matched based on the name. The page mentions %s." % (join_names("artist", found_artists),) print " * linking to %s" % (url,) print " * edit note: %s" % (text,) time.sleep(60) mb.add_url("label", gid, 216, url, text) break db.execute("INSERT INTO bot_wp_label (gid) VALUES (%s)", (gid,))
# NOTE(review): whitespace-collapsed fragment of a loop body. The loop(s) that the
# `continue`/`break` statements belong to start outside this view, and the original
# nesting of the statements cannot be recovered from this line alone.
# Visible steps, in order:
#   1. Take the title of pages[0].
#   2. Build a set from the first column of the query_artist_albums rows and remove
#      every album for which mangle_name(name) occurs in mangle_name(album).
#   3. `continue` when no albums remain.
#   4. Collect albums whose mangled name is longer than 4 and occurs in `page`.
#   5. Print found/total ratio; `continue` when fewer than 2 albums were found or
#      when the ratio is below min_ratio (0.2).
#   6. Build a ja.wikipedia.org URL (http scheme) and an edit note, print both,
#      call mb.add_url("artist", gid, 179, url, text), then `break`.
#   7. Insert `gid` into bot_wp_artist_ja; print `processed, skipped`.
page_title = pages[0]["title"] found_albums = [] albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))]) albums_to_ignore = set() for album in albums: if mangle_name(name) in mangle_name(album): albums_to_ignore.add(album) albums -= albums_to_ignore if not albums: continue for album in albums: mangled_album = mangle_name(album) if len(mangled_album) > 4 and mangled_album in page: found_albums.append(album) ratio = len(found_albums) * 1.0 / len(albums) print " * ratio: %s, has albums: %s, found albums: %s" % (ratio, len(albums), len(found_albums)) min_ratio = 0.2 if len(found_albums) < 2: continue if ratio < min_ratio: continue url = "http://ja.wikipedia.org/wiki/%s" % (quote_page_title(page_title),) text = "Matched based on the name. The page mentions %s." % (join_names("album", found_albums),) print " * linking to %s" % (url,) print " * edit note: %s" % (text,) mb.add_url("artist", gid, 179, url, text) break db.execute("INSERT INTO bot_wp_artist_ja (gid) VALUES (%s)", (gid,)) print processed, skipped
# NOTE(review): whitespace-collapsed fragment that begins inside a loop body. The
# loop header(s) for the leading statement and for `continue`/`break` are outside
# this view, and the original nesting cannot be recovered from this line alone.
# NOTE(review): as collapsed, the inline `# Determine if artist matches` turns
# everything after it on this line into comment text -- restore the line breaks
# before running.
# Visible steps, in order:
#   1. Append rel_artist to found_artists; when found_artists is non-empty, add a
#      join_names('related artist', ...) entry to `reasons`; log the counts.
#   2. `continue` when found_albums, found_works, found_artists and found_urls are
#      all empty.
#   3. When wp_lang != 'en' or wp_lang is a key of acceptable_countries_for_lang:
#      `continue` if wp_lang is not in that mapping; otherwise call
#      determine_country(wikipage) and, if the country is not in
#      acceptable_countries_for_lang[wp_lang], log it and `continue`.
#   4. Build the Wikipedia URL and an edit note listing `reasons`, log them, sleep
#      60 seconds, call mb.add_url("artist", artist['gid'], 179, url, text), `break`.
#   5. Insert (gid, lang) into bot_wp_artist_link when artist['processed'] is None,
#      otherwise set processed = now() on the matching row.
found_artists.append(rel_artist) if (found_artists): reasons.append(join_names('related artist', found_artists)) out(' * has related artists: %s, found related artists: %s' % (len(artists), len(found_artists))) # Determine if artist matches if not found_albums and not found_works and not found_artists and not found_urls: continue # Check if wikipedia lang is compatible with artist country if wp_lang != 'en' or wp_lang in acceptable_countries_for_lang: if wp_lang not in acceptable_countries_for_lang: continue country, country_reasons = determine_country(wikipage) if (country not in acceptable_countries_for_lang[wp_lang]): colored_out(bcolors.HEADER, ' * artist country (%s) not compatible with wiki language (%s)' % (country, wp_lang)) continue url = 'http://%s.wikipedia.org/wiki/%s' % (wp_lang, quote_page_title(page_title),) text = 'Matched based on the name. The page mentions %s.' % (', '.join(reasons),) colored_out(bcolors.OKGREEN, ' * linking to %s' % (url,)) out(' * edit note: %s' % (text,)) time.sleep(60) mb.add_url("artist", artist['gid'], 179, url, text) break if artist['processed'] is None: db.execute("INSERT INTO bot_wp_artist_link (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang)) else: db.execute("UPDATE bot_wp_artist_link SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], wp_lang))
# NOTE(review): whitespace-collapsed fragment of a loop body. The loop(s) that the
# `continue`/`break` statements belong to start outside this view, and the original
# nesting of the statements cannot be recovered from this line alone.
# Visible steps, in order:
#   1. Take the title of pages[0].
#   2. Build a set from the first column of the query_artist_albums rows and remove
#      every album for which mangle_name(name) occurs in mangle_name(album).
#   3. `continue` when no albums remain.
#   4. Collect albums whose mangled name is longer than 4 and occurs in `page`.
#   5. Print found/total ratio; `continue` when fewer than 2 albums were found or
#      when the ratio is below min_ratio (0.2).
#   6. Build a ja.wikipedia.org URL (https scheme) and an edit note, print both,
#      call mb.add_url("artist", gid, 179, url, text), then `break`.
#   7. Insert `gid` into bot_wp_artist_ja; print `processed, skipped`.
page_title = pages[0]['title'] found_albums = [] albums = set([r[0] for r in db.execute(query_artist_albums, (id, id))]) albums_to_ignore = set() for album in albums: if mangle_name(name) in mangle_name(album): albums_to_ignore.add(album) albums -= albums_to_ignore if not albums: continue for album in albums: mangled_album = mangle_name(album) if len(mangled_album) > 4 and mangled_album in page: found_albums.append(album) ratio = len(found_albums) * 1.0 / len(albums) print ' * ratio: %s, has albums: %s, found albums: %s' % (ratio, len(albums), len(found_albums)) min_ratio = 0.2 if len(found_albums) < 2: continue if ratio < min_ratio: continue url = 'https://ja.wikipedia.org/wiki/%s' % (quote_page_title(page_title),) text = 'Matched based on the name. The page mentions %s.' % (join_names('album', found_albums),) print ' * linking to %s' % (url,) print ' * edit note: %s' % (text,) mb.add_url("artist", gid, 179, url, text) break db.execute("INSERT INTO bot_wp_artist_ja (gid) VALUES (%s)", (gid,)) print processed, skipped
found_tracks.append(track) ratio = len(found_tracks) * 1.0 / len(tracks) out(' * ratio: %s, has tracks: %s, found tracks: %s' % (ratio, len(tracks), len(found_tracks))) min_ratio = 0.7 if len(rg_name) > 4 else 1.0 if ratio < min_ratio: colored_out(bcolors.WARNING, ' => ratio too low (min = %s)' % min_ratio) continue auto = ratio > 0.75 and (rg_sec_types is None or ('Compilation' not in rg_sec_types and 'Soundtrack' not in rg_sec_types)) wp_url = 'https://%s.wikipedia.org/wiki/%s' % ( wp_lang, quote_page_title(page_title), ) wd_url = 'https://www.wikidata.org/wiki/%s' % wikipage.wikidata_id.upper( ) text = 'Wikidata identifier found from matching Wikipedia page %s. The page mentions artist "%s" and %s.' % ( wp_url, ac_name, join_names('track', found_tracks), ) colored_out(bcolors.OKGREEN, ' * linking to %s' % (wd_url, )) out(' * edit note: %s' % (text, )) time.sleep(5) mb.add_url("release_group", rg_gid, 353, wd_url, text, auto=auto) break if processed is None: db.execute("INSERT INTO bot_wp_rg_link (gid, lang) VALUES (%s, %s)",