WHERE l.link_type = 90 ) ORDER BY rg.artist_credit ''' query_rg_release_discogs = ''' SELECT url.url FROM l_release_url l_ru JOIN link l ON l_ru.link = l.id JOIN release ON release.id = l_ru.entity0 JOIN release_group rg ON rg.id = release.release_group JOIN url ON url.id = l_ru.entity1 WHERE release.release_group = %s AND l.link_type = 76 ''' bot_blacklist = blacklist.discogs_links('release-group') bot_blacklist_new = set() discogs_release_group_set = set((gid, url) for gid, url in db.execute('''SELECT gid, url FROM bot_discogs_release_group_set''')) discogs_release_group_set |= bot_blacklist discogs_release_group_missing = set(gid for gid, in db.execute('''SELECT gid FROM bot_discogs_release_group_missing''')) discogs_release_group_problematic = set(gid for gid, in db.execute('''SELECT gid FROM bot_discogs_release_group_problematic''')) def are_similar(name1, name2): name1, name2 = (asciipunct(s.strip().lower()) for s in (name1, name2)) ratio = Levenshtein.jaro_winkler(name1, name2) return ratio >= 0.8 or name1 in name2 or name2 in name1 def discogs_artists_str(artists): if len(artists) > 1: return ' and '.join([', '.join([a.name for a in artists[:-1]]), artists[-1].name]) else:
ratio = Levenshtein.jaro_winkler(name1, name2, 0.0) # no common prefix length return ratio >= 0.8


def combine_names(names):
    """Wrap every name in curly quotes and join them into one phrase.

    A single name yields just the quoted name.  With several names, all but
    the last are separated by ', ' and the last is appended after ' and '.
    ``names`` must be a non-empty sequence; an empty one raises IndexError.
    """
    if len(names) <= 1:
        return u'“' + names[0] + u'”'
    quoted = [u'“' + n + u'”' for n in names]
    return u' and '.join([', '.join(quoted[:-1]), quoted[-1]])


def artist_credit(ac):
    """Build the display string for the artist credit with id ``ac``.

    Each credited name is followed by its join phrase (nothing when the join
    phrase is empty or NULL), in ``position`` order.
    """
    rows = db.execute('''SELECT acn.name,acn.join_phrase from artist_credit ac JOIN artist_credit_name acn ON acn.artist_credit = ac.id WHERE ac.id = %s ORDER BY position''', ac)
    pieces = []
    for name, join_phrase in rows:
        pieces.append(u'%s%s' % (name, join_phrase or u''))
    return u''.join(pieces)


def discogs_artist_url(discogs_artist):
    """Return the discogs.com artist URL built from ``discogs_artist.data['id']``."""
    artist_id = discogs_artist.data['id']
    return u'http://www.discogs.com/artist/%d' % artist_id


# 'artist' entries returned by the blacklist module.
bot_blacklist = blacklist.discogs_links('artist')
bot_blacklist_new = set()

# Start from the (gid, url) pairs stored in bot_discogs_artist_set, add the
# blacklist entries, then keep only the gids.
discogs_artist_set = set(
    (gid, url)
    for gid, url in db.execute('''SELECT gid, url FROM bot_discogs_artist_set'''))
discogs_artist_set |= bot_blacklist
discogs_artist_set = set(gid for gid, url in discogs_artist_set)

# gids stored in the bot_discogs_artist_problematic table.
discogs_artist_problematic = set(
    gid for gid, in db.execute('''SELECT gid FROM bot_discogs_artist_problematic'''))


def main(verbose=False):
    normal_edits_left, edits_left = mb.edits_left()
    d = defaultdict(dict)
    # Index the query_missing rows as d[a][r], skipping any row whose a_gid
    # is listed in discogs_artist_problematic.
    for r, r_gid, t_name, t_pos, m_pos, url, a, a_gid, ac in db.execute(query_missing):
        if a_gid in discogs_artist_problematic:
            continue
        d[a][r] = (r, r_gid, t_name, t_pos, m_pos, url, a, a_gid, ac)