def main(verbose=False):
    """Add Discogs artist links to artists that appear on a single release.

    Rows from ``query_missing`` are grouped per artist and per release.  An
    artist is only considered when exactly one release remains for it, the
    track sits on the first medium, and the artist credit is used on at most
    one release.  The linked Discogs release is then fetched, the track at the
    same position is located, and both the track title and the (single)
    Discogs artist name must be similar to their MusicBrainz counterparts
    before ``mb.add_url`` is called.

    Artists that fail one of the checks are written to
    ``bot_discogs_artist_problematic``.  Pairs that were already linked by an
    earlier run are collected in ``bot_blacklist_new`` and printed as wiki
    markup at the end.

    :param verbose: when true, print progress and the reason for every skip.
    """
    # The second value returned by mb.edits_left() is not needed here.
    normal_edits_left, _ = mb.edits_left()

    def mark_problematic(gid):
        # Record the artist gid in the "problematic" table.
        # NOTE(review): presumably this table feeds discogs_artist_problematic
        # on later runs -- confirm where that set is loaded.
        db.execute("INSERT INTO bot_discogs_artist_problematic (gid) VALUES (%s)", gid)

    # Loop-invariant patterns, compiled once.
    re_release_url = re.compile(r'^http://www\.discogs\.com/release/([0-9]+)')
    re_disambiguation = re.compile(r'(.*?) \([0-9]+\)')
    re_trailing_article = re.compile(r'(.*?), (The)')

    # artist id -> release id -> full row
    d = defaultdict(dict)
    for r, r_gid, t_name, t_pos, m_pos, url, a, a_gid, ac in db.execute(query_missing):
        if a_gid in discogs_artist_problematic:
            continue
        d[a][r] = (r, r_gid, t_name, t_pos, m_pos, url, a, a_gid, ac)

    count = len(d)
    for i, by_release in enumerate(d.values()):
        if normal_edits_left <= 0:
            break
        # Only artists with exactly one candidate release are handled.
        if len(by_release) != 1:
            continue
        r, r_gid, t_name, t_pos, m_pos, url, a, a_gid, ac = list(by_release.values())[0]

        if m_pos > 1:
            mark_problematic(a_gid)
            continue

        artist_releases = set(release_id for release_id, in db.execute('''SELECT DISTINCT r.id FROM release r JOIN medium m ON m.release = r.id JOIN track t ON t.medium = m.id WHERE t.artist_credit = %s''', ac))
        if len(artist_releases) > 1:
            mark_problematic(a_gid)
            continue

        if verbose:
            out(u'%d/%d - %.2f%%' % (i+1, count, (i+1) * 100.0 / count))
            out('http://musicbrainz.org/release/%s (%d-%d)' % (r_gid, m_pos, t_pos))
            out('%s' % url)
            out('http://musicbrainz.org/artist/%s' % a_gid)

        m = re_release_url.match(url)
        if not m:
            # Not remembered as problematic: the artist is simply skipped.
            if verbose:
                out('skip, is no valid Discogs release URL')
            continue

        discogs_release_id = int(m.group(1))
        discogs_release = discogs.Release(discogs_release_id)
        if discogs_release.data['status'] in ['Draft', 'Rejected']:
            if verbose:
                # The message used to say "is not draft/rejected", which is
                # the opposite of the condition that triggers it.
                out('skip, release is draft/rejected')
            mark_problematic(a_gid)
            continue

        # Find the t_pos-th tracklist entry of type 'Track'; other entry
        # types do not advance the counter.
        t_index = 0
        discogs_track = None
        for t in discogs_release.tracklist:
            if t['type'] == 'Track':
                t_index += 1
                if t_index == t_pos:
                    discogs_track = t
                    break
        if discogs_track is None:
            if verbose:
                out('track not found')
            mark_problematic(a_gid)
            continue

        # Fall back to the release artists when the track lists none.
        discogs_artists = discogs_track['artists']
        if len(discogs_artists) == 0:
            discogs_artists = discogs_release.artists
        if len(discogs_artists) != 1:
            if verbose:
                out('skip, %d track artists' % len(discogs_artists))
            mark_problematic(a_gid)
            continue

        if not are_tracks_similar(discogs_track['title'], t_name):
            if verbose:
                out(u'not similar: %s <-> %s' % (discogs_track['title'], t_name))
            mark_problematic(a_gid)
            continue

        discogs_artist = discogs_artists[0]
        if discogs_artist.name in [u'Various', u'Unknown Artist']:
            if verbose:
                out(u'not linking to Various or Unknown Artist')
            mark_problematic(a_gid)
            continue

        ac_name = artist_credit(ac)
        # Normalise the Discogs name before comparing: drop a trailing
        # " (123)" suffix and turn "Name, The" into "The Name".
        norm_name = discogs_artist.name
        m = re_disambiguation.match(norm_name)
        if m:
            norm_name = m.group(1)
        m = re_trailing_article.match(norm_name)
        if m:
            norm_name = '%s %s' % (m.group(2), m.group(1))
        if not are_artists_similar(norm_name, ac_name):
            if verbose:
                out(u'not similar: %s [%s] <-> %s' % (norm_name, discogs_artist.name, ac_name))
            mark_problematic(a_gid)
            continue

        discogs_url = discogs_artist_url(discogs_artist)
        if a_gid in discogs_artist_set:
            if verbose:
                out(u' already linked earlier (probably got removed by some editor!)')
            if (a_gid, discogs_url) not in bot_blacklist:
                bot_blacklist_new.add((a_gid, discogs_url))
            continue

        # Edit note explaining the evidence for the link.
        text = u'Artist appears on only one release [1] (e.g. medium %d, track %d), which is linked to discogs release [2]. Also, the track names are similar:\n' % (m_pos, t_pos)
        text += u'Discogs: “%s” by %s\n' % (discogs_track['title'], combine_names([x.name for x in discogs_artists]))
        text += u'MBrainz: “%s” by “%s”\n\n' % (t_name, ac_name)
        text += u'[1] http://musicbrainz.org/release/%s\n[2] %s' % (r_gid, url)
        text += '\n\n%s' % prog
        try:
            out(u'http://musicbrainz.org/artist/%s -> %s' % (a_gid,discogs_url))
            mb.add_url('artist', a_gid, 180, discogs_url.encode('utf-8'), text)
            db.execute("INSERT INTO bot_discogs_artist_set (gid,url) VALUES (%s,%s)", (a_gid, discogs_url))
            normal_edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            # Network problems are reported and the artist is retried on a
            # later run, since nothing was recorded for it.
            out(e)

    if bot_blacklist_new:
        out(blacklist.wiki_markup(bot_blacklist_new, 'artist', db))
def main(verbose=False):
    """Link release groups to the Discogs master shared by their releases.

    Every release group returned by ``query_rg_without_master`` is examined:
    the Discogs URLs of its releases are resolved to masters through
    ``discogs_get_master``.  A link is only added when every URL yields a
    master, all masters are identical, and the master name is similar to the
    release group name.  Groups failing a check are written to the
    ``bot_discogs_release_group_missing`` or
    ``bot_discogs_release_group_problematic`` table.

    :param verbose: when true, print progress and the reason for every skip.
    """
    _normal_left, edits_left = mb.edits_left()
    candidates = [(rg, gid, name) for rg, gid, name in db.execute(query_rg_without_master)]
    total = len(candidates)

    for position, (rg, gid, name) in enumerate(candidates, 1):
        if edits_left <= 0:
            break
        already_judged = (gid in discogs_release_group_missing
                          or gid in discogs_release_group_problematic)
        if already_judged:
            continue

        urls = set(link for link, in db.execute(query_rg_release_discogs, rg))
        if verbose:
            out(u'%d/%d - %.2f%%' % (position, total, position * 100.0 / total))
            out(u'%s http://musicbrainz.org/release-group/%s' % (name, gid))

        try:
            masters = list(discogs_get_master(urls))
        except (discogs.HTTPError, discogs.requests.ConnectionError) as err:
            out(' ERROR: discogs_get_master(%s): %s' % (list(urls), err))
            continue

        # Decide whether the set of masters disqualifies the group, and if
        # so which message to print and which table to record it in.
        if not masters:
            rejection = (u' aborting, no Discogs master!',
                         "INSERT INTO bot_discogs_release_group_missing (gid) VALUES (%s)")
        elif len(set(masters)) > 1:
            rejection = (u' aborting, releases with different Discogs master in one group!',
                         "INSERT INTO bot_discogs_release_group_problematic (gid) VALUES (%s)")
        elif len(masters) != len(urls):
            rejection = (u' aborting, releases without Discogs master in group!',
                         "INSERT INTO bot_discogs_release_group_problematic (gid) VALUES (%s)")
        else:
            rejection = None
        if rejection is not None:
            message, statement = rejection
            if verbose:
                out(message)
            db.execute(statement, gid)
            continue

        master_name, master_id, master_artists = masters[0]
        if not are_similar(master_name, name):
            if verbose:
                out(u' Similarity too small: %s <-> %s' % (name, master_name))
            db.execute("INSERT INTO bot_discogs_release_group_problematic (gid) VALUES (%s)", gid)
            continue

        master_url = 'http://www.discogs.com/master/%d' % master_id
        link = (gid, master_url)
        if link in discogs_release_group_set:
            # Added by an earlier run but no longer present: remember the
            # pair for the blacklist report instead of adding it again.
            if verbose:
                out(u' already linked earlier (probably got removed by some editor!')
            if link not in bot_blacklist:
                bot_blacklist_new.add(link)
            continue

        # Edit note explaining the evidence for the link.
        url_count = len(urls)
        if url_count >= 2:
            text = u'There are %d distinct Discogs links in this release group, and all point to this master URL.\n' % url_count
        else:
            text = u'There is one Discogs link in this release group, and it points to this master URL.\n%s\n' % list(urls)[0]
        text += u'Also, the name of the Discogs master “%s” (by %s) is similar to the release group name.' % (master_name, master_artists)
        text += '\n\n%s' % prog

        try:
            out(u'http://musicbrainz.org/release-group/%s -> %s' % (gid,master_url))
            mb.add_url('release_group', gid, 90, master_url, text, auto=True)
            db.execute("INSERT INTO bot_discogs_release_group_set (gid,url) VALUES (%s,%s)", (gid,master_url))
            edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as err:
            out(' ERROR: mb.add_url(%s, %s): %s' % (gid, master_url, err))
            out(err)

    if bot_blacklist_new:
        out(blacklist.wiki_markup(bot_blacklist_new, 'release-group', db))
def main(verbose=False):
    """Add Amazon links to releases, matched by barcode.

    For every release from ``query_releases_without_asin`` and each of its
    release events (country and date), the barcode is looked up through
    ``amazon_get_asin``.  The Amazon item is accepted only if it is not
    already known, has a large image, is not marked as an import, has a
    release date within 365 days of the MusicBrainz date, and has a similar
    title; for country ``JP`` a catalogue number reported by Amazon must also
    match one of the release's catalogue numbers.  Rejected releases are
    written to one of the ``bot_asin_*`` tables.

    :param verbose: when true, print progress and the reason for every skip.
    """
    # The second value returned by mb.edits_left() is not needed here.
    normal_edits_left, _ = mb.edits_left()
    releases = [(r, gid, barcode, name, ac) for r, gid, barcode, name, ac in db.execute(query_releases_without_asin)]
    count = len(releases)

    # Loop invariants.  The escaped u'' literals have the same value as the
    # former ur'' raw literals but are also valid Python 3 syntax.
    re_bold_import = re.compile(u'\\b(imports?)\\b', re.IGNORECASE)
    helpful_formats = ['Dual Disc']

    for i, (r, gid, barcode, name, ac) in enumerate(releases):
        # Stop completely once the budget is used up; the inner `break`
        # alone would still run one release-event query per remaining
        # release.
        if normal_edits_left <= 0:
            break
        for country, year, month, day in db.execute(query_release_events, (r,)):
            if normal_edits_left <= 0:
                break
            if gid in asin_missing or gid in asin_problematic or gid in asin_nocover or gid in asin_catmismatch:
                continue
            if not barcode_type(barcode):
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            if country not in store_map_rev:
                continue

            # Barcodes shared by several releases are ambiguous.
            barcode_key = barcode.lstrip('0')
            if barcode_key in barcodes_hist and barcodes_hist[barcode_key] > 1:
                if verbose:
                    colored_out(bcolors.WARNING, ' two releases with same barcode, skip for now')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue

            if verbose:
                colored_out(bcolors.OKBLUE, u'%d/%d - %.2f%% - %s http://musicbrainz.org/release/%s %s %s' % (i+1, count, (i+1) * 100.0 / count, name, gid, barcode, country))

            try:
                item = amazon_get_asin(barcode, country, (year, month, day))
            except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
                out(e)
                continue
            if item is None:
                if verbose:
                    out(' * not found, continue')
                db.execute("INSERT INTO bot_asin_missing (gid) VALUES (%s)", gid)
                continue

            url = amazon_url_cleanup(str(item.DetailPageURL), str(item.ASIN))
            if verbose:
                out(' * barcode matches %s' % url)
            if item.ASIN in asins:
                if verbose:
                    out(' * skip, ASIN already in DB')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            if 'LargeImage' not in item.__dict__:
                if verbose:
                    out(' * skip, has no image')
                db.execute("INSERT INTO bot_asin_nocover (gid) VALUES (%s)", gid)
                continue

            attrs = item.ItemAttributes
            if 'Format' in attrs.__dict__ and 'Import' in [f for f in attrs.Format]:
                if verbose:
                    out(' * skip, is marked as Import')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue

            if 'ReleaseDate' in attrs.__dict__:
                amazon_date = datetime.datetime.strptime(str(attrs.ReleaseDate), '%Y-%m-%d')
                # Missing date parts default to 1, so a release without a
                # year is compared against year 1.
                mb_date = datetime.datetime(year if year else 1, month if month else 1, day if day else 1)
                if abs(amazon_date - mb_date) > datetime.timedelta(days=365):
                    if verbose:
                        out(' * skip, has release date diff > 365 days')
                    db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                    continue
            else:
                if verbose:
                    out(' * skip, has no release date')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue

            amazon_name = unicode(attrs.Title)

            # Catalogue number reported by Amazon, if any.
            catnr = None
            if 'SeikodoProductCode' in attrs.__dict__:
                catnr = unicode(attrs.SeikodoProductCode)
            elif 'MPN' in attrs.__dict__:
                catnr = unicode(attrs.MPN)

            matched = False
            if catnr:
                matched = any(cat_compare(mb_catnr, catnr, country) for mb_catnr in release_catnrs(r))
                # For JP a catalogue number that matches nothing is fatal.
                if not matched and country == 'JP':
                    if verbose:
                        colored_out(bcolors.FAIL, u' * CAT NR MISMATCH, ARGH!')
                    db.execute("INSERT INTO bot_asin_catmismatch (gid) VALUES (%s)", gid)
                    continue
            if not matched:
                # An unmatched catalogue number is not used as evidence.
                catnr = None

            if not are_similar(name, amazon_name):
                if verbose:
                    colored_out(bcolors.FAIL, u' * Similarity too small: %s <-> %s' % (name, amazon_name))
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue

            if (gid, item.ASIN) in asin_set:
                if verbose:
                    colored_out(bcolors.WARNING, u' * already linked earlier (probably got removed by some editor!)')
                if (gid, url) not in bot_blacklist:
                    bot_blacklist_new.add((gid, url))
                continue

            # Edit note: first what matched, then the Amazon and MusicBrainz
            # details side by side, then search links.
            text = u'%s lookup for “%s” (country: %s), ' % (barcode_type(barcode), barcode, country)
            if catnr:
                text += u'matching catalog number “%s”, release name is “%s”' % (catnr, attrs.Title)
            else:
                text += u'has similar name “%s”' % attrs.Title
            if 'Artist' in attrs.__dict__:
                text += u' by “%s”' % attrs.Artist

            text += u'.\nAmazon.com: '
            if 'Binding' in attrs.__dict__:
                if 'NumberOfDiscs' in attrs.__dict__:
                    text += u'%s × ' % attrs.NumberOfDiscs
                if attrs.Binding == 'Audio CD' and 'Format' in attrs.__dict__ and attrs.Format in helpful_formats:
                    text += u'%s' % attrs.Format
                else:
                    text += u'%s' % attrs.Binding
            if not catnr and 'Label' in attrs.__dict__:
                text += u', %s' % attrs.Label
            if 'ReleaseDate' in attrs.__dict__:
                text += u', %s' % attrs.ReleaseDate

            text += u'\nMusicBrainz: '
            text += u'%s' % release_format(r)
            if not catnr:
                labels = release_labels(r)
                if labels:
                    text += u', %s' % u' / '.join(labels)
            if year:
                text += u', %s' % date_format(year, month, day)

            if catnr and country == 'JP':
                text += u'\nhttp://amazon.jp/s?field-keywords=%s\nhttp://amazon.jp/s?field-keywords=%s' % (catnr, barcode)
            else:
                text += u'\nhttp://amazon.%s/s?field-keywords=%s' % (amazon_url_tld(url), barcode)

            # make "Import" bold so it is easier recognizable
            text = re_bold_import.sub(u"'''\\1'''", text)
            text += '\n\n%s' % prog

            try:
                colored_out(bcolors.OKGREEN, u' * http://musicbrainz.org/release/%s -> %s' % (gid,url))
                mb.add_url('release', gid, 77, url, text)
                db.execute("INSERT INTO bot_asin_set (gid,url) VALUES (%s,%s)", (gid,url))
                # NOTE(review): membership above is tested with item.ASIN but
                # the URL is stored here -- confirm what `asins` holds.
                asins.add(url)
                normal_edits_left -= 1
            except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
                out(e)

    if bot_blacklist_new:
        out(blacklist.wiki_markup(bot_blacklist_new, 'release', db))