def _flag_problematic_artist(a_gid):
    """Record ``a_gid`` in the ``bot_discogs_artist_problematic`` table."""
    db.execute("INSERT INTO bot_discogs_artist_problematic (gid) VALUES (%s)", a_gid)


def _find_discogs_track(tracklist, t_pos):
    """Return the ``t_pos``-th tracklist entry whose type is 'Track', or None.

    Entries of any other type are skipped: they neither advance the position
    counter nor can they be returned as a match.
    """
    t_index = 0
    for t in tracklist:
        if t['type'] != 'Track':
            continue
        t_index += 1
        if t_index == t_pos:
            return t
    return None


def _normalize_discogs_artist_name(name):
    """Return ``name`` prepared for comparison with an artist credit name.

    A " (<digits>)" marker and everything after it is dropped, and a trailing
    ", The" is moved to the front ("Foo, The" -> "The Foo").
    """
    m = re.match(r'(.*?) \([0-9]+\)', name)
    if m:
        name = m.group(1)
    # Anchored at the end so that only a real ", The" suffix is moved; without
    # the anchor "Good, The Bad, The" collapsed to "The Good".
    m = re.match(r'(.*?), (The)$', name)
    if m:
        name = '%s %s' % (m.group(2), m.group(1))
    return name


def main(verbose=False):
    """Add Discogs artist URLs to artists returned by ``query_missing``.

    Rows are grouped per artist; only artists with exactly one release row
    are considered.  An artist is linked when the Discogs release behind the
    row's URL has a matching track (by position and similar title) credited
    to a single, similarly named Discogs artist.  Artists failing a check are
    recorded via ``bot_discogs_artist_problematic``.  The loop stops once the
    first value returned by ``mb.edits_left()`` has been used up.

    verbose -- when true, progress and skip reasons are written with ``out``.
    """
    # Only the first of the two counters is consumed by this loop.
    normal_edits_left, _ = mb.edits_left()
    d = defaultdict(dict)

    for row in db.execute(query_missing):
        r, r_gid, t_name, t_pos, m_pos, url, a, a_gid, ac = row
        if a_gid in discogs_artist_problematic:
            continue
        d[a][r] = (r_gid, t_name, t_pos, m_pos, url, a_gid, ac)

    count = len(d)
    for i, k in enumerate(d):
        if normal_edits_left <= 0:
            break
        if len(d[k]) != 1:
            continue
        r_gid, t_name, t_pos, m_pos, url, a_gid, ac = list(d[k].values())[0]
        if m_pos > 1:
            _flag_problematic_artist(a_gid)
            continue
        # Generator expression: unlike a Python 2 list comprehension it does
        # not leak its loop variable into the function scope.
        artist_releases = set(release_id for release_id, in db.execute('''SELECT DISTINCT r.id FROM release r JOIN medium m ON m.release = r.id JOIN track t ON t.medium = m.id WHERE t.artist_credit = %s''', ac))
        if len(artist_releases) > 1:
            _flag_problematic_artist(a_gid)
            continue
        if verbose:
            out(u'%d/%d - %.2f%%' % (i+1, count, (i+1) * 100.0 / count))
            out('http://musicbrainz.org/release/%s (%d-%d)' % (r_gid, m_pos, t_pos))
            out('%s' % url)
            out('http://musicbrainz.org/artist/%s' % a_gid)
        m = re.match(r'^http://www\.discogs\.com/release/([0-9]+)', url)
        if not m:
            if verbose:
                out('skip, is no valid Discogs release URL')
            continue
        discogs_release_id = int(m.group(1))
        discogs_release = discogs.Release(discogs_release_id)
        if discogs_release.data['status'] in ['Draft', 'Rejected']:
            if verbose:
                out('skip, release is draft/rejected')
            _flag_problematic_artist(a_gid)
            continue
        discogs_track = _find_discogs_track(discogs_release.tracklist, t_pos)
        if discogs_track is None:
            if verbose:
                out('track not found')
            _flag_problematic_artist(a_gid)
            continue
        discogs_artists = discogs_track['artists']
        if len(discogs_artists) == 0:
            # No per-track credit: fall back to the release-level artists.
            discogs_artists = discogs_release.artists
        if len(discogs_artists) != 1:
            if verbose:
                out('skip, %d track artists' % len(discogs_artists))
            _flag_problematic_artist(a_gid)
            continue
        if not are_tracks_similar(discogs_track['title'], t_name):
            if verbose:
                out(u'not similar: %s <-> %s' % (discogs_track['title'], t_name))
            _flag_problematic_artist(a_gid)
            continue
        discogs_artist = discogs_artists[0]
        if discogs_artist.name in [u'Various', u'Unknown Artist']:
            if verbose:
                out(u'not linking to Various or Unknown Artist')
            _flag_problematic_artist(a_gid)
            continue
        ac_name = artist_credit(ac)
        norm_name = _normalize_discogs_artist_name(discogs_artist.name)
        if not are_artists_similar(norm_name, ac_name):
            if verbose:
                out(u'not similar: %s [%s] <-> %s' % (norm_name, discogs_artist.name, ac_name))
            _flag_problematic_artist(a_gid)
            continue
        discogs_url = discogs_artist_url(discogs_artist)
        if a_gid in discogs_artist_set:
            # The bot linked this artist before; collect the pair for the
            # blacklist markup printed at the end instead of linking again.
            if verbose:
                out(u'  already linked earlier (probably got removed by some editor!')
            if (a_gid, discogs_url) not in bot_blacklist:
                bot_blacklist_new.add((a_gid, discogs_url))
            continue
        text = u'Artist appears on only one release [1] (e.g. medium %d, track %d), which is linked to discogs release [2]. Also, the track names are similar:\n' % (m_pos, t_pos)
        text += u'Discogs: “%s” by %s\n' % (discogs_track['title'], combine_names([x.name for x in discogs_artists]))
        text += u'MBrainz: “%s” by “%s”\n\n' % (t_name, ac_name)
        text += u'[1] http://musicbrainz.org/release/%s\n[2] %s' % (r_gid, url)
        text += '\n\n%s' % prog
        try:
            out(u'http://musicbrainz.org/artist/%s  ->  %s' % (a_gid,discogs_url))
            mb.add_url('artist', a_gid, 180, discogs_url.encode('utf-8'), text)
            db.execute("INSERT INTO bot_discogs_artist_set (gid,url) VALUES (%s,%s)", (a_gid, discogs_url))
            normal_edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out(e)
    if bot_blacklist_new:
        out(blacklist.wiki_markup(bot_blacklist_new, 'artist', db))
def main(verbose=False):
    """Link release groups from ``query_rg_without_master`` to a Discogs master.

    For each release group the Discogs URLs of its releases are resolved to
    masters via ``discogs_get_master``.  The group is linked only when every
    URL resolves to the same single master and the master name is similar to
    the release group name; otherwise the group is recorded as missing or
    problematic.  Processing stops when the second value returned by
    ``mb.edits_left()`` reaches zero.

    verbose -- when true, progress and skip reasons are written with ``out``.
    """
    normal_edits_left, edits_left = mb.edits_left()
    candidates = []
    for rg, gid, name in db.execute(query_rg_without_master):
        candidates.append((rg, gid, name))
    total = len(candidates)

    for idx, (rg, gid, name) in enumerate(candidates):
        if edits_left <= 0:
            break
        already_flagged = gid in discogs_release_group_missing or gid in discogs_release_group_problematic
        if already_flagged:
            continue

        urls = set()
        for url, in db.execute(query_rg_release_discogs, rg):
            urls.add(url)

        if verbose:
            position = idx + 1
            out(u'%d/%d - %.2f%%' % (position, total, position * 100.0 / total))
            out(u'%s http://musicbrainz.org/release-group/%s' % (name, gid))

        try:
            masters = list(discogs_get_master(urls))
        except (discogs.HTTPError, discogs.requests.ConnectionError) as e:
            out('  ERROR: discogs_get_master(%s): %s' % (list(urls), e))
            continue

        # Structural checks on the resolved masters; the first one that fails
        # decides the message and the table the group is recorded in.
        if not masters:
            rejection = (u'  aborting, no Discogs master!',
                         "INSERT INTO bot_discogs_release_group_missing (gid) VALUES (%s)")
        elif len(set(masters)) > 1:
            rejection = (u'  aborting, releases with different Discogs master in one group!',
                         "INSERT INTO bot_discogs_release_group_problematic (gid) VALUES (%s)")
        elif len(masters) != len(urls):
            rejection = (u'  aborting, releases without Discogs master in group!',
                         "INSERT INTO bot_discogs_release_group_problematic (gid) VALUES (%s)")
        else:
            rejection = None
        if rejection is not None:
            message, statement = rejection
            if verbose:
                out(message)
            db.execute(statement, gid)
            continue

        master_name, master_id, master_artists = masters[0]
        if not are_similar(master_name, name):
            if verbose:
                out(u'  Similarity too small: %s <-> %s' % (name, master_name))
            db.execute("INSERT INTO bot_discogs_release_group_problematic (gid) VALUES (%s)", gid)
            continue

        master_url = 'http://www.discogs.com/master/%d' % master_id
        link = (gid, master_url)
        if link in discogs_release_group_set:
            # Linked by the bot before; collect it for the blacklist markup
            # printed at the end rather than adding the URL again.
            if verbose:
                out(u'  already linked earlier (probably got removed by some editor!')
            if link not in bot_blacklist:
                bot_blacklist_new.add(link)
            continue

        url_count = len(urls)
        if url_count >= 2:
            text = u'There are %d distinct Discogs links in this release group, and all point to this master URL.\n' % url_count
        else:
            text = u'There is one Discogs link in this release group, and it points to this master URL.\n%s\n' % next(iter(urls))
        text += u'Also, the name of the Discogs master “%s” (by %s) is similar to the release group name.' % (master_name, master_artists)
        text += '\n\n%s' % prog

        try:
            out(u'http://musicbrainz.org/release-group/%s  ->  %s' % (gid,master_url))
            mb.add_url('release_group', gid, 90, master_url, text, auto=True)
            db.execute("INSERT INTO bot_discogs_release_group_set (gid,url) VALUES (%s,%s)", (gid,master_url))
            edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out('  ERROR: mb.add_url(%s, %s): %s' % (gid, master_url, e))
            out(e)

    if bot_blacklist_new:
        out(blacklist.wiki_markup(bot_blacklist_new, 'release-group', db))
Esempio n. 3
0
def main(verbose=False):
    """Add Amazon URLs to releases returned by ``query_releases_without_asin``.

    For every release event (country and date) of a release, the barcode is
    looked up with ``amazon_get_asin``.  The found item is linked only if it
    has an image, is not marked as Import, has a release date within 365 days
    of the MusicBrainz date, and either its catalog number matches or its
    title is similar to the release name.  Releases failing a check are
    recorded in the corresponding ``bot_asin_*`` table.  Processing stops
    once the first value returned by ``mb.edits_left()`` has been used up.

    verbose -- when true, progress and skip reasons are written out.
    """
    # Only the first of the two counters is consumed by this loop.
    normal_edits_left, _ = mb.edits_left()
    # Compiled once instead of once per release event.  A plain raw string
    # behaves the same as the former ur'' literal for this ASCII pattern.
    re_bold_import = re.compile(r'\b(imports?)\b', re.IGNORECASE)
    releases = [(r, gid, barcode, name, ac) for r, gid, barcode, name, ac in db.execute(query_releases_without_asin)]
    count = len(releases)
    for i, (r, gid, barcode, name, ac) in enumerate(releases):
        # Stop the whole run once the budget is spent; the check inside the
        # event loop alone would still issue one query per remaining release.
        if normal_edits_left <= 0:
            break
        for country, year, month, day in db.execute(query_release_events, (r,)):
            if normal_edits_left <= 0:
                break
            if gid in asin_missing or gid in asin_problematic or gid in asin_nocover or gid in asin_catmismatch:
                continue
            if not barcode_type(barcode):
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            if country not in store_map_rev:
                continue
            stripped_barcode = barcode.lstrip('0')
            if stripped_barcode in barcodes_hist and barcodes_hist[stripped_barcode] > 1:
                if verbose:
                    colored_out(bcolors.WARNING, '  two releases with same barcode, skip for now')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            if verbose:
                colored_out(bcolors.OKBLUE, u'%d/%d - %.2f%% - %s http://musicbrainz.org/release/%s %s %s' % (i+1, count, (i+1) * 100.0 / count, name, gid, barcode, country))
            try:
                item = amazon_get_asin(barcode, country, (year, month, day))
            except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
                out(e)
                continue
            if item is None:
                if verbose:
                    out(' * not found, continue')
                db.execute("INSERT INTO bot_asin_missing (gid) VALUES (%s)", gid)
                continue
            url = amazon_url_cleanup(str(item.DetailPageURL), str(item.ASIN))
            if verbose:
                out(' * barcode matches %s' % url)
            # NOTE(review): this tests the ASIN, but the only value this
            # function ever adds to `asins` is the cleaned URL (see the end of
            # the loop body) -- confirm what `asins` is initialised with.
            if item.ASIN in asins:
                if verbose:
                    out('   * skip, ASIN already in DB')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            if 'LargeImage' not in item.__dict__:
                if verbose:
                    out('   * skip, has no image')
                db.execute("INSERT INTO bot_asin_nocover (gid) VALUES (%s)", gid)
                continue
            attrs = item.ItemAttributes
            if 'Format' in attrs.__dict__ and 'Import' in [f for f in attrs.Format]:
                if verbose:
                    out('   * skip, is marked as Import')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            if 'ReleaseDate' in attrs.__dict__:
                amazon_date = datetime.datetime.strptime(str(attrs.ReleaseDate), '%Y-%m-%d')
                # Missing date parts default to 1 so a partial date can still
                # be compared.
                mb_date = datetime.datetime(year if year else 1, month if month else 1, day if day else 1)
                if abs(amazon_date - mb_date) > datetime.timedelta(days=365):
                    if verbose:
                        out('   * skip, has release date diff > 365 days')
                    db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                    continue
            else:
                if verbose:
                    out('   * skip, has no release date')
                db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                continue
            amazon_name = unicode(attrs.Title)
            catnr = None
            if 'SeikodoProductCode' in attrs.__dict__:
                catnr = unicode(attrs.SeikodoProductCode)
            elif 'MPN' in attrs.__dict__:
                catnr = unicode(attrs.MPN)
            matched = False
            if catnr:
                for mb_catnr in release_catnrs(r):
                    if cat_compare(mb_catnr, catnr, country):
                        matched = True
                        break
                # Only for JP is a catalog number mismatch treated as fatal.
                if not matched and country == 'JP':
                    if verbose:
                        colored_out(bcolors.FAIL, u' * CAT NR MISMATCH, ARGH!')
                    db.execute("INSERT INTO bot_asin_catmismatch (gid) VALUES (%s)", gid)
                    continue
            if not matched:
                # Without a catalog number match, fall back to comparing names.
                catnr = None
                if not are_similar(name, amazon_name):
                    if verbose:
                        colored_out(bcolors.FAIL, u'   * Similarity too small: %s <-> %s' % (name, amazon_name))
                    db.execute("INSERT INTO bot_asin_problematic (gid) VALUES (%s)", gid)
                    continue
            # NOTE(review): the lookup key is (gid, ASIN) while the blacklist
            # and the bot_asin_set insert below use (gid, url) -- confirm
            # which of the two `asin_set` actually holds.
            if (gid, item.ASIN) in asin_set:
                if verbose:
                    colored_out(bcolors.WARNING, u' * already linked earlier (probably got removed by some editor!)')
                if (gid, url) not in bot_blacklist:
                    bot_blacklist_new.add((gid, url))
                continue
            # Build the edit note: the evidence first, then an Amazon and a
            # MusicBrainz summary line, then search links.
            text = u'%s lookup for “%s” (country: %s), ' % (barcode_type(barcode), barcode, country)
            if catnr:
                text += u'matching catalog number “%s”, release name is “%s”' % (catnr, attrs.Title)
            else:
                text += u'has similar name “%s”' % attrs.Title
            if 'Artist' in attrs.__dict__:
                text += u' by “%s”' % attrs.Artist
            text += u'.\nAmazon.com: '
            if 'Binding' in attrs.__dict__:
                if 'NumberOfDiscs' in attrs.__dict__:
                    text += u'%s × ' % attrs.NumberOfDiscs
                helpful_formats = ['Dual Disc']
                if attrs.Binding == 'Audio CD' and 'Format' in attrs.__dict__ and attrs.Format in helpful_formats:
                    text += u'%s' % attrs.Format
                else:
                    text += u'%s' % attrs.Binding
            if not catnr and 'Label' in attrs.__dict__:
                text += u', %s' % attrs.Label
            if 'ReleaseDate' in attrs.__dict__:
                text += u', %s' % attrs.ReleaseDate
            text += u'\nMusicBrainz: '
            text += u'%s' % release_format(r)
            if not catnr:
                labels = release_labels(r)
                if labels:
                    text += u', %s' % u' / '.join(labels)
            if year:
                text += u', %s' % date_format(year, month, day)
            if catnr and country == 'JP':
                text += u'\nhttp://amazon.jp/s?field-keywords=%s\nhttp://amazon.jp/s?field-keywords=%s' % (catnr, barcode)
            else:
                text += u'\nhttp://amazon.%s/s?field-keywords=%s' % (amazon_url_tld(url), barcode)
            # make "Import" bold so it is easier recognizable
            text = re_bold_import.sub(r"'''\1'''", text)
            text += '\n\n%s' % prog
            try:
                colored_out(bcolors.OKGREEN, u' * http://musicbrainz.org/release/%s  ->  %s' % (gid,url))
                mb.add_url('release', gid, 77, url, text)
                db.execute("INSERT INTO bot_asin_set (gid,url) VALUES (%s,%s)", (gid,url))
                asins.add(url)
                normal_edits_left -= 1
            except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
                out(e)
    if bot_blacklist_new:
        out(blacklist.wiki_markup(bot_blacklist_new, 'release', db))