Esempio n. 1
0
            else:
                viaf_url = 'http://viaf.org/viaf/%s' % identifiers['VIAF']
                edit_note = 'From %s' % (artist['wp_url'],)
                colored_out(bcolors.OKGREEN, ' * found VIAF:', viaf_url)
                # Check if this VIAF has not been deleted
                skip = False
                try:
                    resp, content = httplib2.Http().request(viaf_url)
                except socket.error:
                    colored_out(bcolors.FAIL, ' * timeout!')
                    skip = True
                deleted_message = 'abandonedViafRecord'
                if skip == False and (resp.status == '404' or deleted_message in content):
                    colored_out(bcolors.FAIL, ' * deleted VIAF!')
                    skip = True
                if skip == False:
                    time.sleep(3)
                    out(' * edit note:', edit_note.replace('\n', ' '))
                    mb.add_url('artist', artist['gid'], str(VIAF_RELATIONSHIP_TYPES['artist']), viaf_url, edit_note)
                    matched.add(artist['gid'])

        if artist['processed'] is None and artist['gid'] not in seen:
            db.execute("INSERT INTO bot_wp_artist_viaf (gid, lang) VALUES (%s, %s)", (artist['gid'], page.lang))
        else:
            db.execute("UPDATE bot_wp_artist_viaf SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], page.lang))
        seen.add(artist['gid'])

# Script entry point: when executed directly, run main() inside the PIDFile
# context manager bound to /tmp/mbbot_wp_artist_viaf.pid.
# NOTE(review): PIDFile is defined outside this view; presumably it guards
# against concurrent runs of the bot -- confirm.
if __name__ == '__main__':
    with PIDFile('/tmp/mbbot_wp_artist_viaf.pid'):
        main()
Esempio n. 2
0
            matched.add(entity['gid'])

        if entity['processed'] is None and entity['gid'] not in seen:
            db.execute(
                "INSERT INTO bot_wp_wikidata_links (gid, lang) VALUES (%s, %s)",
                (entity['gid'], page.lang))
        else:
            db.execute(
                "UPDATE bot_wp_wikidata_links SET processed = now() WHERE (gid, lang) = (%s, %s)",
                (entity['gid'], page.lang))
        seen.add(entity['gid'])
    stats['seen'][ENTITY_TYPE] = len(seen)
    stats['matched'][ENTITY_TYPE] = len(matched)


stats = {'seen': {}, 'matched': {}}
if __name__ == '__main__':
    with PIDFile('/tmp/mbbot_wp_wikidata_links.pid'):
        ENTITY_TYPES = ('event', 'instrument', 'series', 'place',
                        'release-group', 'artist', 'work', 'label')
        if len(sys.argv) > 1 and sys.argv[1] in ENTITY_TYPES:
            main(sys.argv[1])
        else:
            for entity_type in ENTITY_TYPES:
                main(entity_type)
        print '\nStats:'
        for entity_type in ENTITY_TYPES:
            print ' * %s : %s / %s' % (entity_type,
                                       stats['matched'][entity_type],
                                       stats['seen'][entity_type])
Esempio n. 3
0
                catnr, barcode)
        else:
            text += u'\nhttp://amazon.%s/s?field-keywords=%s' % (
                amazon_url_tld(url), barcode)
        # make "Import" bold so it is easier recognizable
        re_bold_import = re.compile(ur'\b(imports?)\b', re.IGNORECASE)
        text = re_bold_import.sub(ur"'''\1'''", text)
        try:
            colored_out(
                bcolors.OKGREEN,
                u' * https://musicbrainz.org/release/%s  ->  %s' % (gid, url))
            mb.add_url('release', gid, 77, url, text)
            db.execute("INSERT INTO bot_asin_set (gid,url) VALUES (%s,%s)",
                       (gid, url))
            asins.add(url)
            edits_left -= 1
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out(e)


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_asin_links.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_asin_links.pid'):
        main(options.verbose)
                        continue
            text += u'I’m converting this relationship because I’ve found a link to %s in the linked page %s.' % (
                license_url_raw, url)
        mb.add_url('release', gid, 301, license_url, text, auto=False)
        if not mb.edit_relationship(rel_id,
                                    'release',
                                    'url',
                                    84,
                                    link_id, {'license.0': []}, {}, {},
                                    text,
                                    auto=False):
            if (gid, original_url) not in cc_removed:
                text = u'Download and License relationship are already set, so this relationship is not necessary anymore.'
                mb.remove_relationship(rel_id, 'release', 'url', text)
                db.execute(
                    "INSERT INTO bot_cc_removed (gid,url) VALUES (%s,%s)",
                    (gid, original_url))
                cc_removed.add((gid, original_url))


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_convert_cc_links.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_convert_cc_links.pid'):
        main(options.verbose)
Esempio n. 5
0
        pages_with_viaf.add(page)

    artist_viaf = {}
    rows = sdb.execute("SELECT artist, url, viaf FROM viaf")
    for artist, url, viaf in rows:
        artist_viaf[artist] = {
            'url': url,
            'viaf': viaf,
            'submitted': submitted
        }

    cnt = 0
    for artist in db.execute(wp_url_query):
        if artist['id'] in artist_viaf:
            continue
        page = extract_page_title(artist['url'], wp_lang, normalize=True)
        if page not in pages_with_viaf:
            continue
        cnt += 1
        viaf = fetch_viaf(page)
        print artist, viaf
        sdb.execute('INSERT INTO viaf (artist, url, viaf) VALUES (?, ?, ?)',
                    (artist['id'], artist['url'], viaf))
        sdb.commit()
    print cnt


# Script entry point: when executed directly, run main() inside the PIDFile
# context manager bound to /tmp/mbbot_wp_viaf.pid.
# NOTE(review): PIDFile is defined outside this view; presumably it guards
# against concurrent runs of the bot -- confirm.
if __name__ == '__main__':
    with PIDFile('/tmp/mbbot_wp_viaf.pid'):
        main()
        print 'Finding URLs using SQL query:', query_http_urls
    urls = db.execute(query_http_urls)
    if verbose:
        print u'Found %s URLs!' % (urls.rowcount)
    for url in urls:
        if verbose:
            print u'[!!!] Working on url: %s' % (cfg.MB_SITE + u'/url/' +
                                                 unicode(url['gid']))
        new_url = url['url'].replace('http://classic.', 'https://www.', 1)
        if new_url is None:
            print 'Skipping %s.' % (url['url'])
            continue
        if verbose:
            print u'Changing %s to %s' % (url['url'], new_url)
        try:
            mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False)
        except:
            continue


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_httpsify_secondhandsongs.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a store_true flag, so it has to default to False: with
    # default=True the flag could never change anything and verbose output
    # could not be turned off.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_httpsify_secondhandsongs.pid'):
        main(options.verbose)
Esempio n. 7
0
from utils import out
import config as cfg

def main(args):
    if not args:
        out('Usage:   cancel_edits.py <edit_number edit_note>...\n')
        out('Example: cancel_edits.py "Edit #123 my mistake"')
        out('         cancel_edits.py 123 124 125')
        return

    edits = []
    for arg in args:
        if not isinstance(arg, unicode):
            arg = unicode(arg, locale.getpreferredencoding())
        m = re.match(ur'(?:[Ee]dit )?#?([0-9]+) ?(.*)$', arg)
        if not m:
            out('invalid edit number "%s", aborting!' % arg)
            return
        edit_nr = str(m.group(1))
        edit_note = m.group(2).lstrip()
        edits.append((edit_nr, edit_note))

    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for edit_nr, edit_note in edits:
        out(u'Cancel edit #%s: %s' % (edit_nr, edit_note if edit_note else u'<no edit note>'))
        mb.cancel_edit(str(edit_nr), edit_note)

# Script entry point: pass the command-line arguments (without the script
# name) to main(), inside the PIDFile context manager bound to
# /tmp/mbbot_cancel_edits.pid.
if __name__ == '__main__':
    with PIDFile('/tmp/mbbot_cancel_edits.pid'):
        main(sys.argv[1:])
# Module-level mechanize browser, created at import time.
browser = mechanize.Browser()
# Turn off robots handling as well as redirect and HTTP debug output.
browser.set_handle_robots(False)
browser.set_debug_redirects(False)
browser.set_debug_http(False)


def main(verbose = False):
    urls = db.execute(query_bandcamp_urls_using_http)
    for url in urls:
        new_url = u'https' + url['url'][4:]
        edit_note = """Updating HTTP URL to HTTPS.

Using `bandcamp_httpsify.py`;
Source available at https://github.com/Freso/musicbrainz-bot/blob/master/bandcamp_httpsify.py"""
        if verbose:
            print u'Working on url: %s' % (url)
            print u'→ Changing %s to %s' % (url['url'], new_url)
        try:
            mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=True)
        except:
            continue


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_httpsify_bandcamp_links.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v', '--verbose', action='store_true', default=False,
            help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_httpsify_bandcamp_links.pid'):
        main(options.verbose)
Esempio n. 9
0
                artist, page, is_performance_name)
            if end_date['year']:
                colored_out(bcolors.OKGREEN, " * new end date:", end_date)
                artist['end_date_year'] = end_date['year']
                artist['end_date_month'] = end_date['month']
                artist['end_date_day'] = end_date['day']
                update.add('end_date')
                reasons.append(('END DATE', end_date_reasons))

        if update:
            edit_note = 'From %s' % (artist['url'], )
            for field, reason in reasons:
                edit_note += '\n\n%s:\n%s' % (field, ' '.join(reason))
            out(' * edit note:', edit_note.replace('\n', ' '))
            time.sleep(10)
            mb.edit_artist(artist, update, edit_note)

        if artist['processed'] is None:
            db.execute(
                "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)",
                (artist['gid'], wp_lang))
        else:
            db.execute(
                "UPDATE bot_wp_artist_data SET processed = now() WHERE (gid, lang) = (%s, %s)",
                (artist['gid'], wp_lang))


# Script entry point: when executed directly, run main() inside the PIDFile
# context manager bound to /tmp/mbbot_wp_artist_data.pid.
# NOTE(review): PIDFile is defined outside this view; presumably it guards
# against concurrent runs of the bot -- confirm.
if __name__ == '__main__':
    with PIDFile('/tmp/mbbot_wp_artist_data.pid'):
        main()
Esempio n. 10
0
    if verbose:
        out(pprint.pformat(dict(stats)))
    r_grouped = r_by_ac.values()
    random.shuffle(r_grouped)
    r_flat = list(itertools.chain(*r_grouped))
    count = len(r_flat)
    if verbose:
        out('script can be set for %d out of %d releases' % (count, count_all))

    for i, (gid, old_script_id, new_script, script_stats) in enumerate(r_flat):
        if verbose:
            out('%d/%d - %.2f%%' % (i + 1, count, (i + 1) * 100.0 / count))
        out('https://musicbrainz.org/release/%s %s -> %s' % (gid, mb_to_iso15924[old_script_id] if old_script_id else '', new_script))
        new_script_name = iso15924_to_mb[new_script]['name']
        new_script_id = iso15924_to_mb[new_script]['id']
        text = u'I’m setting this to “%s” because it is the predominant script on the tracklist (>40%%), and no other (determined) script is on the tracklist.' % new_script_name
        if not old_script_id:
            old_script_id = ''
        try:
            mb.set_release_script(gid, old_script_id, new_script_id, text, auto=True)
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out(e)

# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_set_script.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v', '--verbose', action='store_true', default=False,
            help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_set_script.pid'):
        main(options.verbose)
            text = u'There is one Discogs link in this release group, and it points to this master URL.\n%s\n' % list(
                urls)[0]
        text += u'Also, the name of the Discogs master “%s” (by %s) is similar to the release group name.' % (
            master_name, master_artists)
        try:
            out(u'https://musicbrainz.org/release-group/%s  ->  %s' %
                (gid, master_url))
            mb.add_url('release_group',
                       gid,
                       90,
                       master_url,
                       text,
                       auto=(len(urls) >= 2))
            db.execute(
                "INSERT INTO bot_discogs_release_group_set (gid,url) VALUES (%s,%s)",
                (gid, master_url))
        except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e:
            out(e)


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to
# /tmp/mbbot_discogs_links_release_groups.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_discogs_links_release_groups.pid'):
        main(options.verbose)
        print 'Finding URLs using SQL query:', query_exit_urls
    urls = db.execute(query_exit_urls)
    if verbose:
        print u'Found %s URLs!' % (urls.rowcount)
    for url in urls:
        if verbose:
            print u'[!!!] Working on url: %s' % (cfg.MB_SITE + u'/url/' +
                                                 unicode(url['gid']))
        new_url = get_target_url(url['url'], verbose)
        if new_url is None:
            print 'Skipping %s.' % (url['url'])
            continue
        if verbose:
            print u'Changing %s to %s' % (url['url'], new_url)
        try:
            mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False)
        except:
            continue


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_clean_up_exit_urls.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_clean_up_exit_urls.pid'):
        main(options.verbose)
        print 'Finding URLs using SQL query:', query_http_urls
    urls = db.execute(query_http_urls)
    if verbose:
        print u'Found %s URLs!' % (urls.rowcount)
    for url in urls:
        if verbose:
            print u'[!!!] Working on url: %s' % (cfg.MB_SITE + u'/url/' +
                                                 unicode(url['gid']))
        new_url = url['url'].replace('http://', 'https://', 1)
        if new_url is None:
            print 'Skipping %s.' % (url['url'])
            continue
        if verbose:
            print u'Changing %s to %s' % (url['url'], new_url)
        try:
            mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False)
        except:
            continue


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_httpsify_the_world.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_httpsify_the_world.pid'):
        main(options.verbose)
Esempio n. 14
0
                update.add('begin_date')
                reasons.append(('BEGIN DATE', begin_date_reasons))
        if not artist['end_date_year']:
            end_date, end_date_reasons = determine_end_date(
                artist, page, is_performance_name)
            if end_date['year']:
                colored_out(bcolors.OKGREEN, " * new end date:", end_date)
                artist['end_date_year'] = end_date['year']
                artist['end_date_month'] = end_date['month']
                artist['end_date_day'] = end_date['day']
                update.add('end_date')
                reasons.append(('END DATE', end_date_reasons))

        if update:
            edit_note = 'From %s' % (artist['url'], )
            for field, reason in reasons:
                edit_note += '\n\n%s:\n%s' % (field, ' '.join(reason))
            out(' * edit note:', edit_note.replace('\n', ' '))
            time.sleep(10)
            mb.edit_artist(artist, update, edit_note)

        db.execute(
            "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)",
            (artist['gid'], wp_lang))
        out()


# Script entry point: when executed directly, run main() inside the PIDFile
# context manager bound to /tmp/mbbot_wp_artist_country.pid.
# NOTE(review): PIDFile is defined outside this view; presumably it guards
# against concurrent runs of the bot -- confirm.
if __name__ == '__main__':
    with PIDFile('/tmp/mbbot_wp_artist_country.pid'):
        main()
Esempio n. 15
0
    Using `spotify_url_cleanup.py`: https://github.com/Freso/musicbrainz-bot/blob/master/spotify_url_cleanup.py"""
    urls = db.execute(query_bad_spotify_urls)
    if verbose:
        print u'Found %s URLs!' % (urls.rowcount)
    for url in urls:
        if verbose:
            print u'Working on url: %s' % (cfg.MB_SITE + u'/url/' +
                                           unicode(url['gid']))
        new_url = get_spotify_url(url['url'], verbose)
        if new_url is None:
            print 'Skipping %s.' % (url['url'])
            continue
        if verbose:
            print u'→ Changing %s to %s' % (url['url'], new_url)
        try:
            mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False)
        except:
            continue


# Script entry point: parse the command line, then run main() inside the
# PIDFile context manager bound to /tmp/mbbot_clean_up_spotify_links.pid.
if __name__ == '__main__':
    parser = OptionParser()
    # -v/--verbose is a boolean flag that is off unless given.
    parser.add_option('-v',
                      '--verbose',
                      action='store_true',
                      default=False,
                      help='be more verbose')
    # Positional arguments (args) are parsed but not used here.
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_clean_up_spotify_links.pid'):
        main(options.verbose)