else: viaf_url = 'http://viaf.org/viaf/%s' % identifiers['VIAF'] edit_note = 'From %s' % (artist['wp_url'],) colored_out(bcolors.OKGREEN, ' * found VIAF:', viaf_url) # Check if this VIAF has not been deleted skip = False try: resp, content = httplib2.Http().request(viaf_url) except socket.error: colored_out(bcolors.FAIL, ' * timeout!') skip = True deleted_message = 'abandonedViafRecord' if skip == False and (resp.status == '404' or deleted_message in content): colored_out(bcolors.FAIL, ' * deleted VIAF!') skip = True if skip == False: time.sleep(3) out(' * edit note:', edit_note.replace('\n', ' ')) mb.add_url('artist', artist['gid'], str(VIAF_RELATIONSHIP_TYPES['artist']), viaf_url, edit_note) matched.add(artist['gid']) if artist['processed'] is None and artist['gid'] not in seen: db.execute("INSERT INTO bot_wp_artist_viaf (gid, lang) VALUES (%s, %s)", (artist['gid'], page.lang)) else: db.execute("UPDATE bot_wp_artist_viaf SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], page.lang)) seen.add(artist['gid']) if __name__ == '__main__': with PIDFile('/tmp/mbbot_wp_artist_viaf.pid'): main()
matched.add(entity['gid']) if entity['processed'] is None and entity['gid'] not in seen: db.execute( "INSERT INTO bot_wp_wikidata_links (gid, lang) VALUES (%s, %s)", (entity['gid'], page.lang)) else: db.execute( "UPDATE bot_wp_wikidata_links SET processed = now() WHERE (gid, lang) = (%s, %s)", (entity['gid'], page.lang)) seen.add(entity['gid']) stats['seen'][ENTITY_TYPE] = len(seen) stats['matched'][ENTITY_TYPE] = len(matched) stats = {'seen': {}, 'matched': {}} if __name__ == '__main__': with PIDFile('/tmp/mbbot_wp_wikidata_links.pid'): ENTITY_TYPES = ('event', 'instrument', 'series', 'place', 'release-group', 'artist', 'work', 'label') if len(sys.argv) > 1 and sys.argv[1] in ENTITY_TYPES: main(sys.argv[1]) else: for entity_type in ENTITY_TYPES: main(entity_type) print '\nStats:' for entity_type in ENTITY_TYPES: print ' * %s : %s / %s' % (entity_type, stats['matched'][entity_type], stats['seen'][entity_type])
catnr, barcode) else: text += u'\nhttp://amazon.%s/s?field-keywords=%s' % ( amazon_url_tld(url), barcode) # make "Import" bold so it is easier recognizable re_bold_import = re.compile(ur'\b(imports?)\b', re.IGNORECASE) text = re_bold_import.sub(ur"'''\1'''", text) try: colored_out( bcolors.OKGREEN, u' * https://musicbrainz.org/release/%s -> %s' % (gid, url)) mb.add_url('release', gid, 77, url, text) db.execute("INSERT INTO bot_asin_set (gid,url) VALUES (%s,%s)", (gid, url)) asins.add(url) edits_left -= 1 except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e: out(e) if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_asin_links.pid'): main(options.verbose)
continue text += u'I’m converting this relationship because I’ve found a link to %s in the linked page %s.' % ( license_url_raw, url) mb.add_url('release', gid, 301, license_url, text, auto=False) if not mb.edit_relationship(rel_id, 'release', 'url', 84, link_id, {'license.0': []}, {}, {}, text, auto=False): if (gid, original_url) not in cc_removed: text = u'Download and License relationship are already set, so this relationship is not necessary anymore.' mb.remove_relationship(rel_id, 'release', 'url', text) db.execute( "INSERT INTO bot_cc_removed (gid,url) VALUES (%s,%s)", (gid, original_url)) cc_removed.add((gid, original_url)) if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_convert_cc_links.pid'): main(options.verbose)
pages_with_viaf.add(page) artist_viaf = {} rows = sdb.execute("SELECT artist, url, viaf FROM viaf") for artist, url, viaf in rows: artist_viaf[artist] = { 'url': url, 'viaf': viaf, 'submitted': submitted } cnt = 0 for artist in db.execute(wp_url_query): if artist['id'] in artist_viaf: continue page = extract_page_title(artist['url'], wp_lang, normalize=True) if page not in pages_with_viaf: continue cnt += 1 viaf = fetch_viaf(page) print artist, viaf sdb.execute('INSERT INTO viaf (artist, url, viaf) VALUES (?, ?, ?)', (artist['id'], artist['url'], viaf)) sdb.commit() print cnt if __name__ == '__main__': with PIDFile('/tmp/mbbot_wp_viaf.pid'): main()
print 'Finding URLs using SQL query:', query_http_urls urls = db.execute(query_http_urls) if verbose: print u'Found %s URLs!' % (urls.rowcount) for url in urls: if verbose: print u'[!!!] Working on url: %s' % (cfg.MB_SITE + u'/url/' + unicode(url['gid'])) new_url = url['url'].replace('http://classic.', 'https://www.', 1) if new_url is None: print 'Skipping %s.' % (url['url']) continue if verbose: print u'Changing %s to %s' % (url['url'], new_url) try: mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False) except: continue if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=True, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_httpsify_secondhandsongs.pid'): main(options.verbose)
from utils import out
import config as cfg


def main(args):
    """Cancel the MusicBrainz edits named on the command line.

    Each item of *args* is an edit number, optionally written as ``#123``,
    ``Edit 123`` or ``Edit #123``, and optionally followed by the edit note
    to attach to the cancellation.  With no arguments a short usage text is
    printed instead.

    Every argument is parsed before the MusicBrainz client is created, so a
    single malformed argument aborts the run before any edit is cancelled.
    """
    if not args:
        out('Usage: cancel_edits.py <edit_number edit_note>...\n')
        out('Example: cancel_edits.py "Edit #123 my mistake"')
        out(' cancel_edits.py 123 124 125')
        return

    edits = []
    for arg in args:
        # Command-line arguments arrive as byte strings; decode them with the
        # locale's preferred encoding so the edit note is handled as text.
        if not isinstance(arg, unicode):
            arg = unicode(arg, locale.getpreferredencoding())
        # DOTALL lets the note part span several lines; without it an
        # argument containing a newline was rejected as an invalid edit
        # number.  The pattern has no backslashes, so a plain unicode literal
        # is equivalent to a raw one.
        m = re.match(u'(?:[Ee]dit )?#?([0-9]+) ?(.*)$', arg, re.DOTALL)
        if not m:
            out('invalid edit number "%s", aborting!' % arg)
            return
        edits.append((str(m.group(1)), m.group(2).lstrip()))

    mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for edit_nr, edit_note in edits:
        out(u'Cancel edit #%s: %s' % (edit_nr, edit_note or u'<no edit note>'))
        mb.cancel_edit(edit_nr, edit_note)


if __name__ == '__main__':
    # NOTE(review): PIDFile presumably guards against concurrent runs of this
    # script -- confirm against its definition.
    with PIDFile('/tmp/mbbot_cancel_edits.pid'):
        main(sys.argv[1:])
browser = mechanize.Browser()
browser.set_handle_robots(False)
browser.set_debug_redirects(False)
browser.set_debug_http(False)


def main(verbose=False):
    """Rewrite every URL returned by the Bandcamp HTTP query to HTTPS.

    For each row from ``query_bandcamp_urls_using_http`` the first four
    characters of the URL are replaced by ``https`` and the change is
    submitted through ``mb.edit_url`` with ``auto=True``.

    A failing edit is reported and skipped so that one bad URL does not stop
    the remaining ones from being processed.

    verbose -- when true, print each row and the rewrite about to be made.
    """
    # The note is the same for every edit, so build it once.
    edit_note = """Updating HTTP URL to HTTPS. Using `bandcamp_httpsify.py`; Source available at https://github.com/Freso/musicbrainz-bot/blob/master/bandcamp_httpsify.py"""

    urls = db.execute(query_bandcamp_urls_using_http)
    for url in urls:
        old_url = url['url']
        # Assumes the query only yields URLs starting with "http://", so that
        # dropping the first four characters removes exactly the scheme name
        # -- TODO confirm against the query.
        new_url = u'https' + old_url[4:]
        if verbose:
            print(u'Working on url: %s' % (url))
            print(u'→ Changing %s to %s' % (old_url, new_url))
        try:
            mb.edit_url(url['gid'], old_url, new_url, edit_note, auto=True)
        except Exception as e:
            # Stay best-effort, but no longer swallow KeyboardInterrupt or
            # SystemExit, and leave a trace of what failed.  %r keeps the
            # message printable whatever the exception text contains.
            print(u'Failed to update %s: %r' % (old_url, e))
            continue


if __name__ == '__main__':
    parser = OptionParser()
    parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose')
    (options, args) = parser.parse_args()
    with PIDFile('/tmp/mbbot_httpsify_bandcamp_links.pid'):
        main(options.verbose)
artist, page, is_performance_name) if end_date['year']: colored_out(bcolors.OKGREEN, " * new end date:", end_date) artist['end_date_year'] = end_date['year'] artist['end_date_month'] = end_date['month'] artist['end_date_day'] = end_date['day'] update.add('end_date') reasons.append(('END DATE', end_date_reasons)) if update: edit_note = 'From %s' % (artist['url'], ) for field, reason in reasons: edit_note += '\n\n%s:\n%s' % (field, ' '.join(reason)) out(' * edit note:', edit_note.replace('\n', ' ')) time.sleep(10) mb.edit_artist(artist, update, edit_note) if artist['processed'] is None: db.execute( "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang)) else: db.execute( "UPDATE bot_wp_artist_data SET processed = now() WHERE (gid, lang) = (%s, %s)", (artist['gid'], wp_lang)) if __name__ == '__main__': with PIDFile('/tmp/mbbot_wp_artist_data.pid'): main()
if verbose: out(pprint.pformat(dict(stats))) r_grouped = r_by_ac.values() random.shuffle(r_grouped) r_flat = list(itertools.chain(*r_grouped)) count = len(r_flat) if verbose: out('script can be set for %d out of %d releases' % (count, count_all)) for i, (gid, old_script_id, new_script, script_stats) in enumerate(r_flat): if verbose: out('%d/%d - %.2f%%' % (i + 1, count, (i + 1) * 100.0 / count)) out('https://musicbrainz.org/release/%s %s -> %s' % (gid, mb_to_iso15924[old_script_id] if old_script_id else '', new_script)) new_script_name = iso15924_to_mb[new_script]['name'] new_script_id = iso15924_to_mb[new_script]['id'] text = u'I’m setting this to “%s” because it is the predominant script on the tracklist (>40%%), and no other (determined) script is on the tracklist.' % new_script_name if not old_script_id: old_script_id = '' try: mb.set_release_script(gid, old_script_id, new_script_id, text, auto=True) except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e: out(e) if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_set_script.pid'): main(options.verbose)
text = u'There is one Discogs link in this release group, and it points to this master URL.\n%s\n' % list( urls)[0] text += u'Also, the name of the Discogs master “%s” (by %s) is similar to the release group name.' % ( master_name, master_artists) try: out(u'https://musicbrainz.org/release-group/%s -> %s' % (gid, master_url)) mb.add_url('release_group', gid, 90, master_url, text, auto=(len(urls) >= 2)) db.execute( "INSERT INTO bot_discogs_release_group_set (gid,url) VALUES (%s,%s)", (gid, master_url)) except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e: out(e) if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_discogs_links_release_groups.pid'): main(options.verbose)
print 'Finding URLs using SQL query:', query_exit_urls urls = db.execute(query_exit_urls) if verbose: print u'Found %s URLs!' % (urls.rowcount) for url in urls: if verbose: print u'[!!!] Working on url: %s' % (cfg.MB_SITE + u'/url/' + unicode(url['gid'])) new_url = get_target_url(url['url'], verbose) if new_url is None: print 'Skipping %s.' % (url['url']) continue if verbose: print u'Changing %s to %s' % (url['url'], new_url) try: mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False) except: continue if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_clean_up_exit_urls.pid'): main(options.verbose)
print 'Finding URLs using SQL query:', query_http_urls urls = db.execute(query_http_urls) if verbose: print u'Found %s URLs!' % (urls.rowcount) for url in urls: if verbose: print u'[!!!] Working on url: %s' % (cfg.MB_SITE + u'/url/' + unicode(url['gid'])) new_url = url['url'].replace('http://', 'https://', 1) if new_url is None: print 'Skipping %s.' % (url['url']) continue if verbose: print u'Changing %s to %s' % (url['url'], new_url) try: mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False) except: continue if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_httpsify_the_world.pid'): main(options.verbose)
update.add('begin_date') reasons.append(('BEGIN DATE', begin_date_reasons)) if not artist['end_date_year']: end_date, end_date_reasons = determine_end_date( artist, page, is_performance_name) if end_date['year']: colored_out(bcolors.OKGREEN, " * new end date:", end_date) artist['end_date_year'] = end_date['year'] artist['end_date_month'] = end_date['month'] artist['end_date_day'] = end_date['day'] update.add('end_date') reasons.append(('END DATE', end_date_reasons)) if update: edit_note = 'From %s' % (artist['url'], ) for field, reason in reasons: edit_note += '\n\n%s:\n%s' % (field, ' '.join(reason)) out(' * edit note:', edit_note.replace('\n', ' ')) time.sleep(10) mb.edit_artist(artist, update, edit_note) db.execute( "INSERT INTO bot_wp_artist_data (gid, lang) VALUES (%s, %s)", (artist['gid'], wp_lang)) out() if __name__ == '__main__': with PIDFile('/tmp/mbbot_wp_artist_country.pid'): main()
Using `spotify_url_cleanup.py`: https://github.com/Freso/musicbrainz-bot/blob/master/spotify_url_cleanup.py""" urls = db.execute(query_bad_spotify_urls) if verbose: print u'Found %s URLs!' % (urls.rowcount) for url in urls: if verbose: print u'Working on url: %s' % (cfg.MB_SITE + u'/url/' + unicode(url['gid'])) new_url = get_spotify_url(url['url'], verbose) if new_url is None: print 'Skipping %s.' % (url['url']) continue if verbose: print u'→ Changing %s to %s' % (url['url'], new_url) try: mb.edit_url(url['gid'], url['url'], new_url, edit_note, auto=False) except: continue if __name__ == '__main__': parser = OptionParser() parser.add_option('-v', '--verbose', action='store_true', default=False, help='be more verbose') (options, args) = parser.parse_args() with PIDFile('/tmp/mbbot_clean_up_spotify_links.pid'): main(options.verbose)