def push_data():
    """Send one work edit to MusicBrainz for every row provided by ``aClient``.

    A ``MusicBrainzClient`` is created from the credentials in ``cfg``. Each
    row from ``aClient.get_mb_data()`` is unpacked as
    ``(gid, workid, iswc, artist, work)`` and passed to ``edit_work`` with
    ``{'iswc': iswc}`` as the changed fields. The edit note cites the URL
    returned by ``aClient.search_url(str(workid), 'i')``. One progress line
    is printed after each edit.
    """
    client = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    for gid, workid, iswc, artist, work in aClient.get_mb_data():
        source_url = aClient.search_url(str(workid), 'i')
        edit_note = 'Data taken from ' + source_url
        client.edit_work(gid, {'iswc': iswc}, edit_note)
        # A single parenthesised argument prints identically whether
        # ``print`` is the Python 2 statement or the function.
        print(artist + ' work: ' + work + ' Done!')
def main(args): if not args: out('Usage: cancel_edits.py <edit_number edit_note>...\n') out('Example: cancel_edits.py "Edit #123 my mistake"') out(' cancel_edits.py 123 124 125') return edits = [] for arg in args: if not isinstance(arg, unicode): arg = unicode(arg, locale.getpreferredencoding()) m = re.match(ur'(?:[Ee]dit )?#?([0-9]+) ?(.*)$', arg) if not m: out('invalid edit number "%s", aborting!' % arg) return edit_nr = str(m.group(1)) edit_note = m.group(2).lstrip() edits.append((edit_nr, edit_note)) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) for edit_nr, edit_note in edits: out(u'Cancel edit #%s: %s' % (edit_nr, edit_note if edit_note else u'<no edit note>')) mb.cancel_edit(str(edit_nr), edit_note)
#!/usr/bin/python import re import sqlalchemy from editing import MusicBrainzClient import time from utils import out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) query = """ SELECT DISTINCT r.id, r.gid AS r_gid, w.gid AS w_gid, r.name, r.comment, lrw.id AS rel_id, lt.id AS link_type, r.artist_credit FROM recording r JOIN l_recording_work lrw ON lrw.entity0 = r.id JOIN link l ON l.id = lrw.link JOIN link_type lt ON l.link_type = lt.id JOIN link_attribute la ON la.link = l.id JOIN link_attribute_type lat ON la.attribute_type = lat.id AND lat.name = 'live' JOIN work w ON lrw.entity1 = w.id WHERE r.comment ~ E'live, \\\\d{4}(-\\\\d{2})?(-\\\\d{2})?:' AND l.begin_date_year IS NULL AND l.end_date_year IS NULL AND lt.name = 'performance' AND r.edits_pending = 0 AND lrw.edits_pending = 0 /* Only one linked work */ AND NOT EXISTS (SELECT 1 FROM l_recording_work lrw2 WHERE lrw2.entity0 = r.id AND lrw2.entity1 <> lrw.entity1)
import re import sqlalchemy import solr from editing import MusicBrainzClient from mbbot.source.secondhandsongs import SHSWebService import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) shs = SHSWebService() """ CREATE TABLE bot_shs_work_lang ( work uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_shs_work_lang_pkey PRIMARY KEY (work) ); """ query = """ WITH works_wo_lang AS ( SELECT w.id AS work_id, u.url AS shs_url FROM work w
from mbbot.wp.analysis import determine_country
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Database connection; the search path adds the bot's own schema next to
# the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language comes from the first command-line argument, default 'en'.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'
wp = MediaWiki('http://%s.wikipedia.org/w/api.php' % wp_lang)

# Non-English languages use a Solr core named with a '_<lang>' suffix.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Reference DDL for the bookkeeping tables. This bare string is never
# executed by the script; it only documents the expected schema.
"""
CREATE TABLE bot_wp_artist_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_artist_link_pkey PRIMARY KEY (gid, lang)
);

CREATE TABLE bot_wp_artist_link_ignore (
    gid uuid NOT NULL,
    lang character varying(2),
    CONSTRAINT bot_wp_artist_link_ignore_pkey PRIMARY KEY (gid, lang)
);
"""
from mbbot.wp.analysis import determine_country
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Database connection; the search path adds the bot's own schema next to
# the musicbrainz schema.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# Wikipedia language comes from the first command-line argument, default 'en'.
wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en'
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# Non-English languages use a Solr core named with a '_<lang>' suffix.
suffix = '_' + wp_lang if wp_lang != 'en' else ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Reference DDL for the bookkeeping tables. This bare string is never
# executed by the script; it only documents the expected schema.
"""
CREATE TABLE bot_wp_artist_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_artist_link_pkey PRIMARY KEY (gid, lang)
);

CREATE TABLE bot_wp_artist_link_ignore (
    gid uuid NOT NULL,
    lang character varying(2),
    CONSTRAINT bot_wp_artist_link_ignore_pkey PRIMARY KEY (gid, lang)
);
"""
import re import sqlalchemy import solr from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz") mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_encyclopedisque_medium_format ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_encyclopedisque_medium_format ADD CONSTRAINT bot_encyclopedisque_medium_format_pkey PRIMARY KEY (gid); """ query = """ WITH releases_wo_7inch AS ( SELECT r.id, u.url, m.format
from mbbot.wp.wikipage import WikiPage
from utils import mangle_name, join_names, out, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page
import config as cfg

# Database connection with the bot schema appended to the search path.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB)

# First command-line argument selects the Wikipedia language ('en' if absent).
if len(sys.argv) > 1:
    wp_lang = sys.argv[1]
else:
    wp_lang = 'en'
wp = MediaWiki('https://%s.wikipedia.org/w/api.php' % wp_lang)

# The English Solr core has no suffix; other languages use '_<lang>'.
if wp_lang != 'en':
    suffix = '_' + wp_lang
else:
    suffix = ''
wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix)

mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)

# Reference DDL for the bookkeeping table (bare string, never executed).
"""
CREATE TABLE bot_wp_rg_link (
    gid uuid NOT NULL,
    lang character varying(2),
    processed timestamp with time zone DEFAULT now(),
    CONSTRAINT bot_wp_rg_link_pkey PRIMARY KEY (gid, lang)
);
"""

acceptable_countries_for_lang = {'fr': ['FR', 'MC']}
# 'en' refers to the very same list object as 'fr', not a copy.
acceptable_countries_for_lang['en'] = acceptable_countries_for_lang['fr']
def open(self, mb=False, do=False, client=False):
    """Open the requested connections and optionally build an editing client.

    :param mb: when true, store ``self.mbengine.connect()`` in ``self.mbdb``.
    :param do: when true, store ``self.doengine.connect()`` in ``self.dodb``.
    :param client: when true, return a new ``MusicBrainzClient`` built from
        the credentials in ``cfg``.
    :returns: the new client if ``client`` is true, otherwise ``None``.
    """
    if mb:
        self.mbdb = self.mbengine.connect()
    if do:
        self.dodb = self.doengine.connect()
    if not client:
        return None
    return MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
from simplemediawiki import MediaWiki from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, mw_remove_markup, out import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz") wp = MediaWiki('http://en.wikipedia.org/w/api.php') wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia') mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) query = """ SELECT DISTINCT a.id, a.gid, a.name, a.country, a.type, a.gender, a.begin_date_year, a.begin_date_month, a.begin_date_day, a.end_date_year, a.end_date_month, a.end_date_day, u.url FROM s_artist a JOIN l_artist_url l ON l.entity0 = a.id AND l.link IN (SELECT id FROM link WHERE link_type = 179) JOIN url u ON u.id = l.entity1 LEFT JOIN bot_wp_artist_data b ON a.gid = b.gid
from simplemediawiki import MediaWiki from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, out import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz") wp = MediaWiki('http://en.wikipedia.org/w/api.php') wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia') mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_wp_rg ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_wp_rg ADD CONSTRAINT bot_wp_rg_pkey PRIMARY KEY (gid); """ query = """ WITH rgs_wo_wikipedia AS ( SELECT a.id
import re
import sys
import urllib
import urllib2
import config
import pymongo
import pprint
from editing import MusicBrainzClient
import cgi

# NOTE(review): account, password and site are hard-coded here rather than
# read from the config module - confirm this is intended.
mb = MusicBrainzClient('lukz_bot', 'mb', 'http://mb.muziq.eu')

# HTTP opener; the User-Agent header is only overridden when one is configured.
opener = urllib2.build_opener()
if config.WWW_USER_AGENT:
    opener.addheaders = [('User-Agent', config.WWW_USER_AGENT)]

mongo = pymongo.Connection()
db = mongo.mbot

# Maps each HTML-special character to its entity. The values must be the
# escaped entities: mapping a character to itself would escape nothing.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
    ">": "&gt;",
    "<": "&lt;",
}
m = re.search(ur'<span property="v:identifier">(.+?)</span>', page) identifier = m.group(1).upper() if m else None return identifier if identifier and isrc_valid(identifier) else None query_releases = ''' SELECT DISTINCT r.id, r.gid, r.barcode FROM release r WHERE r.barcode ~ %s ''' engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute('SET search_path TO musicbrainz') zeroinch = ZeroInch() mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) ws = MusicBrainzWebservice(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) def identify_isrc_edit(isrcs): return lambda edit_nr, text: set(isrcs) == set(re.findall(r'<a href="' + cfg.MB_SITE + r'/isrc/([A-Z0-9]{12})">', text)) isrc_submitted = set(url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_submitted''')) isrc_missing = set(url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_missing''')) isrc_problematic = set(url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_problematic''')) #for artists in [['Gui_Boratto']]: #for artists in zeroinch.get_artists('/label/Warp_Records'): for artists in zeroinch.get_artists('/catalogue', cipher='all', page='1'): for artist in artists: artist_url = u'http://www.zero-inch.com/artist/%s' % artist
import re import sqlalchemy import solr from editing import MusicBrainzClient import discogs_client as discogs import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs.user_agent = "MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot" """ CREATE TABLE bot_discogs_medium_format ( medium integer NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_medium_format_pkey PRIMARY KEY (medium) ); """ query = """ WITH mediums_with_fuzzy_format AS ( SELECT r.id AS release_id, m.position, m.id AS medium_id, u.url AS discogs_url, m.format
import re import sqlalchemy import solr from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz") mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_encyclopedisque_medium_format ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_encyclopedisque_medium_format ADD CONSTRAINT bot_encyclopedisque_medium_format_pkey PRIMARY KEY (gid); """ query = """ WITH releases_wo_7inch AS (
def main(verbose=False): download_if_modified(bbc_sitemap_url, bbc_sitemap) db = db_connect() release_redirects = dict(get_release_redirects(db)) release_groups = dict(get_release_groups(db)) releases = dict(get_releases(db)) bbc_reviews_set = set((gid, url) for gid, url in db.execute("""SELECT gid, url FROM bot_bbc_reviews_set""")) review_urls = defaultdict(set) for rg, url in get_review_urls(db): review_urls[rg].add(url) cleanup_review_urls = set() for cleanup_url in cleanup_urls: f = urllib.urlopen(cleanup_url) cleanup_review_urls |= set(re.findall(ur"http://www.bbc.co.uk/music/reviews/[0-9a-z]+", f.read())) editor_id = db.execute("""SELECT id FROM editor WHERE name = %s""", cfg.MB_USERNAME).first()[0] mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE, editor_id=editor_id) normal_edits_left, edits_left = mb.edits_left() bbc_reviews = list(load_bbc_reviews(bbc_sitemap)) count = len(bbc_reviews) for i, (review_url, release_url, title) in enumerate(bbc_reviews): if normal_edits_left <= 0: break if verbose: out(u"%d/%d - %.2f%%" % (i + 1, count, (i + 1) * 100.0 / count)) out(u"%s %s" % (title, review_url)) out(release_url) if review_url in cleanup_review_urls: continue release_gid = utils.extract_mbid(release_url, "release") row = release_redirects.get(release_gid) if not row: row = releases.get(release_gid) if not row: if verbose: out(" non-existant release in review %s" % review_url) continue rg, ac, release_name = row gid, name = release_groups[rg] if review_url in review_urls[rg]: continue if (gid, review_url) in bbc_reviews_set: if verbose: out(u" already linked earlier (probably got removed by some editor!") continue mb_title = "%s - %s" % (artist_credit(db, ac), release_name) if not are_similar(title, mb_title): if verbose: out(u" similarity too small: %s <-> %s" % (title, mb_title)) # out(u'|-\n| [%s %s]\n| [[ReleaseGroup:%s|%s]]\n| [[Release:%s|%s]]' % (review_url, bbc_name, gid, name, release_gid, release_name)) continue text = ( 
u"Review is in BBC mapping [1], and review name “%s” is" " similar to the release name. If this is wrong," " please note it here and put the correct mapping in" " the wiki [2].\n\n[1] %s\n[2] %s" % (title, bbc_sitemap_url, cleanup_urls[0]) ) text += "\n\n%s" % prog try: out(u"http://musicbrainz.org/release-group/%s -> %s" % (gid, review_url)) mb.add_url("release_group", gid, 94, review_url, text, auto=False) db.execute("INSERT INTO bot_bbc_reviews_set (gid,url) VALUES (%s,%s)", (gid, review_url)) bbc_reviews_set.add((gid, review_url)) normal_edits_left -= 1 except (urllib2.HTTPError, urllib2.URLError, socket.timeout) as e: out(e)
def init_mb():
    """Create the MusicBrainz client and bind it to the module-level ``mb``.

    The credentials and site are read from ``cfg``.
    """
    global mb
    credentials = (cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
    mb = MusicBrainzClient(*credentials)
import re import sqlalchemy import solr from editing import MusicBrainzClient from mbbot.source.secondhandsongs import SHSWebService import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) shs = SHSWebService() """ CREATE TABLE bot_shs_work_lang ( work uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_shs_work_lang_pkey PRIMARY KEY (work) ); """ query = """ WITH works_wo_lang AS ( SELECT w.id AS work_id, u.url AS shs_url FROM work w JOIN l_url_work l ON l.entity1 = w.id AND l.link IN (SELECT id FROM link WHERE link_type = 280)
import re import sqlalchemy import solr from editing import MusicBrainzClient import discogs_client as discogs import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot' """ CREATE TABLE bot_discogs_release_packaging ( release uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_release_packaging_pkey PRIMARY KEY (release) ); """ query = """ WITH releases_wo_packaging AS ( SELECT r.id AS release_id, u.url AS discogs_url FROM release r
from simplemediawiki import MediaWiki from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, quote_page_title import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) wp = MediaWiki('https://en.wikipedia.org/w/api.php') wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia') mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_wp_label ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_wp_label ADD CONSTRAINT bot_wp_label_pkey PRIMARY KEY (gid); """ query = """ WITH labels_wo_wikipedia AS ( SELECT a.id
identifier = m.group(1).upper() if m else None return identifier if identifier and isrc_valid(identifier) else None query_releases = ''' SELECT DISTINCT r.id, r.gid, r.barcode FROM release r WHERE r.barcode ~ %s ''' engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute('SET search_path TO musicbrainz') zeroinch = ZeroInch() mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) ws = MusicBrainzWebservice(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) def identify_isrc_edit(isrcs): return lambda edit_nr, text: set(isrcs) == set( re.findall(r'<a href="' + cfg.MB_SITE + r'/isrc/([A-Z0-9]{12})">', text )) isrc_submitted = set( url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_submitted''')) isrc_missing = set( url for url, in db.execute('''SELECT url FROM bot_isrc_zeroinch_missing'''))
from editing import MusicBrainzClient from mbbot.source.secondhandsongs import SHSWebService from picard.similarity import similarity2 from kitchen.text.converters import to_unicode import pprint import urllib import urllib2 import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) shs = SHSWebService() """ CREATE TABLE mbbot.bot_shs_link_artist ( artist uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_shs_link_artist_pkey PRIMARY KEY (artist) ); """ query = """ WITH artists_wo_shs AS ( SELECT DISTINCT a.id AS artist_id, a.gid AS artist_gid, w.id AS work_id, w.gid AS work_gid, u.url AS shs_url FROM artist a
import re import random import locale from collections import defaultdict import itertools import sqlalchemy import discogs_client as discogs from editing import MusicBrainzClient import Levenshtein import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute('SET search_path TO musicbrainz') mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs.user_agent = 'MusicBrainzDiscogsReleaseGroupsBot/0.1 +https://github.com/weisslj/musicbrainz-bot' query_rg_without_master = ''' SELECT rg.artist_credit, rg.id, rg.gid, release_name.name FROM release_group rg JOIN release_name ON rg.name = release_name.id WHERE rg.id IN ( SELECT DISTINCT rg.id FROM release_group rg JOIN release ON rg.id = release.release_group JOIN l_release_url l_ru ON release.id = l_ru.entity0 JOIN link l ON l_ru.link = l.id WHERE l.link_type = 76
); CREATE TABLE bot_asin_catmismatch ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_asin_catmismatch_pkey PRIMARY KEY (gid) ); ''' engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, mbbot") editor_id = db.execute('''SELECT id FROM editor WHERE name = %s''', cfg.MB_USERNAME).first()[0] mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE, editor_id=editor_id) store_map = [ # http://www.amazon.com/gp/help/customer/display.html/ref=hp_left_cn?nodeId=527692 ('us', ['US', 'AU']), # http://www.amazon.co.uk/gp/help/customer/display.html/ref=ssd?nodeId=1204872 ('uk', ['GB', 'XE']), # http://www.amazon.de/gp/help/customer/display.html/ref=hp_left_sib?nodeId=13464781 ('de', ['DE', 'AT', 'BE', 'LI', 'LU', 'NL', 'CH', 'XE']), # http://www.amazon.fr/gp/help/customer/display.html?nodeId=897502 ('fr', ['FR', 'MC', 'BE', 'LU', 'CH', 'XE']), # http://www.amazon.co.jp/gp/help/customer/display.html/ref=hp_rel_topic?nodeId=1039606 ('jp', ['JP']), # http://www.amazon.ca/gp/help/customer/display.html?nodeId=918742 ('ca', ['CA']),
from utils import mangle_name, join_names, out, get_page_content, extract_page_title, colored_out, bcolors, escape_query, quote_page_title, wp_is_canonical_page from utils import mangle_name, join_names, out, get_page_content, extract_page_title, colored_out, bcolors, escape_query, quote_page_title import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) wp_lang = sys.argv[1] if len(sys.argv) > 1 else 'en' wp = MediaWiki('http://%s.wikipedia.org/w/api.php' % wp_lang) suffix = '_' + wp_lang if wp_lang != 'en' else '' wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia' + suffix) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_wp_rg_link ( gid uuid NOT NULL, lang character varying(2), processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_wp_rg_link_pkey PRIMARY KEY (gid, lang) ); """ acceptable_countries_for_lang = {'fr': ['FR', 'MC']} acceptable_countries_for_lang['en'] = acceptable_countries_for_lang['fr'] query_params = [] no_country_filter = ( wp_lang == 'en') and ('en' not in acceptable_countries_for_lang
#!/usr/bin/python import sys import os import re import time import urllib2 import json from editing import MusicBrainzClient from utils import out, colored_out, bcolors, monkeypatch_mechanize import config as cfg # Work around mechanize bug. See: https://github.com/jjlee/mechanize/pull/58 monkeypatch_mechanize() mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) FILE_RE = re.compile( r'^(?P<mbid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})-(?P<type>front|back|medium|booklet|tray|sticker)(?:-\d+)?\.(?:jpeg|jpg|png|gif)', re.I) class CoverArtArchiveReleaseInfo(object): def __init__(self, release_id): try: data = urllib2.urlopen('http://coverartarchive.org/release/%s/' % release_id) self.metadata = json.load(data) except urllib2.HTTPError: self.metadata = { 'images': [],
#!/usr/bin/python import re import sqlalchemy from editing import MusicBrainzClient import time from utils import out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) query = """ SELECT DISTINCT r.id, r.gid, r.name, r.comment, lrw.id AS rel_id, lt.id AS link_type, r.artist_credit FROM recording r JOIN l_recording_work lrw ON lrw.entity0 = r.id JOIN link l ON l.id = lrw.link JOIN link_type lt ON l.link_type = lt.id JOIN link_attribute la ON la.link = l.id JOIN link_attribute_type lat ON la.attribute_type = lat.id AND lat.name = 'live' WHERE r.comment ~ E'live, \\\\d{4}(-\\\\d{2})?(-\\\\d{2})?:' AND l.begin_date_year IS NULL AND l.end_date_year IS NULL AND lt.name = 'performance' AND r.edits_pending = 0 AND lrw.edits_pending = 0 /* Only one linked work */ AND NOT EXISTS (SELECT 1 FROM l_recording_work lrw2 WHERE lrw2.entity0 = r.id AND lrw2.entity1 <> lrw.entity1) ORDER BY r.artist_credit
from simplemediawiki import MediaWiki from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, quote_page_title import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) wp = MediaWiki("https://en.wikipedia.org/w/api.php") wps = solr.SolrConnection("http://localhost:8983/solr/wikipedia") mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_wp_label ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_wp_label ADD CONSTRAINT bot_wp_label_pkey PRIMARY KEY (gid); """ query = """ WITH labels_wo_wikipedia AS (
from simplemediawiki import MediaWiki from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, contains_text_in_script, quote_page_title import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz") wp = MediaWiki('http://ko.wikipedia.org/w/api.php') wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia_ko') mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_wp_artist_ko ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_wp_artist_ko ADD CONSTRAINT bot_wp_artist_kokey PRIMARY KEY (gid); """ query = """ WITH artists_wo_wikipedia AS ( SELECT a.id
import re import sqlalchemy import solr from editing import MusicBrainzClient import discogs_client import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs = discogs_client.Client('MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot') """ CREATE TABLE bot_discogs_medium_format ( medium integer NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_medium_format_pkey PRIMARY KEY (medium) ); """ query = """ WITH mediums_with_fuzzy_format AS ( SELECT r.id AS release_id, m.position, m.id AS medium_id, u.url AS discogs_url, m.format
import re import sqlalchemy import solr from editing import MusicBrainzClient import discogs_client as discogs import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot' """ CREATE TABLE bot_discogs_release_packaging ( release uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_release_packaging_pkey PRIMARY KEY (release) ); """ query = """ WITH releases_wo_packaging AS ( SELECT r.id AS release_id, u.url AS discogs_url
from simplemediawiki import MediaWiki from editing import MusicBrainzClient import pprint import urllib import time from utils import mangle_name, join_names, contains_text_in_script, quote_page_title import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) wp = MediaWiki('http://ko.wikipedia.org/w/api.php') wps = solr.SolrConnection('http://localhost:8983/solr/wikipedia_ko') mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE bot_wp_artist_ko ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now() ); ALTER TABLE ONLY bot_wp_artist_ko ADD CONSTRAINT bot_wp_artist_kokey PRIMARY KEY (gid); """ query = """ WITH artists_wo_wikipedia AS ( SELECT a.id
def init_mb():
    """Announce the login, then bind a new client to the module-level ``mb``.

    The credentials and site are read from ``config``.
    """
    global mb
    # A single parenthesised argument prints identically whether ``print``
    # is the Python 2 statement or the function.
    print("Logging in...")
    credentials = (config.MB_USERNAME, config.MB_PASSWORD, config.MB_SITE)
    mb = MusicBrainzClient(*credentials)
import re import sqlalchemy import solr from editing import MusicBrainzClient import discogs_client as discogs import pprint import urllib import time from utils import mangle_name, join_names, out, colored_out, bcolors import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot' """ CREATE TABLE bot_discogs_medium_format ( medium integer NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_medium_format_pkey PRIMARY KEY (medium) ); """ query = """ WITH mediums_with_fuzzy_format AS ( SELECT r.id AS release_id, m.position, m.id AS medium_id, u.url AS discogs_url, m.format
'artist': 352, 'label': 354, 'release-group': 353, 'work': 351, 'area': 358, 'place': 594, 'series': 749, 'instrument': 733, 'event': 790 } engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) """ CREATE TABLE mbbot.bot_wp_wikidata_links ( gid uuid NOT NULL, lang character varying(10), processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_wp_wikidata_links_pkey PRIMARY KEY (gid, lang) ); """ def main(ENTITY_TYPE): entity_type_table = ENTITY_TYPE.replace('-', '_') url_relationship_table = 'l_%s_url' % entity_type_table if ENTITY_TYPE != 'work' else 'l_url_%s' % entity_type_table main_entity_entity_point = "entity0" if ENTITY_TYPE != 'work' else "entity1"
#!/usr/bin/python import sys import os import re import time import urllib2 import json from editing import MusicBrainzClient from utils import out, colored_out, bcolors, monkeypatch_mechanize import config as cfg # Work around mechanize bug. See: https://github.com/jjlee/mechanize/pull/58 monkeypatch_mechanize() mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) FILE_RE = re.compile(r'^(?P<mbid>[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})-(?P<type>front|back|medium|booklet|tray)(?:-\d+)?\.(?:jpeg|jpg|png|gif)', re.I) class CoverArtArchiveReleaseInfo(object): def __init__(self, release_id): try: data = urllib2.urlopen('http://coverartarchive.org/release/%s/' % release_id) self.metadata = json.load(data) except urllib2.HTTPError: self.metadata = {'images': [], 'release': 'http://musicbrainz.org/release/%s' % release_id} def hasType(self, type): for image in self.metadata['images']: for img_type in image['types']: if img_type.lower() == type.lower():
#!/usr/bin/python import re import sqlalchemy from editing import MusicBrainzClient import discogs_client import time import Levenshtein from utils import mangle_name, join_names, out, colored_out, bcolors, durationToMS, msToDuration, unaccent import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs = discogs_client.Client('MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot') """ CREATE TABLE bot_discogs_track_number ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_track_number_pkey PRIMARY KEY (gid) ); """ query = """ WITH vinyl_releases AS ( SELECT DISTINCT r.id, u.url AS discogs_url
#!/usr/bin/python import re import sqlalchemy from editing import MusicBrainzClient import discogs_client as discogs import time import Levenshtein from utils import mangle_name, join_names, out, colored_out, bcolors, durationToMS, msToDuration, unaccent import config as cfg engine = sqlalchemy.create_engine(cfg.MB_DB) db = engine.connect() db.execute("SET search_path TO musicbrainz, %s" % cfg.BOT_SCHEMA_DB) mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE) discogs.user_agent = 'MusicBrainzBot/0.1 +https://github.com/murdos/musicbrainz-bot' """ CREATE TABLE bot_discogs_track_number ( gid uuid NOT NULL, processed timestamp with time zone DEFAULT now(), CONSTRAINT bot_discogs_track_number_pkey PRIMARY KEY (gid) ); """ query = """ WITH vinyl_releases AS ( SELECT DISTINCT r.id, u.url AS discogs_url FROM release r