Beispiel #1
0
def main():
    """Look up all outstanding MBIDs with the latest "musicbrainz" scraper.

    Initialises the database engine, loads the latest scraper for the
    "musicbrainz" source, and hands the MBIDs returned by ``get_mbids()`` to
    ``lookup()`` in chunks of 100, logging progress after every chunk.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)
    print(scraper)

    mbids = get_mbids()
    total = len(mbids)

    starttime = time.time()
    done = 0
    for mblist in util.chunks(mbids, 100):
        lookup(mblist, scraper)
        # Count what was actually processed: the last chunk can be shorter
        # than 100, and adding a fixed 100 let `done` run past `total`.
        done += len(mblist)
        durdelta, remdelta = util.stats(done, total, starttime)
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta), str(remdelta))
def main():
    """Run the latest "musicbrainz" scraper over every MBID still to be done.

    The MBIDs come from ``get_mbids()`` and are passed to ``lookup()`` in
    chunks of 100. After each chunk the elapsed and remaining time reported
    by ``util.stats`` is logged.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)
    print(scraper)

    mbids = get_mbids()
    total = len(mbids)

    starttime = time.time()
    done = 0
    for mblist in util.chunks(mbids, 100):
        lookup(mblist, scraper)
        # The final chunk may hold fewer than 100 MBIDs; use its real length
        # so the progress counter never exceeds `total`.
        done += len(mblist)
        durdelta, remdelta = util.stats(done, total, starttime)
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta),
                 str(remdelta))
Beispiel #3
0
def process(connection, datalist):
    """Store recording and release-group metadata for each scraped item.

    Args:
        connection: passed through to ``add_to_recmeta`` and ``add_to_rg``.
        datalist: iterable of ``(recording mbid, item)`` pairs, where
            ``item["result"]`` holds the scraped payload; items with a falsy
            result are skipped.
    """
    recording_count = 0
    for recmbid, item in datalist:
        result = item["result"]
        if result:
            title = result["name"]
            artist_credit = result["artist_credit"]
            add_to_recmeta(connection, recmbid, title, artist_credit)
            recording_count += 1

            # One row per release group the recording appears in.
            release_groups = result["release_group_map"]
            for rgmbid, rgdata in release_groups.items():
                rg_title, first_release, rg_credit = (
                    rgdata["name"],
                    rgdata["first_release_date"],
                    rgdata["artist_credit"],
                )
                add_to_rg(connection, rgmbid, recmbid, rg_title, rg_credit, first_release)

    log.info(" * added %s recordings", recording_count)
Beispiel #4
0
def process_file(module, filename, numworkers, save=False):
    """Feed every row of a CSV file to ``process_items``, one chunk at a time.

    Args:
        module: passed through unchanged to ``process_items``.
        filename: path of a CSV file with a header row; each row is read
            into a dict by ``csv.DictReader``.
        numworkers: passed through unchanged to ``process_items``.
        save: passed through unchanged to ``process_items``.
    """
    # The csv module requires newline="" so that line breaks embedded in
    # quoted fields are parsed correctly.
    with open(filename, newline="") as csvfile:
        rows = list(csv.DictReader(csvfile))

    total = len(rows)
    starttime = time.time()
    done = 0
    CHUNK_SIZE = 1

    for items in util.chunks(rows, CHUNK_SIZE):
        process_items(items, module, save, numworkers)
        # Use the real chunk length so the counter stays correct if
        # CHUNK_SIZE is ever raised and the last chunk comes up short.
        done += len(items)
        durdelta, remdelta = util.stats(done, total, starttime)
        # Random pause between chunks (0.5-1.5 s) before reporting progress.
        time.sleep(random.uniform(.5, 1.5))
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta),
                 str(remdelta))
def get_mbids():
    """Return recording MBIDs that the latest "musicbrainz" scraper lacks.

    Returns:
        A list of MBIDs (as text) from the ``recording`` table that have no
        row in ``item`` for the latest scraper's id.
    """
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)

    existing_query = text("""
        SELECT mbid::text
          FROM item
         WHERE scraper_id = :scraper_id""")

    # Wrap the statement in text(): passing a bare string to
    # Connection.execute is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
    recording_query = text("SELECT mbid::text FROM recording")

    with db.engine.connect() as connection:
        res = connection.execute(existing_query, {"scraper_id": scraper["id"]})
        existing_rows = [r[0] for r in res]
        log.info("got %s existing items", len(existing_rows))
        # Set membership keeps the filter below linear in the number of
        # recordings.
        existing = set(existing_rows)
        res = connection.execute(recording_query)
        remaining = [r[0] for r in res if r[0] not in existing]
        log.info("remaining %s", len(remaining))
        return remaining
def process(connection, datalist):
    """Write recording and release-group metadata for a batch of items.

    Each element of ``datalist`` is a ``(recording mbid, item)`` pair. Items
    whose ``"result"`` is falsy are ignored. For the rest, the recording is
    written with ``add_to_recmeta`` and each entry of its
    ``"release_group_map"`` with ``add_to_rg``. The number of recordings
    written is logged at the end.
    """
    num_added = 0
    for recmbid, item in datalist:
        payload = item["result"]
        if not payload:
            # Nothing was scraped for this recording.
            continue

        rec_name = payload["name"]
        rec_credit = payload["artist_credit"]
        add_to_recmeta(connection, recmbid, rec_name, rec_credit)
        num_added = num_added + 1

        for rgmbid, rgdata in payload["release_group_map"].items():
            rg_name = rgdata["name"]
            rg_first_release = rgdata["first_release_date"]
            rg_credit = rgdata["artist_credit"]
            add_to_rg(
                connection,
                rgmbid,
                recmbid,
                rg_name,
                rg_credit,
                rg_first_release,
            )

    log.info(" * added %s recordings", num_added)
Beispiel #7
0
def get_mbids():
    """List the recording MBIDs not yet scraped by the latest scraper.

    Loads the latest scraper for the "musicbrainz" source, reads the MBIDs
    already present in ``item`` for that scraper, and returns every MBID
    from ``recording`` that is not among them.

    Returns:
        A list of MBID strings still to be processed.
    """
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)

    existing_query = text("""
        SELECT mbid::text
          FROM item
         WHERE scraper_id = :scraper_id""")

    # A plain string is no longer accepted by Connection.execute in
    # SQLAlchemy 2.0 (deprecated since 1.4), so build a text() construct.
    recording_query = text("SELECT mbid::text FROM recording")

    with db.engine.connect() as connection:
        res = connection.execute(existing_query, {"scraper_id": scraper["id"]})
        already_scraped = [r[0] for r in res]
        log.info("got %s existing items", len(already_scraped))
        # Convert to a set so each membership test below is O(1).
        already_scraped = set(already_scraped)
        res = connection.execute(recording_query)
        remaining = [r[0] for r in res if r[0] not in already_scraped]
        log.info("remaining %s", len(remaining))
        return remaining
Beispiel #8
0
def main():
    """Export release-group and recording metadata to CSV files.

    Writes ``release-group-meta.csv`` from ``get_rgs()`` and a series of
    ``recording-meta-<i>.csv`` files (numbered from 1) from
    ``get_recordings()``, all in the current working directory.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    log.info("Release groups")
    releasegroups = get_rgs()
    fieldnames = ["mbid", "release_title", "artist", "year"]
    # newline="" is required by the csv module; without it the writer's
    # "\r\n" terminator is translated again and yields blank rows on Windows.
    with open("release-group-meta.csv", "w", newline="") as fp:
        w = csv.DictWriter(fp, fieldnames=fieldnames)
        w.writeheader()
        w.writerows(releasegroups)

    log.info("Recordings")
    recordings = get_recordings()
    fieldnames = ["mbid", "recording", "artist"]
    # Chunk size chosen so the recordings are spread over at most 8 files.
    count = (len(recordings) // 8) + 1
    for i, reclist in enumerate(util.chunks(recordings, count), 1):
        with open("recording-meta-{}.csv".format(i), "w", newline="") as fp:
            w = csv.DictWriter(fp, fieldnames=fieldnames)
            w.writeheader()
            w.writerows(reclist)
Beispiel #9
0
def main():
    """Dump release-group and recording metadata as CSV.

    The release groups returned by ``get_rgs()`` go to
    ``release-group-meta.csv``. The recordings returned by
    ``get_recordings()`` are split across ``recording-meta-1.csv``,
    ``recording-meta-2.csv`` and so on. Every file starts with a header row.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    log.info("Release groups")
    releasegroups = get_rgs()
    fieldnames = ["mbid", "release_title", "artist", "year"]
    # Files handed to csv writers must be opened with newline="" (see the
    # csv module docs); otherwise extra "\r" characters appear on platforms
    # that translate line endings.
    with open("release-group-meta.csv", "w", newline="") as fp:
        w = csv.DictWriter(fp, fieldnames=fieldnames)
        w.writeheader()
        for rg in releasegroups:
            w.writerow(rg)

    log.info("Recordings")
    recordings = get_recordings()
    fieldnames = ["mbid", "recording", "artist"]
    # len // 8 + 1 rows per file means no more than 8 output files.
    count = (len(recordings)//8) + 1
    for i, reclist in enumerate(util.chunks(recordings, count), 1):
        with open("recording-meta-{}.csv".format(i), "w", newline="") as fp:
            w = csv.DictWriter(fp, fieldnames=fieldnames)
            w.writeheader()
            for rec in reclist:
                w.writerow(rec)
Beispiel #10
0
def main():
    """Process stored scraper data for all recordings in batches of 10000.

    For each batch a database connection is opened, the rows saved by the
    latest "musicbrainz" scraper are fetched with ``get_data`` and passed to
    ``process``. Progress is logged after every batch.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    scraper = data.load_latest_scraper_for_source(data.load_source("musicbrainz"))

    recordings = get_recordings()
    total = len(recordings)
    processed = 0
    started_at = time.time()
    log.info("starting..., %s recordings to process", total)

    for batch in util.chunks(recordings, 10000):
        log.info("have %s recordings", len(batch))
        # One connection per batch; it is closed when the block exits.
        with db.engine.connect() as connection:
            rows = get_data(connection, scraper["id"], batch)
            log.info(" - got %s rows matching them", len(rows))
            process(connection, rows)
        processed += len(batch)
        elapsed, remaining = util.stats(processed, total, started_at)
        log.info("Done %s/%s in %s; %s remaining", processed, total, str(elapsed), str(remaining))
def main():
    """Walk all recordings and process their saved "musicbrainz" data.

    Recordings from ``get_recordings()`` are handled 10000 at a time: the
    matching saved rows are loaded via ``get_data`` for the latest scraper's
    id and handed to ``process`` on the same connection. A progress line is
    logged once each batch is finished.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    musicbrainz = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(musicbrainz)

    all_recordings = get_recordings()
    total = len(all_recordings)
    finished = 0
    t0 = time.time()
    log.info("starting..., %s recordings to process", total)

    for chunk in util.chunks(all_recordings, 10000):
        log.info("have %s recordings", len(chunk))
        with db.engine.connect() as connection:
            matched = get_data(connection, scraper["id"], chunk)
            log.info(" - got %s rows matching them", len(matched))
            process(connection, matched)
        # Chunks can be shorter than 10000, so count their actual length.
        finished = finished + len(chunk)
        duration, left = util.stats(finished, total, t0)
        log.info(
            "Done %s/%s in %s; %s remaining",
            finished,
            total,
            str(duration),
            str(left),
        )