def main():
    """Run the latest musicbrainz scraper lookup over every known MBID.

    Initializes the DB engine, loads the most recent scraper for the
    "musicbrainz" source, then processes MBIDs in chunks of 100, logging
    progress (elapsed / estimated remaining time) after each chunk.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)
    print(scraper)
    mbids = get_mbids()
    total = len(mbids)
    starttime = time.time()
    done = 0
    for mblist in util.chunks(mbids, 100):
        lookup(mblist, scraper)
        # Count the actual chunk size: the final chunk may contain fewer
        # than 100 items, and a flat `done += 100` would overshoot `total`
        # and skew the remaining-time estimate from util.stats.
        done += len(mblist)
        durdelta, remdelta = util.stats(done, total, starttime)
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta), str(remdelta))
def process_file(module, filename, numworkers, save=False):
    """Process every row of a CSV query file through *module*.

    Args:
        module: the processing module handed to process_items.
        filename: path to a CSV file; each row becomes one query dict.
        numworkers: worker count forwarded to process_items.
        save: when True, process_items persists its results.

    Rows are processed one at a time (CHUNK_SIZE = 1), with a short random
    sleep between chunks — presumably rate limiting for an external
    service (TODO confirm). Progress is logged after every chunk.
    """
    with open(filename) as csvfile:
        # DictReader is consumed eagerly so `total` is known up front.
        queries = list(csv.DictReader(csvfile))
    total = len(queries)
    starttime = time.time()
    done = 0
    CHUNK_SIZE = 1
    for items in util.chunks(queries, CHUNK_SIZE):
        process_items(items, module, save, numworkers)
        # Count the actual chunk length rather than CHUNK_SIZE so the
        # progress figure stays correct if CHUNK_SIZE is ever raised
        # (a trailing partial chunk would otherwise overcount).
        done += len(items)
        durdelta, remdelta = util.stats(done, total, starttime)
        time.sleep(random.uniform(.5, 1.5))
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta), str(remdelta))
def main():
    """Re-process previously scraped musicbrainz data for all recordings.

    Loads the latest scraper for the "musicbrainz" source, then walks the
    recording list in chunks of 10,000: for each chunk it fetches the rows
    that scraper already saved and runs process() over them, logging
    progress as it goes.
    """
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)
    source = data.load_source("musicbrainz")
    scraper = data.load_latest_scraper_for_source(source)

    recordings = get_recordings()
    total = len(recordings)
    done = 0
    starttime = time.time()
    log.info("starting..., %s recordings to process", total)

    for chunk in util.chunks(recordings, 10000):
        log.info("have %s recordings", len(chunk))
        with db.engine.connect() as connection:
            # Pull only the rows this scraper produced for this chunk.
            rows = get_data(connection, scraper["id"], chunk)
            log.info(" - got %s rows matching them", len(rows))
            process(connection, rows)
        done += len(chunk)
        durdelta, remdelta = util.stats(done, total, starttime)
        log.info("Done %s/%s in %s; %s remaining", done, total, str(durdelta), str(remdelta))