def update_papers(conn, lock, status=1):
    """Update papers from all ReDIF documents."""
    c = conn.cursor()
    c.execute('SELECT code FROM jel WHERE parent IS NOT NULL')
    alljel = [r[0] for r in c.fetchall()]
    c.execute('SELECT url FROM listings WHERE status = ?', (status, ))
    urls = [r[0] for r in c.fetchall()]
    urls = random.sample(urls, k=len(urls))  # to redistribute load
    c.close()

    def worker(u):
        return update_papers_1(conn, lock, u, alljel)

    size = settings.batch_size
    no_batches = math.ceil(len(urls) / size)
    total = 0
    for i in range(no_batches):
        print('Downloading batch {}/{}...'.format(i + 1, no_batches))
        batch = urls[i * size:(i + 1) * size]
        bs = sum(parallel(worker, batch, threads=settings.no_threads_www))
        total += bs
        conn.commit()
        print(f'{bs} out of {len(batch)} records updated successfully')
    print(f'All batches: {total} out of {len(urls)} records'
          ' updated successfully')

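# `parallel` is imported from elsewhere in this project and is not shown in
# this excerpt. A minimal, hypothetical sketch of what the call sites above
# and below assume (a thread pool that maps `func` over `items` and returns
# the per-item status values, so that successes can be summed); the real
# helper may differ:
from concurrent.futures import ThreadPoolExecutor

def parallel(func, items, threads=8):
    """Map func over items with a thread pool; return results in order."""
    with ThreadPoolExecutor(max_workers=threads) as pool:
        return list(pool.map(func, items))
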
def conformation(filename, procs=0):
    """Generate conformations for every molecule in an SDF file."""
    scr = "_tmp_ensemble_/"
    molecules = cheminfo.read_sdffile(filename)

    if procs == 0:
        # Serial path: process the molecules one at a time
        for im, molecule in enumerate(molecules):
            get_conformations((im, molecule), scr=scr)
    else:
        # Parallel path: stream (index, molecule) work packages to a pool
        def workpackages():
            for im, molecule in enumerate(molecules):
                yield im, molecule

        lines = workpackages()
        results = misc.parallel(lines, get_conformations, [], {"scr": scr},
                                procs=procs)

        # misc.parallel yields lazily; iterate to make sure every work
        # package is actually consumed
        for result in results:
            pass

    return

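# `misc.parallel` is likewise external to this excerpt. Judging from the
# call sites, it fans a generator of work packages out to `procs` worker
# processes, calling func(item, *args, **kwargs) on each, and yields the
# results lazily as they complete (hence the draining loop above). A
# hypothetical sketch, not the project's actual implementation:
import functools
import multiprocessing

def _apply(func, args, kwargs, item):
    # Module-level so it can be pickled and shipped to worker processes.
    return func(item, *args, **kwargs)

def parallel_procs(lines, func, args, kwargs, procs=1):
    """Stream func(item, *args, **kwargs) over a process pool."""
    worker = functools.partial(_apply, func, args, kwargs)
    with multiprocessing.Pool(procs) as pool:
        yield from pool.imap_unordered(worker, lines)
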
def update_series(conn, lock, status=1):
    """Update all archive and series files in the database."""
    c = conn.cursor()
    c.execute('SELECT file, type FROM repec WHERE status = ?', (status, ))
    files = c.fetchall()
    c.close()
    print('Updating archive and series files...')

    def worker(el):
        return update_series_1(conn, lock, *el)

    results = parallel(worker, files, threads=settings.no_threads_repec)
    print(f'{sum(results)} out of {len(files)} records updated successfully')

def update_listings(conn, lock, status=1):
    """Update remote listings for all series."""
    c = conn.cursor()
    c.execute('SELECT url FROM remotes WHERE status = ?', (status, ))
    urls = [r[0] for r in c.fetchall()]
    urls = random.sample(urls, k=len(urls))  # to redistribute load
    c.close()
    print('Updating remote listings...')

    def worker(u):
        return update_listings_1(conn, lock, u)

    results = parallel(worker, urls, threads=settings.no_threads_www)
    print(f'{sum(results)} out of {len(urls)} records updated successfully')

def main(datafile, procs=0, scr="_tmp_"):
    """Prepare SDF structures and a property CSV for every molecule in the
    database."""
    db = misc.load_obj(datafile)
    keys = db.keys()
    print("total keys:", len(keys))

    if procs == 0:
        # Serial path: compute results lazily, skipping failed molecules
        def get_results():
            for i, key in enumerate(keys):
                smi = key
                kelvin = db[key]
                result = prepare_sdf_and_csv(smi, kelvin)
                if result is None:
                    continue
                yield result

        results = get_results()
    else:
        # Parallel path: stream (smiles, kelvin) work packages to a pool
        def workpackages():
            for i, key in enumerate(keys):
                smi = key
                kelvin = db[key]
                yield smi, kelvin

        lines = workpackages()
        results = misc.parallel(lines, prepare_sdf_and_csv_procs, [], {},
                                procs=procs)
        print("streaming results")

    # Write results
    fsdf = gzip.open("data/sdf/structures.sdf.gz", 'w')
    fprop = open("data/sdf/properties.csv", 'w')

    for i, result in enumerate(results):
        if result is None:
            continue
        molobj, values = result
        sdfstr = cheminfo.molobj_to_sdfstr(molobj)
        fsdf.write(sdfstr.encode())
        valuesstr = " ".join(values)
        propstr = f"{i} {valuesstr}\n"
        fprop.write(propstr)

    fsdf.close()
    fprop.close()

    return

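# A hypothetical driver for main(), assuming `datafile` names a pickled
# dict mapping SMILES strings to temperatures in Kelvin (which is what the
# loop over db[key] implies). The path and process count are illustrative.
if __name__ == "__main__":
    main("data/melting_points", procs=4)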