Beispiel #1
0
def update_papers(conn, lock, status=1):
    """Update papers from all ReDIF documents.

    Reads every JEL code that has a parent and every listing URL whose
    status equals ``status``, then hands the URLs to ``update_papers_1``
    in batches of ``settings.batch_size`` using ``parallel`` with
    ``settings.no_threads_www`` threads. The connection is committed
    after each batch and progress is printed.

    The per-URL results are summed and reported as the number of
    successful updates (presumably each worker returns 1/True on
    success -- confirm against ``update_papers_1``).
    """
    cursor = conn.cursor()
    cursor.execute('SELECT code FROM jel WHERE parent IS NOT NULL')
    alljel = [row[0] for row in cursor.fetchall()]
    cursor.execute('SELECT url FROM listings WHERE status = ?', (status, ))
    urls = [row[0] for row in cursor.fetchall()]
    # Random order, to redistribute load.
    urls = random.sample(urls, k=len(urls))
    cursor.close()

    def process_url(url):
        return update_papers_1(conn, lock, url, alljel)

    size = settings.batch_size
    no_batches = math.ceil(len(urls) / size)
    updated_total = 0
    for batch_no in range(1, no_batches + 1):
        print(f'Downloading batch {batch_no}/{no_batches}...')
        batch = urls[(batch_no - 1) * size:batch_no * size]
        outcomes = parallel(process_url, batch,
                            threads=settings.no_threads_www)
        updated = sum(outcomes)
        updated_total += updated
        # Persist each batch before moving on to the next one.
        conn.commit()
        print(f'{updated} out of {len(batch)} records updated successfully')

    print(f'All batches: {updated_total} out of {len(urls)} records'
          ' updated successfully')
Beispiel #2
0
def conformation(filename, procs=0):
    """Run ``get_conformations`` on every molecule read from ``filename``.

    The molecules come from ``cheminfo.read_sdffile``. Each one is passed
    to ``get_conformations`` as an ``(index, molecule)`` tuple together
    with the scratch directory ``"_tmp_ensemble_/"``.

    With ``procs == 0`` the molecules are handled one after another in
    this process; otherwise the work is handed to ``misc.parallel`` with
    ``procs`` workers. Nothing is returned.
    """
    scr = "_tmp_ensemble_/"
    molecules = cheminfo.read_sdffile(filename)

    if procs == 0:
        # Sequential path: call the worker directly for each package.
        for package in enumerate(molecules):
            get_conformations(package, scr=scr)
        return

    def numbered_molecules():
        yield from enumerate(molecules)

    results = misc.parallel(numbered_molecules(),
                            get_conformations, [], {"scr": scr},
                            procs=procs)

    # Iterate the results only to consume them; the values are discarded.
    for _ in results:
        pass
Beispiel #3
0
def update_series(conn, lock, status=1):
    """Update all archive and series files in the database.

    Selects the ``(file, type)`` rows of the ``repec`` table whose status
    equals ``status`` and passes each row, unpacked, to
    ``update_series_1`` via ``parallel`` with
    ``settings.no_threads_repec`` threads. The summed results are printed
    as the number of successfully updated records.
    """
    cursor = conn.cursor()
    cursor.execute('SELECT file, type FROM repec WHERE status = ?',
                   (status, ))
    files = cursor.fetchall()
    cursor.close()
    print('Updating archive and series files...')

    def process_row(row):
        # Each row is a (file, type) pair, forwarded as positional args.
        return update_series_1(conn, lock, *row)

    outcomes = parallel(process_row, files,
                        threads=settings.no_threads_repec)
    updated = sum(outcomes)
    print(f'{updated} out of {len(files)} records updated successfully')
Beispiel #4
0
def update_listings(conn, lock, status=1):
    """Update remote listings for all series.

    Selects the URLs of the ``remotes`` table whose status equals
    ``status``, puts them in random order and passes each one to
    ``update_listings_1`` via ``parallel``. The summed results are
    printed as the number of successfully updated records.
    """
    cursor = conn.cursor()
    cursor.execute('SELECT url FROM remotes WHERE status = ?', (status, ))
    urls = [row[0] for row in cursor.fetchall()]
    # Random order, to redistribute load.
    urls = random.sample(urls, k=len(urls))
    cursor.close()
    print('Updating remote listings...')

    def process_url(url):
        return update_listings_1(conn, lock, url)

    # The thread count is passed positionally, as the third argument.
    outcomes = parallel(process_url, urls, settings.no_threads_www)
    updated = sum(outcomes)
    print(f'{updated} out of {len(urls)} records updated successfully')
Beispiel #5
0
def main(datafile, procs=0, scr="_tmp_"):
    """Convert a stored key/value database into an SDF file and a CSV file.

    The object loaded from ``datafile`` with ``misc.load_obj`` is treated
    as a mapping. Every key (named ``smi`` here, presumably a SMILES
    string -- confirm) and its value (named ``kelvin``) are turned into a
    ``(molobj, values)`` pair, either sequentially or through
    ``misc.parallel``. Each pair is then written out:

    * the molecule, as an SDF string, to ``data/sdf/structures.sdf.gz``
    * ``"<index> <values joined by spaces>"``, one line per molecule, to
      ``data/sdf/properties.csv``

    Args:
        datafile: Path handed to ``misc.load_obj``.
        procs: ``0`` processes the entries one by one in this process
            with ``prepare_sdf_and_csv``; any other value streams them
            through ``misc.parallel`` with ``prepare_sdf_and_csv_procs``
            and that many workers.
        scr: Not used by this function; kept for interface compatibility.

    Returns:
        None.
    """
    db = misc.load_obj(datafile)

    keys = db.keys()

    print("total keys:", len(keys))

    if procs == 0:

        def get_results():
            for smi in keys:
                result = prepare_sdf_and_csv(smi, db[smi])
                if result is not None:
                    yield result

        results = get_results()

    else:

        def workpackages():
            for smi in keys:
                yield smi, db[smi]

        results = misc.parallel(workpackages(),
                                prepare_sdf_and_csv_procs, [], {},
                                procs=procs)

        print("streaming results")

    # Write results. Context managers guarantee both files are closed even
    # if a worker or a conversion raises part-way through.
    with gzip.open("data/sdf/structures.sdf.gz", 'w') as fsdf:
        with open("data/sdf/properties.csv", 'w') as fprop:

            for i, result in enumerate(results):

                # Failed entries still consume an index, so ``i`` refers to
                # the position in the result stream, not the output row.
                if result is None:
                    continue

                molobj, values = result

                sdfstr = cheminfo.molobj_to_sdfstr(molobj)
                # The gzip file is opened in binary mode, hence the encode.
                fsdf.write(sdfstr.encode())

                valuesstr = " ".join(values)
                # One record per line; without the terminator all records
                # would run together on a single line.
                fprop.write(f"{i} {valuesstr}\n")