Beispiel #1
0
def simulate(probabilities, n_mutations=1, n_sequences=50000):
    '''
    Simulates mutation of antibody sequences, given a list of mutations
    and their probabilities.

    Inputs
    ------
    probabilities: a dictionary containing mutations as keys and probabilities
        as values. The values are passed straight to ``np.random.choice`` as
        ``p``, so they must sum to 1.
    n_mutations: Number of mutations in each sequence. If n_mutations is
        greater than 1, mutations are selected from the pool of supplied
        mutations without replacement (so it cannot exceed the number of
        mutations with non-zero probability). Default is 1.
    n_sequences: Number of mutated sequences to generate. Default is 50,000.

    Returns
    -------
    Mean number of VRC01-like mutations (float) and 95% confidence interval
    (tuple of floats).
    '''
    vrc01_muts = get_vrc01_class_mutations()
    muts, probs = zip(*probabilities.items())
    # Only the per-sequence count of VRC01-class hits is needed downstream,
    # so keep the counts rather than every sampled mutation array.
    mut_counts = []
    start = datetime.now()
    for i in range(n_sequences):
        sampled = np.random.choice(muts, size=n_mutations, replace=False, p=probs)
        mut_counts.append(sum(1 for x in sampled if x in vrc01_muts))
        if (i + 1) % 100 == 0:
            progress_bar(i + 1, n_sequences, start)
    # The in-loop update only fires on multiples of 100; make sure the bar
    # also reports completion for any other n_sequences.
    if n_sequences % 100:
        progress_bar(n_sequences, n_sequences, start)
    # calculate mean and 95% confidence interval (normal approximation around
    # the mean, using the standard error of the mean as the scale)
    summary = stats.describe(mut_counts)
    mean = summary.mean
    std_err = np.sqrt(summary.variance) / np.sqrt(len(mut_counts))
    conf_interval = stats.norm.interval(0.95, loc=mean, scale=std_err)
    return mean, conf_interval
Beispiel #2
0
def monitor_update(results):
    '''
    Blocks until every entry in ``results`` reports ``ready()``, redrawing
    the progress bar once per second while waiting.

    Inputs
    ------
    results: a sized iterable of objects exposing a ``ready()`` method
        (presumably multiprocessing async results -- confirm with callers).
    '''
    total = len(results)
    done = 0
    while done < total:
        # poll once per second
        time.sleep(1)
        done = sum(1 for r in results if r.ready())
        progbar.progress_bar(done, total)
    # final redraw; this is the only draw when ``results`` is empty
    progbar.progress_bar(done, total)
Beispiel #3
0
def monitor_mp_jobs(results):
    '''
    Waits for all jobs in ``results`` to finish, updating the progress bar
    once per second, then prints an empty line.

    Inputs
    ------
    results: a sized iterable of objects exposing a ``ready()`` method
        (presumably multiprocessing async results -- confirm with callers).
    '''
    total = len(results)
    done = 0
    while done < total:
        # poll once per second
        time.sleep(1)
        done = sum(1 for job in results if job.ready())
        progbar.progress_bar(done, total)
    # terminate the progress-bar line
    print('')
Beispiel #4
0
def monitor_mp_jobs(results):
    '''
    Polls ``results`` once per second until every job reports ``ready()``,
    redrawing the progress bar after each poll. Prints an empty line when
    all jobs are done.

    Inputs
    ------
    results: a sized iterable of objects exposing a ``ready()`` method.
    '''
    n_jobs = len(results)
    n_ready = 0
    while n_ready < n_jobs:
        time.sleep(1)
        completed = [job for job in results if job.ready()]
        n_ready = len(completed)
        progbar.progress_bar(n_ready, n_jobs)
    print('')
Beispiel #5
0
def multiprocess_mongoimport(jsons, db, coll, args):
    '''
    Runs ``do_mongoimport`` on each JSON file in parallel using a
    multiprocessing pool, then removes temporary files.

    Inputs
    ------
    jsons: a sized iterable of JSON inputs; each one is passed as the first
        argument to ``do_mongoimport``.
    db: database identifier, forwarded to ``do_mongoimport``.
    coll: collection identifier, forwarded to ``do_mongoimport``.
    args: an object providing ``ip``, ``port``, ``user`` and ``password``
        attributes; also passed to ``remove_temp_files``.
    '''
    progbar.progress_bar(0, len(jsons))
    p = mp.Pool()
    try:
        async_results = [
            p.apply_async(do_mongoimport,
                          args=(j, args.ip, args.port, db, coll, args.user, args.password))
            for j in jsons
        ]
        monitor_results(async_results)
    finally:
        # Always shut the pool down so the worker processes do not linger
        # after the import is done (or has failed).
        p.close()
        p.join()
    remove_temp_files(args)
    print('')
Beispiel #6
0
def monitor_celery_jobs(results):
    '''
    Waits until every job in ``results`` has either succeeded or failed,
    updating the progress bar once per second, then prints an empty line.

    Inputs
    ------
    results: a sized iterable of objects exposing ``successful()`` and
        ``failed()`` methods (presumably Celery async results -- confirm
        with callers).
    '''
    total = len(results)
    done = 0
    while done < total:
        # poll once per second
        time.sleep(1)
        n_succeeded = sum(1 for job in results if job.successful())
        n_failed = sum(1 for job in results if job.failed())
        # a job counts as finished regardless of its outcome
        done = n_succeeded + n_failed
        progbar.progress_bar(done, total)
    # terminate the progress-bar line
    print('')
Beispiel #7
0
def monitor_celery_jobs(results):
    '''
    Polls ``results`` once per second until the number of succeeded plus
    failed jobs reaches the total, redrawing the progress bar after each
    poll. Prints an empty line when done.

    Inputs
    ------
    results: a sized iterable of objects exposing ``successful()`` and
        ``failed()`` methods.
    '''
    n_jobs = len(results)
    n_finished = 0
    while n_finished < n_jobs:
        time.sleep(1)
        ok = [job for job in results if job.successful()]
        bad = [job for job in results if job.failed()]
        n_finished = len(ok) + len(bad)
        progbar.progress_bar(n_finished, n_jobs)
    print('')
Beispiel #8
0
def multiprocess_mongoimport(jsons, db, coll, args):
    '''
    Submits one ``do_mongoimport`` job per JSON file to a multiprocessing
    pool, waits for the jobs via ``monitor_results``, then removes temporary
    files.

    Inputs
    ------
    jsons: a sized iterable of JSON inputs; each one is passed as the first
        argument to ``do_mongoimport``.
    db: database identifier, forwarded to ``do_mongoimport``.
    coll: collection identifier, forwarded to ``do_mongoimport``.
    args: an object providing ``ip``, ``port``, ``user`` and ``password``
        attributes; also passed to ``remove_temp_files``.
    '''
    progbar.progress_bar(0, len(jsons))
    async_results = []
    p = mp.Pool()
    try:
        for j in jsons:
            async_results.append(
                p.apply_async(do_mongoimport, args=(j, args.ip, args.port, db, coll, args.user, args.password))
            )
        monitor_results(async_results)
    finally:
        # The pool was previously never closed, leaving its worker
        # processes alive; shut it down whether or not the import succeeded.
        p.close()
        p.join()
    remove_temp_files(args)
    print("")
Beispiel #9
0
def update_db(db, standard, scores, collection, args):
    '''
    Indexes ``collection`` on ``seq_id`` and then runs ``update`` for every
    identity group in ``scores``, in batches of ``args.update_threads``
    concurrent threads.

    Inputs
    ------
    db: ignored -- it is immediately replaced by a handle obtained from
        ``mongodb.get_db`` using the connection settings in ``args``. Kept
        for interface compatibility.
    standard: name of the standard; any '.' is replaced with '_' before it
        is passed to ``update`` (presumably because it ends up in a MongoDB
        field name -- confirm against ``update``).
    scores: an object supporting ``groupby('identity')`` and ``len()``
        (presumably a pandas DataFrame).
    collection: collection to index and update.
    args: an object providing ``db``, ``ip``, ``port``, ``user``,
        ``password`` and ``update_threads`` attributes; also forwarded
        to ``update``.
    '''
    # NOTE: the ``db`` argument is overwritten here.
    db = mongodb.get_db(args.db, args.ip, args.port, args.user, args.password)
    print_index_info()
    mongodb.index(db, collection, ['seq_id'])
    print_update_info()
    start = time.time()
    conn = mongodb.get_connection(args.ip, args.port,
        args.user, args.password)
    mongo_version = conn.server_info()['version']
    standard = standard.replace('.', '_')
    groups = regroup(scores.groupby('identity').groups)
    n_groups = len(groups)

    # Process the groups in batches: start one thread per group in the batch
    # and wait for the whole batch before moving on to the next one.
    for offset in range(0, n_groups, args.update_threads):
        batch = groups[offset:offset + args.update_threads]
        threads = []
        for group in batch:
            t = Thread(target=update, args=(db, collection, group, standard, mongo_version, args))
            t.start()
            threads.append(t)
        for t in threads:
            t.join()
        # Report the number of groups actually processed; the last batch may
        # be shorter than args.update_threads, so adding the full batch size
        # would overshoot the total.
        progbar.progress_bar(offset + len(batch), n_groups)

    print('')
    run_time = time.time() - start
    logger.info('Updating took {} seconds. ({} sequences per second)'.format(round(run_time, 2),
        round(len(scores) / run_time, 1)))