Beispiel #1
0
        patients = ['p1', 'p2', 'p3','p5', 'p6', 'p8', 'p9','p10', 'p11'] # all subtypes
        regions = ['genomewide']
        #regions = ['gag', 'pol', 'nef'] #, 'env']
        #regions = ['p24', 'p17'] #, 'RT1', 'RT2', 'RT3', 'RT4', 'PR', 
        #           'IN1', 'IN2', 'IN3','p15', 'vif', 'nef','gp41','gp1201']
        cov_min = 1000
        Sbins = np.array([0,0.03, 0.08, 0.25, 2])
        Sbinc = 0.5*(Sbins[1:]+Sbins[:-1])

        data = {}
        for subtype in ['patient', 'any']:
            minor_variants, to_away_divergence, to_away_minor, consensus_distance = \
                collect_to_away(patients, regions, Sbins=Sbins, cov_min=cov_min, subtype = subtype)

            to_away_minor.loc[:,['reversion_spectrum', 'minor_reversion_spectrum']] = \
                            to_away_minor.loc[:,['reversion_spectrum', 'minor_reversion_spectrum']].astype(float)
            add_binned_column(to_away_minor,  [0,1000,2000,4000], 'time')
            data[subtype] = {'minor_variants':minor_variants, 'to_away':to_away_divergence,'to_away_minor':to_away_minor, 
                    'consensus_distance':consensus_distance, 'Sbins':Sbins, 'Sbinc':Sbinc}

        store_data(data, fn_data)
    else:
        print("Loading data from file")
        data = load_data(fn_data)

# Plot to/away divergence and print the per-time/per-allele-frequency-bin
# means of the minor-variant table for each subtype grouping.
plot_to_away(data, fig_filename=foldername+'to_away')
for subtype in ['patient', 'any']:
    # Fixed: was a Python 2 `print` statement, which is a SyntaxError under
    # Python 3; the surrounding code (L25) already uses the print() function.
    print(data[subtype]['to_away_minor'].groupby(['time_bin', 'af_bin']).mean())

#
Beispiel #2
0
        score = 0
        state = env.reset()
        steps = 0
        for t in range(1000):
            action = agent.select_action(state)
            action = action * np.array([2., 1., 1.]) + np.array([-1., 0., 0.])
            if args.save_data:
                state_, reward, done, die, steps = env.step(
                    action, steps, i_ep)
                #print("Eps ", i_ep, "t ", t, "steps ", steps)
            else:
                state_, reward, done, die = env.step(
                    action
                )  # Transform the actions so that in can read left turn
            if args.render:
                env.render()
            score += reward
            state = state_
            if args.stop:
                if die or done:
                    break

        print('Ep {}\tScore: {:.2f}\t'.format(i_ep, score))
    if args.save_data:
        name = "".join([
            "data_",
            args.model_path.split("/")[-1].split(".")[0], "_",
            str(args.eps)
        ])
        store_data(data_dict, name)
Beispiel #3
0
    fn_data = foldername+'data/'
    fn_data = fn_data + 'genomewide_divdiv.pickle'

    csv_out = open(foldername+'/genomewide_divdiv.tsv','w')
    if not os.path.isfile(fn_data) or params.redo:
        #####
        ## recalculate diversity and divergence from allele frequencies
        #####
        diversity = {}
        divergence = {}
        for pcode in patients:
            p = Patient.load(pcode)
            for frag in all_fragments:
                diversity[(pcode,frag)] = (p.ysi, p.get_diversity(frag))
                divergence[(pcode,frag)] = (p.ysi, p.get_divergence(frag))
        store_data((diversity, divergence), fn_data)
    else:
        print("Loading data from file")
        diversity, divergence = load_data(fn_data)

#####
## plot diversity 
#####
    fig, axs = plt.subplots(2,3, sharey=True, sharex=True)
    for pcode in patients:
        for fi, frag in enumerate(all_fragments):
            ax = axs[fi//3][fi%3]
            ax.plot(diversity[(pcode,frag)][0], diversity[(pcode,frag)][1], 
                    '-o', label=pcode, c=patient_colors[pcode])
            csv_out.write('\t'.join(map(str, ['diversity_'+pcode+'_'+frag]+list(diversity[(pcode,frag)][1])))+'\n')
            csv_out.write('\t'.join(map(str, ['time_'+pcode+'_'+frag]+list(diversity[(pcode,frag)][0])))+'\n')
Beispiel #4
0
        avg_HXB2_syn_divs = HXB2_syn_divs.mean(axis=0)

        HXB2_nonsyn_divs = np.ma.array(HXB2_nonsyn_divs)
        HXB2_nonsyn_divs.mask = HXB2_nonsyn_divs<0
        avg_HXB2_nonsyn_divs = HXB2_nonsyn_divs.mean(axis=0)

        HXB2_nonsyn_divg = np.ma.array(HXB2_nonsyn_divg)
        HXB2_nonsyn_divg.mask = HXB2_nonsyn_divg<0
        avg_HXB2_nonsyn_divg = HXB2_nonsyn_divg.mean(axis=0)

        # determine the running average over positions
        avg_syn_divs = running_average_masked(avg_HXB2_syn_divs, window_size, 0.15)
        avg_nonsyn_divs = running_average_masked(avg_HXB2_nonsyn_divs, window_size, 0.3)
        avg_nonsyn_divg = running_average_masked(avg_HXB2_nonsyn_divg, window_size, 0.3)

        store_data((avg_nonsyn_divg, avg_nonsyn_divs, avg_syn_divs), fn_data)
    else:
        (avg_nonsyn_divg, avg_nonsyn_divs, avg_syn_divs) = load_data(fn_data)


    plt.ion()
    plt.plot(avg_nonsyn_divg)
    plt.plot(avg_syn_divs)
    plt.plot(avg_nonsyn_divs)

    fig,axs = plt.subplots(1,2,sharey=True, figsize = (fig_width, 0.6*fig_width))
    cols = HIVEVO_colormap()
    ax = axs[0]
    ax.scatter(avg_nonsyn_divg[::(window_size/2)], avg_nonsyn_divs[::(window_size/2)],  label='nonsynonymous',
                   c=[cols(p) for p in np.linspace(0,1,len(avg_nonsyn_divg[::(window_size/2)]))], s=50)
    ax.set_ylabel('diversity', fontsize = fig_fontsize)
Beispiel #5
0
        tol = 0.5
        for i, (id, ) in enumerate(cmp_cur):
            if output and i % 100 == 0:
                print('Compound {:4.0f}:{}'.format(i, id))
            while True:
                n_cur = conn.cursor()
                n_cur.execute('EXECUTE {}_neighbor_plan (%s)'.format(fp_name),
                              (id, ))
                for (n_id, similarity) in n_cur:
                    result[fp_name][id][n_id] = similarity

                # Did we get enough?
                if len(result[fp_name][id]) >= n:
                    break

                # Lower the threshold and try again
                tol /= 2
                base_cur.execute(tol_sql, (tol, ))

            # Reset tolerance if it changed
            if tol != 0.5:
                tol = 0.5
                base_cur.execute(default_tol_sql)

    return result


if __name__ == '__main__':
    # Build the nearest-neighbor map (verbose) and persist it to disk.
    store_data(gather_near_neighbors(output=True), 'nmap.pkl')
Beispiel #6
0
                # Did we get enough?
                if attempt[-2] != 0:
                    break

                # Lower the threshold and try again
                tol /= 2
                cur.execute(tol_sql, (tol, ))

            # Reset tolerance if it changed
            if tol != 0.5:
                tol = 0.5
                cur.execute(default_tol_sql)

            X += list(attempt)

        for fp_name in fp_names:
            X += list(fp2bits(fp_dict[fp_name]))

    pred = model.predict(np.array(X).reshape(1, -1))[0]

    return pred, exp


if __name__ == '__main__':
    # Fetch the full training set from the AWS database, fit a
    # random-forest regressor, and persist the trained model.
    db_conn = aws_context_db()
    features, targets = all_data(db_conn)
    model = RandomForestRegressor(n_estimators=1000, max_depth=10, n_jobs=-1)
    model.fit(features, targets)
    store_data(model, 'lipo_model.pkl')