patients = ['p1', 'p2', 'p3','p5', 'p6', 'p8', 'p9','p10', 'p11'] # all subtypes regions = ['genomewide'] #regions = ['gag', 'pol', 'nef'] #, 'env'] #regions = ['p24', 'p17'] #, 'RT1', 'RT2', 'RT3', 'RT4', 'PR', # 'IN1', 'IN2', 'IN3','p15', 'vif', 'nef','gp41','gp1201'] cov_min = 1000 Sbins = np.array([0,0.03, 0.08, 0.25, 2]) Sbinc = 0.5*(Sbins[1:]+Sbins[:-1]) data = {} for subtype in ['patient', 'any']: minor_variants, to_away_divergence, to_away_minor, consensus_distance = \ collect_to_away(patients, regions, Sbins=Sbins, cov_min=cov_min, subtype = subtype) to_away_minor.loc[:,['reversion_spectrum', 'minor_reversion_spectrum']] = \ to_away_minor.loc[:,['reversion_spectrum', 'minor_reversion_spectrum']].astype(float) add_binned_column(to_away_minor, [0,1000,2000,4000], 'time') data[subtype] = {'minor_variants':minor_variants, 'to_away':to_away_divergence,'to_away_minor':to_away_minor, 'consensus_distance':consensus_distance, 'Sbins':Sbins, 'Sbinc':Sbinc} store_data(data, fn_data) else: print("Loading data from file") data = load_data(fn_data) plot_to_away(data, fig_filename=foldername+'to_away') for subtype in ['patient', 'any']: print data[subtype]['to_away_minor'].groupby(['time_bin', 'af_bin']).mean() #
# Roll out one episode (up to 1000 environment steps) with the current policy.
score = 0
state = env.reset()
steps = 0
for _t in range(1000):
    raw_action = agent.select_action(state)
    # Affine remap of the policy output into the env's action space:
    # first component scaled by 2 and shifted by -1, the other two unchanged.
    action = raw_action * np.array([2., 1., 1.]) + np.array([-1., 0., 0.])
    if args.save_data:
        # Data-collection variant of step() also threads the step counter through.
        state_, reward, done, die, steps = env.step(
            action, steps, i_ep)
        #print("Eps ", i_ep, "t ", t, "steps ", steps)
    else:
        state_, reward, done, die = env.step(action)
    if args.render:
        env.render()
    score += reward
    state = state_
    # Early termination only when explicitly requested via args.stop.
    if args.stop and (die or done):
        break

print('Ep {}\tScore: {:.2f}\t'.format(i_ep, score))

if args.save_data:
    # Derive the output name from the model file's basename and the eps setting.
    model_tag = args.model_path.split("/")[-1].split(".")[0]
    name = "".join(["data_", model_tag, "_", str(args.eps)])
    store_data(data_dict, name)
# Compute (or load cached) per-patient diversity and divergence trajectories,
# then plot them per genome fragment and dump the curves to a TSV.
fn_data = foldername + 'data/' + 'genomewide_divdiv.pickle'
# NOTE(review): csv_out is not closed anywhere in this chunk -- presumably
# closed (or leaked) further down the file; confirm.
csv_out = open(foldername + '/genomewide_divdiv.tsv', 'w')

if not os.path.isfile(fn_data) or params.redo:
    #####
    ## recalculate diversity and divergence from allele frequencies
    #####
    diversity = {}
    divergence = {}
    for pcode in patients:
        patient = Patient.load(pcode)
        for frag in all_fragments:
            # Key each trajectory by (patient, fragment); value is
            # (sample times, per-sample statistic).
            diversity[(pcode, frag)] = (patient.ysi, patient.get_diversity(frag))
            divergence[(pcode, frag)] = (patient.ysi, patient.get_divergence(frag))
    store_data((diversity, divergence), fn_data)
else:
    print("Loading data from file")
    diversity, divergence = load_data(fn_data)

#####
## plot diversity
#####
fig, axs = plt.subplots(2, 3, sharey=True, sharex=True)
for pcode in patients:
    for idx, frag in enumerate(all_fragments):
        # One panel per fragment, laid out on a 2x3 grid.
        panel = axs[idx // 3][idx % 3]
        times, values = diversity[(pcode, frag)]
        panel.plot(times, values, '-o', label=pcode, c=patient_colors[pcode])
        csv_out.write('\t'.join(map(str, ['diversity_' + pcode + '_' + frag] + list(values))) + '\n')
        csv_out.write('\t'.join(map(str, ['time_' + pcode + '_' + frag] + list(times))) + '\n')
avg_HXB2_syn_divs = HXB2_syn_divs.mean(axis=0) HXB2_nonsyn_divs = np.ma.array(HXB2_nonsyn_divs) HXB2_nonsyn_divs.mask = HXB2_nonsyn_divs<0 avg_HXB2_nonsyn_divs = HXB2_nonsyn_divs.mean(axis=0) HXB2_nonsyn_divg = np.ma.array(HXB2_nonsyn_divg) HXB2_nonsyn_divg.mask = HXB2_nonsyn_divg<0 avg_HXB2_nonsyn_divg = HXB2_nonsyn_divg.mean(axis=0) # determine the running average over positions avg_syn_divs = running_average_masked(avg_HXB2_syn_divs, window_size, 0.15) avg_nonsyn_divs = running_average_masked(avg_HXB2_nonsyn_divs, window_size, 0.3) avg_nonsyn_divg = running_average_masked(avg_HXB2_nonsyn_divg, window_size, 0.3) store_data((avg_nonsyn_divg, avg_nonsyn_divs, avg_syn_divs), fn_data) else: (avg_nonsyn_divg, avg_nonsyn_divs, avg_syn_divs) = load_data(fn_data) plt.ion() plt.plot(avg_nonsyn_divg) plt.plot(avg_syn_divs) plt.plot(avg_nonsyn_divs) fig,axs = plt.subplots(1,2,sharey=True, figsize = (fig_width, 0.6*fig_width)) cols = HIVEVO_colormap() ax = axs[0] ax.scatter(avg_nonsyn_divg[::(window_size/2)], avg_nonsyn_divs[::(window_size/2)], label='nonsynonymous', c=[cols(p) for p in np.linspace(0,1,len(avg_nonsyn_divg[::(window_size/2)]))], s=50) ax.set_ylabel('diversity', fontsize = fig_fontsize)
    # Similarity tolerance for the prepared neighbor query; it is halved until
    # the query returns at least n neighbors, then restored to the default.
    # NOTE(review): this chunk is the tail of a function (the 'def' header and
    # the definitions of conn/base_cur/result/fp_name/n/cmp_cur are above it);
    # indentation here is reconstructed -- confirm against the full file.
    tol = 0.5
    for i, (id, ) in enumerate(cmp_cur):
        # Progress report every 100 compounds.
        if output and i % 100 == 0:
            print('Compound {:4.0f}:{}'.format(i, id))
        while True:
            n_cur = conn.cursor()
            n_cur.execute('EXECUTE {}_neighbor_plan (%s)'.format(fp_name), (id, ))
            # Record neighbor id -> similarity for this compound.
            for (n_id, similarity) in n_cur:
                result[fp_name][id][n_id] = similarity
            # Did we get enough?
            if len(result[fp_name][id]) >= n:
                break
            # Lower the threshold and try again
            tol /= 2
            base_cur.execute(tol_sql, (tol, ))
        # Reset tolerance if it changed
        # NOTE(review): placed inside the per-compound loop so each compound
        # starts from the default tolerance -- confirm against the original layout.
        if tol != 0.5:
            tol = 0.5
            base_cur.execute(default_tol_sql)
    return result


if __name__ == '__main__':
    # Gather neighbor maps for all fingerprints and persist them.
    data = gather_near_neighbors(output=True)
    store_data(data, 'nmap.pkl')
# Did we get enough? if attempt[-2] != 0: break # Lower the threshold and try again tol /= 2 cur.execute(tol_sql, (tol, )) # Reset tolerance if it changed if tol != 0.5: tol = 0.5 cur.execute(default_tol_sql) X += list(attempt) for fp_name in fp_names: X += list(fp2bits(fp_dict[fp_name])) pred = model.predict(np.array(X).reshape(1, -1))[0] return pred, exp if __name__ == '__main__': conn = aws_context_db() X, y = all_data(conn) rf = RandomForestRegressor(n_estimators=1000, max_depth=10, n_jobs=-1) rf.fit(X, y) store_data(rf, 'lipo_model.pkl')