# Name every internal node with a 1-based running index (as a string); these
# names label the records of the ancestral sequence file written below.
for ni, node in enumerate(prediction.non_terminals):
    node.name = str(ni + 1)

# write tree to file
Phylo.write(prediction.T, dirname + '/reconstructed_tree.nwk', 'newick')

# write inferred ancestral sequences to file (one FASTA record per internal node)
with open(dirname + '/ancestral_sequences.fasta', 'w') as outfile:
    for node in prediction.non_terminals:
        outfile.write('>' + node.name + '\n' + str(node.seq) + '\n')

## write sequence ranking to file
# Both tables have the same columns.  The last column is
# np.sqrt(node.var_fitness), i.e. a standard deviation, so both headers say
# 'standard dev' (the non-terminal table was previously mislabelled
# 'variance' although it held the square root as well).
ranking_header = '#' + '\t'.join(['name', 'rank', 'mean', 'standard dev']) + '\n'

# terminal nodes
prediction.rank_by_method(nodes=prediction.terminals, method='mean_fitness')
with open(dirname + '/sequence_ranking_terminals.txt', 'w') as outfile:
    outfile.write(ranking_header)
    for node in prediction.terminals:
        outfile.write('\t'.join(map(str, [node.name, node.rank, node.mean_fitness,
                                          np.sqrt(node.var_fitness)])) + '\n')

# non-terminal nodes
prediction.rank_by_method(nodes=prediction.non_terminals, method='mean_fitness')
with open(dirname + '/sequence_ranking_nonterminals.txt', 'w') as outfile:
    outfile.write(ranking_header)
    for node in prediction.non_terminals:
        outfile.write('\t'.join(map(str, [node.name, node.rank, node.mean_fitness,
                                          np.sqrt(node.var_fitness)])) + '\n')


# plot the tree if desired
if params.plot:
    tree_utils.plot_prediction_tree(prediction)
    plt.savefig(dirname + '/marked_up_tree.pdf')
Example #2
0
# Store the reconstructed tree in newick format.
tree_path = dirname + '/reconstructed_tree.nwk'
Phylo.write(prediction.T, tree_path, 'newick')

# Store the sequences inferred for the internal nodes, one FASTA record each.
with open(dirname + '/ancestral_sequences.fasta', 'w') as outfile:
    for node in prediction.non_terminals:
        fasta_record = '>' + node.name + '\n' + str(node.seq) + '\n'
        outfile.write(fasta_record)

## Store the rankings as tab-separated tables.
# Each row holds the node name, its rank and its polarizer value; the value
# column is labelled 'LBI' in the header line.
ranking_header = '#' + '\t'.join(['name', 'rank', 'LBI']) + '\n'

# terminal nodes
prediction.rank_by_method(nodes=prediction.terminals, method='polarizer')
with open(dirname + '/sequence_ranking_terminals.txt', 'w') as outfile:
    outfile.write(ranking_header)
    for node in prediction.terminals:
        row = [str(value) for value in (node.name, node.rank, node.polarizer)]
        outfile.write('\t'.join(row) + '\n')

# non-terminal nodes
prediction.rank_by_method(nodes=prediction.non_terminals, method='polarizer')
with open(dirname + '/sequence_ranking_nonterminals.txt', 'w') as outfile:
    outfile.write(ranking_header)
    for node in prediction.non_terminals:
        row = [str(value) for value in (node.name, node.rank, node.polarizer)]
        outfile.write('\t'.join(row) + '\n')

# Optionally draw the tree (polarizer method, internal=True) and save it as PDF.
if params.plot:
    tree_utils.plot_prediction_tree(prediction, method='polarizer', internal=True)
    figure_path = dirname + '/marked_up_tree.pdf'
    plt.savefig(figure_path)
    node.name = str(ni+1)

# Reconstructed tree, newick format.
Phylo.write(prediction.T, dirname + '/reconstructed_tree.nwk', 'newick')

# Inferred ancestral sequences: one FASTA record per non-terminal node.
with open(dirname + '/ancestral_sequences.fasta', 'w') as outfile:
    for node in prediction.non_terminals:
        outfile.write('>' + node.name + '\n')
        outfile.write(str(node.seq) + '\n')

## Sequence rankings, written as tab-separated text with a '#' header line.
rank_columns = ['name', 'rank', 'LBI']

# terminal nodes
prediction.rank_by_method(nodes=prediction.terminals, method='polarizer')
with open(dirname + '/sequence_ranking_terminals.txt', 'w') as outfile:
    outfile.write('#' + '\t'.join(rank_columns) + '\n')
    for node in prediction.terminals:
        fields = (node.name, node.rank, node.polarizer)
        outfile.write('\t'.join(str(field) for field in fields) + '\n')

# non-terminal nodes
prediction.rank_by_method(nodes=prediction.non_terminals, method='polarizer')
with open(dirname + '/sequence_ranking_nonterminals.txt', 'w') as outfile:
    outfile.write('#' + '\t'.join(rank_columns) + '\n')
    for node in prediction.non_terminals:
        fields = (node.name, node.rank, node.polarizer)
        outfile.write('\t'.join(str(field) for field in fields) + '\n')

# Draw and save the marked-up tree only when plotting was requested.
if params.plot:
    tree_utils.plot_prediction_tree(
        prediction,
        method='polarizer',
        internal=True)
    plt.savefig(dirname + '/marked_up_tree.pdf')

Example #4
0
    # Work with the first entry of `methods`; it is indexed at positions
    # 0, 1 and 2 below.
    method = methods[0]
    color_internal = True

    # Label the node returned by best_node() with '*'.  For H3N2 runs whose
    # year has an entry in laessig_prediction, that entry is labelled "L&L".
    has_laessig = params.flutype.startswith('H3N2') and year in laessig_prediction
    if has_laessig:
        top_node = prediction.best_node(method=method[0], nodes=method[2])
        seq_labels = {top_node.name: '*'}
        seq_labels[laessig_prediction[year].name] = "L&L"
    else:
        seq_labels = {prediction.best_node().name: '*'}

    # Label the prediction tree, reset the colors of both trees, then draw.
    tree_utils.label_nodes(prediction.T, seq_labels)
    tree_utils.erase_color(prediction.T)
    tree_utils.erase_color(combined_data.T)
    tree_utils.plot_prediction_tree(prediction, internal=color_internal)

    # The title reports the distance stored for this method divided by
    # distances['average'], rounded to 4 decimals.
    season = str(year) + "/" + str(year + 1)
    relative_distance = np.round(
        distances[method[0] + method[1]] / distances['average'], 4)
    plt.title("predicting " + params.flutype + " season " + season + ": " +
              str(relative_distance))
    #plt.savefig('../figures/'+base_name+'_prediction_'+method[0]+method[1]+'_'+name_mod+'.pdf')

    # plot a combined figure
    # color according to season
    pred_names = [c.name for c in prediction.terminals]
    tree_utils.erase_color(combined_data.T)
    for c in combined_data.T.get_terminals():
        if c.name in pred_names:
            c.color = (178, 34, 34)
        else:
    otherseqsnames=[]
# Combine prediction data, test data and the other named sequences.
combined_data = test_flu.make_combined_data(prediction, test_data, otherseqsnames)

# Label the node returned by best_node() with '*' in both trees.
seq_labels = {prediction.best_node().name: '*'}  # , laessig_prediction[year].name:"L&L"}
tree_utils.label_nodes(prediction.T, seq_labels)
tree_utils.label_nodes(combined_data.T, seq_labels)

# plot a combined figure
fig = plt.figure(figsize=(12, 6))
# subplot 1: only the prediction data
ax = plt.subplot(121)
# add panel label
plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36)
plt.title('until Feb ' + str(year))
plt.tight_layout()
tree_utils.plot_prediction_tree(prediction, axes=ax, cb=True, offset=0.0005, internal=False)

# subplot 2: prediction data and test data
ax = plt.subplot(122)
# add panel label
plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36)
plt.title('until Feb ' + str(year) + ' + season ' + str(year) + "/" + str(year + 1) + " (grey)")
pred_names = [c.name for c in prediction.terminals]
# Set copy of pred_names so the membership test in the loop below is O(1)
# per leaf instead of a scan over the whole list.
pred_name_set = set(pred_names)
tree_utils.erase_color(combined_data.T)
prediction.color_other_tree(combined_data.T.get_terminals(), offset=0.0005)
# Leaves that also occur in the prediction data keep the color assigned by
# color_other_tree; every other leaf gets a fixed color.
# NOTE(review): the title says "(grey)" but (0, 255, 255) read as RGB is
# cyan -- confirm how the plotting code interprets this tuple.
for c in combined_data.T.get_terminals():
    if c.name not in pred_name_set:
        c.color = (0, 255, 255)
    # set up the filtering criteria and select sequences from the master alignment
    # One criterion per region: (start date, end date, [region], sample size),
    # covering May 1 of the previous year through Feb 28 of `year`.
    criteria = [(date(year - 1, 5, 1), date(year, 2, 28), [region], sample_size)
                for region in prediction_regions]

    my_flu_alignment = flu_alignment(aln_fname,
                                     outgroup,
                                     annotation,
                                     criteria=criteria,
                                     cds={
                                         'begin': 0,
                                         'end': 987,
                                         'pad': 0
                                     })

    # run the prediction
    prediction = flu_ranking(my_flu_alignment, boost=0.5)
    top_seq = prediction.predict()
    # print() with a single argument gives the same output on Python 2 and 3,
    # whereas the former `print top_seq` statement is a syntax error on 3.
    print(top_seq)

    # plot the tree colored by the prediction
    tree_utils.plot_prediction_tree(prediction)

    # plot the distribution of sampling dates
    y, x, bin_names = my_flu_alignment.sampling_distribution(bins=10)
    # Local import kept from the original; the file's top-level imports are
    # not visible from this fragment.
    import matplotlib.pyplot as plt
    plt.figure()
    plt.plot(x[1:], y)
    # Pass a real list of tick labels: on Python 3 map() returns a lazy
    # iterator, which is not a valid label sequence for xticks.
    plt.xticks(x[1:], [str(b) for b in bin_names], rotation=30)
Example #7
0
    prediction.best_node('polarizer').name: '*'
}  #, laessig_prediction[year].name:"L&L"}
# Attach the labels in seq_labels to the prediction tree and the combined tree.
tree_utils.label_nodes(prediction.T, seq_labels)
tree_utils.label_nodes(combined_data.T, seq_labels)

# Two-panel figure: A shows only the prediction data, B shows prediction
# data together with the test data.
fig = plt.figure(figsize=(12, 6))
until_label = 'until Feb ' + str(year)
tree_offset = 0.0005

# Panel A
ax = plt.subplot(121)
plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36)
plt.title(until_label)
plt.tight_layout()
tree_utils.plot_prediction_tree(
    prediction, method='polarizer', axes=ax, cb=True, offset=tree_offset,
    internal=False)

# Panel B
ax = plt.subplot(122)
plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36)
season = str(year) + "/" + str(year + 1)
plt.title(until_label + ' + season ' + season + " (grey)")
pred_names = [c.name for c in prediction.terminals]
tree_utils.erase_color(combined_data.T)
combined_leaves = combined_data.T.get_terminals()
prediction.color_other_tree(combined_leaves, method='polarizer', offset=tree_offset)
for c in combined_data.T.get_terminals():
Example #8
0
    # Set up the ranking of seq_data with the 'mean_fitness' method.
    prediction = sequence_ranking.sequence_ranking(
        seq_data,
        eps_branch_length=eps_branch_length,
        pseudo_count=5,
        methods=['mean_fitness'],
        D=diffusion,
        distance_scale=distance_scale,
        samp_frac=samp_frac)

    # NOTE(review): best_node is not used in the visible part of this block;
    # the call is kept because predict() presumably computes the values the
    # steps below rely on -- confirm.
    best_node = prediction.predict()

    # Write fitness tree to file (for mutation annotation)
    Phylo.write(prediction.T, outfile_fitness_tree, "newick")

    # Get fitness changes on each branch, largest increase first
    df_fitness = get_fitness_changes(prediction)
    df_fitness.sort_values("delta_mean_fitness", ascending=False, inplace=True)

    # Write fitness changes to file
    df_fitness.to_csv(outfile_df_fitness)

    # Plot tree colored by fitness with and without node labels
    tree_utils.plot_prediction_tree(prediction)
    plt.savefig(outfile_tree_pdf)

    tree_utils.plot_prediction_tree(prediction,
                                    node_label_func=lambda x: x.name)
    plt.savefig(outfile_tree_pdf_labeled)

    # print() with a single argument gives the same output on Python 2 and 3,
    # whereas the former print statement is a syntax error on Python 3.
    print("Done!!")
    prediction.best_node().name: '*'
}  #, laessig_prediction[year].name:"L&L"}
# Attach the labels in seq_labels to the prediction tree and the combined tree.
tree_utils.label_nodes(prediction.T, seq_labels)
tree_utils.label_nodes(combined_data.T, seq_labels)

# Two-panel figure: A shows only the prediction data, B shows prediction
# data together with the test data.
fig = plt.figure(figsize=(12, 6))
until_label = 'until Feb ' + str(year)
tree_offset = 0.0005

# Panel A
ax = plt.subplot(121)
plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36)
plt.title(until_label)
plt.tight_layout()
tree_utils.plot_prediction_tree(
    prediction, axes=ax, cb=True, offset=tree_offset, internal=False)

# Panel B
ax = plt.subplot(122)
plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36)
season = str(year) + "/" + str(year + 1)
plt.title(until_label + ' + season ' + season + " (grey)")
pred_names = [c.name for c in prediction.terminals]
tree_utils.erase_color(combined_data.T)
combined_leaves = combined_data.T.get_terminals()
prediction.color_other_tree(combined_leaves, offset=tree_offset)
for c in combined_data.T.get_terminals():
    if c.name in pred_names:
        pass
Example #10
0
    # combine the test data, the prediction data and possible other sequences
    # and build a tree
    combined_data = test_flu.make_combined_data(prediction, test_data, otherseqsnames)

    # Work with the first entry of `methods`; it is indexed at positions
    # 0, 1 and 2 below.
    method = methods[0]
    color_internal = True

    # Label the node returned by best_node() with '*'.  For H3N2 runs whose
    # year has an entry in laessig_prediction, that entry is labelled "L&L".
    if params.flutype.startswith('H3N2') and year in laessig_prediction:
        seq_labels = {prediction.best_node(method=method[0], nodes=method[2]).name: '*',
                      laessig_prediction[year].name: "L&L"}
    else:
        seq_labels = {prediction.best_node().name: '*'}

    tree_utils.label_nodes(prediction.T, seq_labels)
    tree_utils.erase_color(prediction.T)
    tree_utils.erase_color(combined_data.T)
    tree_utils.plot_prediction_tree(prediction, internal=color_internal)
    # The title reports the distance stored for this method divided by
    # distances['average'], rounded to 4 decimals.
    plt.title("predicting " + params.flutype + " season " + str(year) + "/" + str(year + 1) + ": " +
              str(np.round(distances[method[0] + method[1]] / distances['average'], 4)))
    #plt.savefig('../figures/'+base_name+'_prediction_'+method[0]+method[1]+'_'+name_mod+'.pdf')

    # plot a combined figure
    # color according to season
    pred_names = [c.name for c in prediction.terminals]
    # Set copy of pred_names so the membership test in the loop below is O(1)
    # per leaf instead of a scan over the whole list.
    pred_name_set = set(pred_names)
    tree_utils.erase_color(combined_data.T)
    for c in combined_data.T.get_terminals():
        # leaves shared with the prediction data get one color, all others another
        if c.name in pred_name_set:
            c.color = (178, 34, 34)
        else:
            c.color = (0, 255, 255)
    prediction.interpolate_color(combined_data.T)
    
    otherseqsnames=[]
# Combine prediction data, test data and the other named sequences.
combined_data = test_flu.make_combined_data(prediction, test_data, otherseqsnames)

# Label the node returned by best_node('polarizer') with '*' in both trees.
seq_labels = {prediction.best_node('polarizer').name: '*'}  # , laessig_prediction[year].name:"L&L"}
tree_utils.label_nodes(prediction.T, seq_labels)
tree_utils.label_nodes(combined_data.T, seq_labels)

# plot a combined figure
fig = plt.figure(figsize=(12, 6))
# subplot 1: only the prediction data
ax = plt.subplot(121)
# add panel label
plt.text(-0.06, 0.95, 'A', transform=plt.gca().transAxes, fontsize=36)
plt.title('until Feb ' + str(year))
plt.tight_layout()
tree_utils.plot_prediction_tree(prediction, method='polarizer', axes=ax, cb=True,
                                offset=0.0005, internal=False)

# subplot 2: prediction data and test data
ax = plt.subplot(122)
# add panel label
plt.text(-0.06, 0.95, 'B', transform=plt.gca().transAxes, fontsize=36)
plt.title('until Feb ' + str(year) + ' + season ' + str(year) + "/" + str(year + 1) + " (grey)")
pred_names = [c.name for c in prediction.terminals]
# Set copy of pred_names so the membership test in the loop below is O(1)
# per leaf instead of a scan over the whole list.
pred_name_set = set(pred_names)
tree_utils.erase_color(combined_data.T)
prediction.color_other_tree(combined_data.T.get_terminals(), method='polarizer', offset=0.0005)
# Leaves that also occur in the prediction data keep the color assigned by
# color_other_tree; every other leaf gets a fixed color.
# NOTE(review): the title says "(grey)" but (0, 255, 255) read as RGB is
# cyan -- confirm how the plotting code interprets this tuple.
for c in combined_data.T.get_terminals():
    if c.name not in pred_name_set:
        c.color = (0, 255, 255)