コード例 #1
0
    # Keep this entry only when `aa` contains fewer than 10 items equal to 'X';
    # entries at or above that count are left out of both lists.
    if sum(1 for l in aa if l == 'X') < 10:
        
        # `prots` and `names` are appended in the same branch, so position i in
        # one list always corresponds to position i in the other.
        prots.append(aa)
        names.append(name)

# <codecell>

# One-column frame: 'Tat' holds the entries of `prots`, labelled by `names`.
seq_df = DataFrame({'Tat': Series(prots, index=names)})

# <codecell>

from SeqProcessTools import align_seq_data_frame

# Pass seq_df through align_seq_data_frame, with the ConBseqs.txt file as the
# second argument.
out_align = align_seq_data_frame(
    seq_df,
    '/home/will/HIVReportGen/Data/BlastDB/ConBseqs.txt',
)

# <codecell>

# Load the cohort table (tab-separated).
cohort_data = read_csv('Texas_Cohort_Data.txt', sep='\t')

# Move the alignment index into an 'index' column and derive the join key:
# the part of each label before the first '-'.
nout_align = out_align.reset_index()
nout_align['short_ind'] = nout_align['index'].map(lambda x: x.split('-')[0])

# Join on the key columns. left_index/right_index are boolean flags, so passing
# column names to them joined the two frames on their row indexes instead of
# matching 'short_ind' against 'Patient ID'.
# NOTE(review): 'short_ind' is a string; confirm 'Patient ID' is read as text too.
nout = merge(nout_align, cohort_data,
             left_on='short_ind', right_on='Patient ID')

# Filter with a boolean mask. DataFrame.drop expects index labels, so handing
# it the mask did not remove the 'Not Tested' rows.
nout = nout[nout['NeuroCog'] != 'Not Tested']
print(nout)

# <codecell>
コード例 #2
0
# Switch the working directory; relative paths used afterwards resolve here.
os.chdir('/home/will/Dropbox/HIVseqs/')
# Extend the import search path before importing SeqProcessTools
# (presumably the module lives in this directory -- confirm).
sys.path.append('/home/will/HIVReportGen/AnalysisCode/')
from SeqProcessTools import (
    read_pat_seq_data,
    load_training_seq_data,
    align_seq_data_frame,
)

# <codecell>

import glob

# Patient FASTA files, read together with the ConBseqs.txt file.
pat_files = glob.glob('/home/will/HIVReportGen/Data/PatientFasta/*.fasta')
pat_seq = read_pat_seq_data(
    pat_files,
    '/home/will/HIVReportGen/Data/BlastDB/ConBseqs.txt',
)

# Training FASTA files are loaded first, then passed through
# align_seq_data_frame with the same ConBseqs.txt file.
training_files = glob.glob('/home/will/HIVReportGen/Data/TrainingSequences/*.fasta')
training_data = load_training_seq_data(training_files)
align_lanl = align_seq_data_frame(
    training_data,
    '/home/will/HIVReportGen/Data/BlastDB/ConBseqs.txt',
)

# Concatenate the patient result and the aligned training result.
all_seqs = concat([pat_seq, align_lanl])

# <codecell>

def get_pairwise_distances(seq_series, tree_file = None, seq_file = None):
    
    if seq_file is None:
        fasta_handle = NTF()
    if tree_file is None:
        tree_handle = NTF()
    else:
コード例 #3
0
# Store each "Tropism" value in trop_dict under the matching index label of trops.
trop_dict.update(zip(trops.index, trops["Tropism"].values))

# <codecell>


# Write the "score" column of the rows that have a "gp120" value to an Excel file.
(
    grouped_seq_df
    .dropna(subset=["gp120"])[["score"]]
    .to_excel("NewPSSMScores.xlsx")
)

# <codecell>

# Rows without a "gp120" value are left out of the alignment input.
wanted_seq_data = grouped_seq_df.dropna(subset=["gp120"])

# <codecell>

print("aligning")
align_data = align_seq_data_frame(
    wanted_seq_data,
    "/home/will/HIVReportGen/Data/BlastDB/ConBseqs.txt",
)

# <codecell>

# Derive the "Tropism" column by applying decide_tropism to every "score" value.
align_data["Tropism"] = align_data["score"].map(decide_tropism)

# <codecell>

# Keep only the rows whose "Tropism" value is not missing.
wanted_data = align_data.dropna(subset=["Tropism"])

# <codecell>

from itertools import product


def yield_regions(trop_dict):