import numpy as np
import matplotlib.pyplot as ppl
import matplotlib.cm as cm

# Custom modules (the current directory is put first on the import path)
import sys
sys.path.insert(0, '.')
import modules.parser_Shankarappa as pS
from modules.helper import is_nonsyn_table
from modules.alphabet import alpha



# Script
if __name__ == '__main__':

    # Parse the patients, passing 'HXB2' as the reference
    patients = pS.parse_sequences(reference='HXB2')

    for k, p in enumerate(patients):

        # Every patient whose string form is not 'p10' is skipped
        if str(p) != 'p10':
            continue
        print(p)

        p.filter_only_sequenced()

        # Allele frequencies (all alleles, all positions): one call per visit,
        # collected in visit order
        paf = p.allele_frequencies
        afs_by_visit = []
        for v in p.visit:
            afs_by_visit.append(paf(seqs=p.seqs_from_visit(v)))
        afs = np.asarray(afs_by_visit)

        # Exchange the first and last axes, so that the site comes first,
        # the nucleotide second and the time third
        afs = np.swapaxes(afs, 0, 2)
from conservation_syn_nonsyn_subtypeB import codon_single_mutants_synnonsyn


def get_is_mutation(consensus):
    """Flag, for every site, the alphabet letters that differ from consensus.

    Parameters:
        consensus: sequence of letters; each one must occur in ``alpha``.

    Returns:
        Boolean array of shape (len(consensus), len(alpha)). Entry [i, j] is
        False when the j-th letter of ``alpha`` is the consensus letter at
        site i, and True otherwise.

    Raises:
        ValueError: if a consensus letter is not found in ``alpha``.
    """
    letters = list(alpha)

    # Row (alphabet index) of the consensus letter at each site
    consensus_rows = np.array([letters.index(letter) for letter in consensus],
                              dtype=int)
    sites = np.arange(len(consensus))

    # Start with everything marked as a mutation, then clear the consensus
    # letter of every site in one indexed assignment
    mask = np.ones((len(letters), len(consensus)), bool)
    mask[consensus_rows, sites] = False

    # Callers get the site as first axis
    return mask.T



# Script
if __name__ == '__main__':

    # Parse the patients, leaving out p4, p7, p8 and p11
    patients = pS.parse_sequences(exclude=['p4', 'p7', 'p8', 'p11'])

    # Only the first patient of the list is processed
    for k, p in enumerate(patients[:1]):

        p.filter_only_sequenced()

        # Allele frequencies (all alleles, all positions): one call per visit,
        # collected in visit order
        paf = p.allele_frequencies
        afs_by_visit = []
        for v in p.visit:
            afs_by_visit.append(paf(seqs=p.seqs_from_visit(v)))
        afs = np.asarray(afs_by_visit)

        # Exchange the first and last axes, so that the site comes first,
        # the nucleotide second and the time third
        afs = np.swapaxes(afs, 0, 2)

        # Gaps are to be eliminated first and the translation done afterwards
        # (whole codons are excluded so that translation stays possible)
# Maximal distance from the last time point at which an allele is still
# considered likely to reach either boundary before sequencing stops (alleles
# appearing later will probably stay floating).
# Note: only used for testing, not in the plot.
maxt = dict(
    Shankarappa=40,     # Months
    Bunnik=1200,        # Days
)



# Script
if __name__ == '__main__':

    # Parse both data sets, leaving out the problematic patients of each,
    # and pool them (Bunnik first, then Shankarappa)
    patientsB = pB.parse_sequences(
        reference='SHAPE',
        exclude=bad_patients['Bunnik'],
    )
    patientsS = pS.parse_sequences(
        reference='SHAPE',
        exclude=bad_patients['Shankarappa'],
    )
    patients = patientsB + patientsS

    # Overall counts, one integer array per class.
    # Axes: initial frequency (nu0s) bin, outcome (0 = lost, 1 = fixed,
    # 2 = floating), number of the patient the count came from.
    counts_shape = (len(nu0ss), 3, len(patients))
    counts = {cls: np.zeros(counts_shape, int) for cls in classes}

    # Aggregate information from all patients
    for k, p in enumerate(patients):
        if VERBOSE >= 1:
            stderr.write(str(p) + '\n')
z = patient.U[ind, 2] ax.scatter(x, y, z, color=cols[i], s=60) ax.set_xlabel('PC1', fontsize=18) ax.set_ylabel('PC2', fontsize=18) ax.set_zlabel('PC3', fontsize=18) ax.set_title('Patient ' + patient.name) if show: ppl.show() # Script if __name__ == '__main__': patients = pS.parse_sequences() for p in patients[0:1]: print p p.filter_only_sequenced() # Perform PCA on all sequences p.PCA() # Plot plot3D(p) ppl.ion() ppl.show()
fixlost = {'syn': [[], []], 'nonsyn': [[], []]} # Counts and times (overall) # The first list of each nu0s is lost, the second fixed, the third floating counts_all = { key: [[0, 0, 0] for nu0s in nu0ss] for key in ['syn', 'nonsyn'] } # The first list of each nu0s is lost, the second fixed times_all = { key: [[[], []] for nu0s in nu0ss] for key in ['syn', 'nonsyn'] } # Define the patients patients = pS.parse_sequences(exclude=bad_patients) # Aggregate information from all patients for k, p in enumerate(patients): if VERBOSE >= 1: stderr.write(str(p) + '\n') # Filter the time points to sequenced times p.filter_only_sequenced() n = len(p.visit) # Filter away gaps (conservative), keep reading frame is_nongap = ((np.array(p.seqs) == '-').sum(axis=0) == 0) for i in xrange(len(is_nongap)): if not is_nongap[i]: