import numpy as np
import matplotlib.pyplot as ppl
import matplotlib.cm as cm

# Custom modules
import sys
sys.path.insert(0, '.')
import modules.parser_Shankarappa as pS
from modules.helper import is_nonsyn_table
from modules.alphabet import alpha

# Script
# Parses the patients through the Shankarappa parser (reference 'HXB2') and,
# for the one patient whose string form is 'p10', builds the array of allele
# frequencies over all of that patient's visits.
if __name__ == '__main__':

    # Define the patients
    patients = pS.parse_sequences(reference='HXB2')

    # NOTE: k (the enumeration index) is not used in the visible part of the loop
    for k, p in enumerate(patients):
        # Skip every patient except the one labelled 'p10'
        if str(p) != 'p10':
            continue
        print p

        # Restrict the patient to sequenced time points
        # NOTE(review): the exact filtering semantics live in the patient
        # class, which is not visible here - confirm
        p.filter_only_sequenced()

        # Measure the allele frequencies (of all alleles at all positions)
        paf = p.allele_frequencies
        # One call per visit; the per-visit results are stacked along axis 0
        afs = np.asarray([paf(seqs=p.seqs_from_visit(v)) for v in p.visit])

        # Reshape so that we get the site as first axis, the nucleotide as
        # second, the time as third
        afs = afs.swapaxes(0, 2)
from conservation_syn_nonsyn_subtypeB import codon_single_mutants_synnonsyn


def get_is_mutation(consensus):
    """Build a boolean mask flagging every non-consensus allele.

    Parameters:
        consensus: sequence of alphabet letters, one per site. Every letter
            must be found in the module-level ``alpha``, otherwise
            ``list.index`` raises ``ValueError``.

    Returns:
        Boolean array of shape (len(consensus), len(alpha)). Entry [i, j] is
        False when the j-th letter of ``alpha`` is the consensus letter at
        site i, and True otherwise.
    """
    letters = list(alpha)
    n_sites = len(consensus)

    # Start from "everything is a mutation", alphabet letter along the rows
    mask = np.ones((len(letters), n_sites), bool)

    # Row index of the consensus letter at each site (first match in alpha)
    rows = np.array([letters.index(letter) for letter in consensus], dtype=int)

    # Clear the consensus entry of every site in one indexed assignment
    mask[rows, np.arange(n_sites)] = False

    # Callers expect the site as first axis
    return mask.T


# Script
# Parses the patients (leaving out p4, p7, p8 and p11) and builds the allele
# frequency array over all visits for the first patient only.
if __name__ == '__main__':

    # Define the patients
    patients = pS.parse_sequences(exclude=['p4', 'p7', 'p8', 'p11'])

    # Iterate over patients
    # NOTE: the slice [:1] limits the loop to the first patient; k (the
    # enumeration index) is not used in the visible part of the loop
    for k, p in enumerate(patients[:1]):
        # Restrict the patient to sequenced time points
        # NOTE(review): the exact filtering semantics live in the patient
        # class, which is not visible here - confirm
        p.filter_only_sequenced()

        # Measure the allele frequencies (of all alleles at all positions)
        paf = p.allele_frequencies
        # One call per visit; the per-visit results are stacked along axis 0
        afs = np.asarray([paf(seqs=p.seqs_from_visit(v)) for v in p.visit])

        # Reshape so that we get the site as first axis, the nucleotide as
        # second, the time as third
        afs = afs.swapaxes(0, 2)

        # Eliminate gaps first (whole codons are excluded to keep translation
        # possible), and only translate afterwards!
# Distance from the last time point within which an allele is still considered
# likely to reach either boundary before the sequencing stops (alleles that
# appear later will probably stay floating).
# The dictionary is keyed by data set name; the unit differs per data set, as
# noted next to each value.
# Note: this is not used in the plot (only for testing).
maxt = {
    'Shankarappa': 40,  # Months
    'Bunnik': 1200  # Days
}

# Script
# Parses the Bunnik and Shankarappa patients against the 'SHAPE' reference,
# allocates the per-class counters and starts iterating over all patients.
# NOTE(review): pB, bad_patients, nu0ss, classes, VERBOSE and stderr are not
# defined in the visible part of the file - presumably imported or defined
# further up; confirm.
if __name__ == '__main__':

    # Define the patients (excluding problematic ones)
    patientsB = pB.parse_sequences(reference='SHAPE',
                                   exclude=bad_patients['Bunnik'])
    patientsS = pS.parse_sequences(reference='SHAPE',
                                   exclude=bad_patients['Shankarappa'])
    # Bunnik patients come first, Shankarappa patients after them
    patients = patientsB + patientsS

    # Counts (overall)
    # One integer array per class, of shape (len(nu0ss), 3, len(patients)).
    # Along the second axis: the first entry of each nu0s is lost, the second
    # fixed, the third floating.
    # Moreover, record the patient number they came from (third axis)
    counts = {
        x: np.zeros((len(nu0ss), 3, len(patients)), int)
        for x in classes
    }

    # Aggregate information from all patients
    for k, p in enumerate(patients):

        # Report the patient being processed on stderr when verbose
        if VERBOSE >= 1:
            stderr.write(str(p) + '\n')
# Example #4
# 0
        z = patient.U[ind, 2]

        ax.scatter(x, y, z, color=cols[i], s=60)

    ax.set_xlabel('PC1', fontsize=18)
    ax.set_ylabel('PC2', fontsize=18)
    ax.set_zlabel('PC3', fontsize=18)
    ax.set_title('Patient ' + patient.name)

    if show:
        ppl.show()


# Script
if __name__ == '__main__':

    patients = pS.parse_sequences()

    for p in patients[0:1]:
        print p
        p.filter_only_sequenced()

        # Perform PCA on all sequences
        p.PCA()

        # Plot
        plot3D(p)

    ppl.ion()
    ppl.show()
    fixlost = {'syn': [[], []], 'nonsyn': [[], []]}

    # Counts and times (overall)
    # The first list of each nu0s is lost, the second fixed, the third floating
    counts_all = {
        key: [[0, 0, 0] for nu0s in nu0ss]
        for key in ['syn', 'nonsyn']
    }
    # The first list of each nu0s is lost, the second fixed
    times_all = {
        key: [[[], []] for nu0s in nu0ss]
        for key in ['syn', 'nonsyn']
    }

    # Define the patients
    patients = pS.parse_sequences(exclude=bad_patients)

    # Aggregate information from all patients
    for k, p in enumerate(patients):

        if VERBOSE >= 1:
            stderr.write(str(p) + '\n')

        # Filter the time points to sequenced times
        p.filter_only_sequenced()
        n = len(p.visit)

        # Filter away gaps (conservative), keep reading frame
        is_nongap = ((np.array(p.seqs) == '-').sum(axis=0) == 0)
        for i in xrange(len(is_nongap)):
            if not is_nongap[i]: