Ejemplo n.º 1
0
 def test_multiprocessing(self):
     """Check that one and two workers yield identical Hamming distances.

     The first ten entries of ``seqs`` are compared pairwise twice, once
     with ``ncpus=1`` and once with ``ncpus=2``; every element of the two
     results must be equal.
     """
     subset = seqs[:10]
     metric = pwsd.metrics.hamming_distance
     serial = pwsd.apply_pairwise_sq(subset, metric, ncpus=1)
     parallel = pwsd.apply_pairwise_sq(subset, metric, ncpus=2)
     self.assertTrue(np.all(serial == parallel))
Ejemplo n.º 2
0
 def test_multiprocessing_more(self):
     """Check that ``nw_metric`` results agree between two workers and one.

     ``mixed_seqs`` is scored with ``matrix='blosum62'``; the two-worker
     run is performed first, then the single-worker run, and the results
     are compared element by element.
     """
     metric = pwsd.metrics.nw_metric
     parallel = pwsd.apply_pairwise_sq(mixed_seqs, metric,
                                       matrix='blosum62', ncpus=2)
     serial = pwsd.apply_pairwise_sq(mixed_seqs, metric,
                                     matrix='blosum62', ncpus=1)
     self.assertTrue(np.all(serial == parallel))
Ejemplo n.º 3
0
    def test_pw_sq_nonuniq(self):
        """Check that duplicated input does not disturb the original distances.

        Distances are computed for the first ten sequences and again for
        that same list concatenated with itself; the top-left 10x10 corner
        of the larger square matrix must equal the smaller matrix.
        """
        metric = pwsd.metrics.hamming_distance

        unique_mat = squareform(
            pwsd.apply_pairwise_sq(seqs[:10], metric, ncpus=1))
        doubled_mat = squareform(
            pwsd.apply_pairwise_sq(seqs[:10] + seqs[:10], metric, ncpus=1))

        top_left = doubled_mat[:10, :10]
        self.assertTrue(np.all(top_left == unique_mat))
Ejemplo n.º 4
0
def test_nb_pw_sq_hamming():
    """Check that the numba Hamming path matches the pure-Python path.

    Both implementations are run on the first ten entries of ``seqs`` and
    their outputs are compared element by element.
    """
    subset = seqs[:10]
    expected = pwsd.apply_pairwise_sq(subset,
                                      pwsd.metrics.hamming_distance,
                                      ncpus=1)
    observed = pwsd.numba_tools.nb_pairwise_sq(
        subset, pwsd.numba_tools.nb_hamming_distance)
    assert np.all(expected == observed)
Ejemplo n.º 5
0
 def test_pw_sq_subst(self):
     """Check that the substitution metric produces a 10x10 square matrix.

     A substitution dictionary is built from ``parasail.blosum62`` and
     passed to ``str_subst_metric`` for the first ten entries of ``seqs``.
     """
     blosum_lookup = pwsd.matrices.dict_from_matrix(parasail.blosum62)
     condensed = pwsd.apply_pairwise_sq(seqs[:10],
                                        pwsd.metrics.str_subst_metric,
                                        subst_dict=blosum_lookup,
                                        ncpus=1)
     square = squareform(condensed)
     n_rows, n_cols = square.shape[0], square.shape[1]
     self.assertTrue(n_rows == 10 and n_cols == 10)
Ejemplo n.º 6
0
def test_ex6():
    """Run the Levenshtein example and verify the resulting distance matrix.

    Pairwise edit distances are computed for 'homer', 'home' and 'rome'
    and expanded to a square matrix, which is then checked against the
    known unit-cost distances.
    """
    import pwseqdist as pw
    from scipy.spatial.distance import squareform
    import Levenshtein
    dvec = pw.apply_pairwise_sq(seqs=['homer', 'home', 'rome'],
                                metric=Levenshtein.distance,
                                ncpus=1)
    dmat = squareform(dvec)

    # Without assertions this test could only fail on an exception.
    # Unit-cost edit distances: homer/home = 1 (drop the trailing 'r'),
    # homer/rome = 2 (swap 'h' for 'r' and drop the trailing 'r'),
    # home/rome = 1 (swap 'h' for 'r').
    assert dmat.shape == (3, 3)
    assert dmat.tolist() == [[0, 1, 2],
                             [1, 0, 1],
                             [2, 1, 0]]
Ejemplo n.º 7
0
def test_nb_pw_sq():
    """Check that the numba substitution metric matches the Python one.

    Each implementation gets its own substitution table built from
    ``parasail.blosum62``; both are run on the first ten entries of
    ``seqs`` and the outputs are compared element by element.
    """
    subset = seqs[:10]

    py_table = pwsd.matrices.dict_from_matrix(parasail.blosum62)
    expected = pwsd.apply_pairwise_sq(subset,
                                      pwsd.metrics.str_subst_metric,
                                      subst_dict=py_table,
                                      ncpus=1)

    nb_table = pwsd.numba_tools.nb_dict_from_matrix(parasail.blosum62)
    observed = pwsd.numba_tools.nb_pairwise_sq(
        subset, pwsd.numba_tools.nb_subst_metric, nb_table)

    assert np.all(expected == observed)
Ejemplo n.º 8
0
def generate_peptide_data(L=5, n=300, seed=110820):
    """Simulate random peptides annotated with traits, clusters and counts.

    Parameters
    ----------
    L : int
        Accepted for interface compatibility but not read by the body;
        every generated peptide has exactly 4 residues.
    n : int
        Number of peptides to generate.
    seed : int
        Value passed to ``np.random.seed`` before any random draw.

    Returns
    -------
    dat : pd.DataFrame
        One row per peptide with columns 'seq', 'trait1', 'trait2',
        'trait3', 'cluster' and 'count'.
    pw : object
        Whatever ``pwsd.apply_pairwise_sq`` returns for the peptides under
        the Hamming metric; it is fed directly to ``sch.linkage``.
    """
    np.random.seed(seed)
    alphabet = 'ARNDCQEGHILKMFPSTWYVBZ'
    residues = list(alphabet)
    # Random, normalised residue frequencies.
    weights = np.random.rand(len(alphabet))
    weights = weights / np.sum(weights)

    seqs = []
    for _ in range(n):
        seqs.append(''.join(np.random.choice(residues, size=4, p=weights)))

    def _assign_trait1(seq):
        # Positional mismatches against the first generated peptide.
        mismatches = np.sum(list(map(operator.__ne__, seq, seqs[0])))
        # The random draw happens on every call, whatever the mismatch count.
        flag = int((mismatches <= 3) * (np.random.rand() < 0.6))
        return ('ZERO', 'ONE')[flag]

    def _trait2_probability(seq):
        # K/R/Q at the second or fourth position gives a near-certain 1.
        if seq[1] in 'KRQ' or seq[3] in 'KRQ':
            return 0.99
        if seq[0] in 'QA':
            return 0.01
        return 0.03

    def _assign_trait2(seq):
        pr = _trait2_probability(seq)
        return np.random.choice([1, 0], p=[pr, 1-pr])

    def _assign_trait3(seq):
        # Independent of the sequence content.
        return np.random.choice(['A', 'B', 'C'], p=[0.2, 0.4, 0.4])

    pw = pwsd.apply_pairwise_sq(seqs, metric=pwsd.metrics.hamming_distance)

    tree = sch.linkage(pw, method='complete')
    labels = sch.fcluster(tree, 50, criterion='maxclust')

    # Draw order matters for reproducibility: trait1, trait2, trait3, count.
    trait1 = np.array([_assign_trait1(s) for s in seqs])
    trait2 = np.array([_assign_trait2(s) for s in seqs])
    trait3 = np.array([_assign_trait3(s) for s in seqs])
    counts = np.random.randint(4, 10, size=n)

    dat = pd.DataFrame({'seq': seqs,
                        'trait1': trait1,
                        'trait2': trait2,
                        'trait3': trait3,
                        'cluster': labels,
                        'count': counts})
    return dat, pw
Ejemplo n.º 9
0
def test_README_example1():
    """Run the README peptide example and compare with the expected matrix.

    Six peptides are compared with ``nw_hamming_metric`` using as many
    workers as ``multiprocessing.cpu_count()`` reports; the integer-cast
    square matrix must equal the hard-coded reference.
    """
    import numpy as np

    import pwseqdist as pw
    import multiprocessing
    from scipy.spatial.distance import squareform

    peptides = [
        'CACADLGAYPDKLIF',
        'CACDALLAYTDKLIF',
        'CACDAVGDTLDKLIF',
        'CACDDVTEVEGDKLIF',
        'CACDFISPSNWGIQSGRNTDKLIF',
        'CACDPVLGDTRLTDKLIF',
    ]

    n_workers = multiprocessing.cpu_count()
    condensed = pw.apply_pairwise_sq(seqs=peptides,
                                     metric=pw.metrics.nw_hamming_metric,
                                     ncpus=n_workers)
    observed = squareform(condensed).astype(int)

    expected = np.array([
        [0, 4, 6, 7, 15, 8],
        [4, 0, 5, 7, 14, 7],
        [6, 5, 0, 6, 14, 4],
        [7, 7, 6, 0, 14, 8],
        [15, 14, 14, 14, 0, 11],
        [8, 7, 4, 8, 11, 0],
    ])

    assert np.all(observed == expected)
Ejemplo n.º 10
0
 def test_pw_sq(self):
     """Check that ten sequences expand to a 10x10 Hamming distance matrix."""
     condensed = pwsd.apply_pairwise_sq(seqs[:10],
                                        pwsd.metrics.hamming_distance,
                                        ncpus=1)
     square = squareform(condensed)
     n_rows, n_cols = square.shape[0], square.shape[1]
     self.assertTrue(n_rows == 10 and n_cols == 10)
Ejemplo n.º 11
0
 def test_pw_sq_nonuniq_tcrdist(self):
     """Check ``nw_hamming_metric`` on input that contains repeated sequences.

     Nine five-residue strings, several of them duplicates, are compared
     pairwise; the integer-cast square matrix must equal the hard-coded
     reference, in which identical strings are at distance 0.
     """
     peptides = [
         'PNSSL', 'KEKRN', 'KEKRN', 'PNASF', 'PNASF', 'PNASF', 'EKKES',
         'EKKER', 'IRTEH'
     ]
     expected = np.array([
         [0, 5, 5, 2, 2, 2, 5, 5, 5],
         [5, 0, 0, 5, 5, 5, 4, 4, 5],
         [5, 0, 0, 5, 5, 5, 4, 4, 5],
         [2, 5, 5, 0, 0, 0, 5, 5, 5],
         [2, 5, 5, 0, 0, 0, 5, 5, 5],
         [2, 5, 5, 0, 0, 0, 5, 5, 5],
         [5, 4, 4, 5, 5, 5, 0, 1, 4],
         [5, 4, 4, 5, 5, 5, 1, 0, 4],
         [5, 5, 5, 5, 5, 5, 4, 4, 0],
     ])
     condensed = pwsd.apply_pairwise_sq(peptides,
                                        pwsd.metrics.nw_hamming_metric,
                                        ncpus=1)
     observed = squareform(condensed).astype(int)
     self.assertTrue(np.all(observed == expected))