def test_multiprocessing(self):
    """Hamming distances must be identical with one worker and with two."""
    subset = seqs[:10]
    metric = pwsd.metrics.hamming_distance
    serial = pwsd.apply_pairwise_sq(subset, metric, ncpus=1)
    parallel = pwsd.apply_pairwise_sq(subset, metric, ncpus=2)
    identical = serial == parallel
    self.assertTrue(np.all(identical))
def test_multiprocessing_more(self):
    """Needleman-Wunsch (blosum62) distances must not depend on ncpus."""
    metric = pwsd.metrics.nw_metric
    # The two-worker run is evaluated first, as in the original test.
    parallel = pwsd.apply_pairwise_sq(mixed_seqs, metric,
                                      matrix='blosum62', ncpus=2)
    serial = pwsd.apply_pairwise_sq(mixed_seqs, metric,
                                    matrix='blosum62', ncpus=1)
    identical = serial == parallel
    self.assertTrue(np.all(identical))
def test_pw_sq_nonuniq(self):
    """Duplicating the input must leave the leading 10x10 block unchanged."""
    metric = pwsd.metrics.hamming_distance
    base = seqs[:10]

    base_mat = squareform(pwsd.apply_pairwise_sq(base, metric, ncpus=1))
    doubled_mat = squareform(
        pwsd.apply_pairwise_sq(base + base, metric, ncpus=1))

    # Top-left corner of the doubled matrix covers the first copy only.
    corner = doubled_mat[:10, :10]
    self.assertTrue(np.all(corner == base_mat))
def test_nb_pw_sq_hamming():
    """The numba pairwise Hamming result must equal the pure-Python one."""
    subset = seqs[:10]
    reference = pwsd.apply_pairwise_sq(
        subset, pwsd.metrics.hamming_distance, ncpus=1)
    accelerated = pwsd.numba_tools.nb_pairwise_sq(
        subset, pwsd.numba_tools.nb_hamming_distance)
    identical = reference == accelerated
    assert np.all(identical)
def test_pw_sq_subst(self):
    """Substitution-metric distances on 10 sequences give a 10x10 matrix."""
    blosum = pwsd.matrices.dict_from_matrix(parasail.blosum62)
    dist_vec = pwsd.apply_pairwise_sq(seqs[:10],
                                      pwsd.metrics.str_subst_metric,
                                      subst_dict=blosum,
                                      ncpus=1)
    dist_mat = squareform(dist_vec)
    n_rows, n_cols = dist_mat.shape[0], dist_mat.shape[1]
    self.assertTrue(n_rows == 10 and n_cols == 10)
def test_ex6():
    """Check that a third-party metric (Levenshtein) plugs into apply_pairwise_sq.

    The original version only built the matrix and asserted nothing, so it
    could pass with wrong distances. The expected edit distances are now
    pinned explicitly.
    """
    import numpy as np
    import pwseqdist as pw
    from scipy.spatial.distance import squareform
    import Levenshtein

    dvec = pw.apply_pairwise_sq(seqs=['homer', 'home', 'rome'],
                                metric=Levenshtein.distance,
                                ncpus=1)
    dmat = squareform(dvec)

    # homer -> home: one deletion; homer -> rome: one substitution plus one
    # deletion; home -> rome: one substitution.
    exp = np.array([[0, 1, 2],
                    [1, 0, 1],
                    [2, 1, 0]])
    assert dmat.shape == exp.shape
    assert np.all(dmat == exp)
def test_nb_pw_sq():
    """The numba substitution metric must reproduce the pure-Python result."""
    subset = seqs[:10]

    # Reference path: plain dict-based substitution table.
    py_table = pwsd.matrices.dict_from_matrix(parasail.blosum62)
    reference = pwsd.apply_pairwise_sq(subset,
                                       pwsd.metrics.str_subst_metric,
                                       subst_dict=py_table,
                                       ncpus=1)

    # Numba path: table built by the numba helper, passed positionally.
    nb_table = pwsd.numba_tools.nb_dict_from_matrix(parasail.blosum62)
    accelerated = pwsd.numba_tools.nb_pairwise_sq(
        subset, pwsd.numba_tools.nb_subst_metric, nb_table)

    identical = reference == accelerated
    assert np.all(identical)
def generate_peptide_data(L=5, n=300, seed=110820):
    """Build a random peptide table with traits, cluster labels and counts.

    Parameters
    ----------
    L : int
        Currently unused: every generated sequence has a fixed length of 4.
    n : int
        Number of sequences (rows) to generate.
    seed : int
        Seed passed to ``np.random.seed`` before any random draw.

    Returns
    -------
    dat : pd.DataFrame
        Columns 'seq', 'trait1', 'trait2', 'trait3', 'cluster', 'count'.
    pw : object
        The result of ``pwsd.apply_pairwise_sq`` with the Hamming metric,
        as fed to ``sch.linkage``.
    """
    np.random.seed(seed)

    alphabet = 'ARNDCQEGHILKMFPSTWYVBZ'
    weights = np.random.rand(len(alphabet))
    weights = weights / np.sum(weights)

    seqs = []
    for _ in range(n):
        residues = np.random.choice(list(alphabet), size=4, p=weights)
        seqs.append(''.join(residues))

    def _assign_trait1(seq):
        # Number of positions that differ from the first generated sequence.
        d = np.sum(list(map(operator.ne, seq, seqs[0])))
        near = d <= 3
        # The random draw happens unconditionally, exactly once per call.
        lucky = np.random.rand() < 0.6
        return 'ONE' if int(near * lucky) else 'ZERO'

    def _assign_trait2(seq):
        if seq[1] in 'KRQ' or seq[3] in 'KRQ':
            pr = 0.99
        else:
            pr = 0.01 if seq[0] in 'QA' else 0.03
        return np.random.choice([1, 0], p=[pr, 1-pr])

    def _assign_trait3(seq):
        return np.random.choice(['A', 'B', 'C'], p=[0.2, 0.4, 0.4])

    pw = pwsd.apply_pairwise_sq(seqs, metric=pwsd.metrics.hamming_distance)
    tree = sch.linkage(pw, method='complete')
    labels = sch.fcluster(tree, 50, criterion='maxclust')

    # Column values are drawn in this exact order (trait1, trait2, trait3,
    # count) so the random stream is consumed in a fixed sequence.
    trait1 = np.array([_assign_trait1(s) for s in seqs])
    trait2 = np.array([_assign_trait2(s) for s in seqs])
    trait3 = np.array([_assign_trait3(s) for s in seqs])
    counts = np.random.randint(4, 10, size=n)

    dat = pd.DataFrame({'seq': seqs,
                        'trait1': trait1,
                        'trait2': trait2,
                        'trait3': trait3,
                        'cluster': labels,
                        'count': counts})
    return dat, pw
def test_README_example1():
    """Run the README example and compare against the hard-coded matrix."""
    import multiprocessing
    import numpy as np
    import pwseqdist as pw
    from scipy.spatial.distance import squareform

    peptides = ['CACADLGAYPDKLIF',
                'CACDALLAYTDKLIF',
                'CACDAVGDTLDKLIF',
                'CACDDVTEVEGDKLIF',
                'CACDFISPSNWGIQSGRNTDKLIF',
                'CACDPVLGDTRLTDKLIF']

    expected = np.array([[0, 4, 6, 7, 15, 8],
                         [4, 0, 5, 7, 14, 7],
                         [6, 5, 0, 6, 14, 4],
                         [7, 7, 6, 0, 14, 8],
                         [15, 14, 14, 14, 0, 11],
                         [8, 7, 4, 8, 11, 0]])

    n_workers = multiprocessing.cpu_count()
    dist_vec = pw.apply_pairwise_sq(seqs=peptides,
                                    metric=pw.metrics.nw_hamming_metric,
                                    ncpus=n_workers)
    dist_mat = squareform(dist_vec).astype(int)
    assert np.all(dist_mat == expected)
def test_pw_sq(self):
    """Hamming distances on 10 sequences expand to a 10x10 square matrix."""
    dist_vec = pwsd.apply_pairwise_sq(seqs[:10],
                                      pwsd.metrics.hamming_distance,
                                      ncpus=1)
    dist_mat = squareform(dist_vec)
    n_rows, n_cols = dist_mat.shape[0], dist_mat.shape[1]
    self.assertTrue(n_rows == 10 and n_cols == 10)
def test_pw_sq_nonuniq_tcrdist(self):
    """nw_hamming_metric on input with repeated sequences matches a fixed matrix."""
    peptides = ['PNSSL', 'KEKRN', 'KEKRN',
                'PNASF', 'PNASF', 'PNASF',
                'EKKES', 'EKKER', 'IRTEH']

    expected = np.array([[0, 5, 5, 2, 2, 2, 5, 5, 5],
                         [5, 0, 0, 5, 5, 5, 4, 4, 5],
                         [5, 0, 0, 5, 5, 5, 4, 4, 5],
                         [2, 5, 5, 0, 0, 0, 5, 5, 5],
                         [2, 5, 5, 0, 0, 0, 5, 5, 5],
                         [2, 5, 5, 0, 0, 0, 5, 5, 5],
                         [5, 4, 4, 5, 5, 5, 0, 1, 4],
                         [5, 4, 4, 5, 5, 5, 1, 0, 4],
                         [5, 5, 5, 5, 5, 5, 4, 4, 0]])

    dist_vec = pwsd.apply_pairwise_sq(peptides,
                                      pwsd.metrics.nw_hamming_metric,
                                      ncpus=1)
    observed = squareform(dist_vec).astype(int)
    self.assertTrue(np.all(observed == expected))