Example #1
0
def main():
    """Command-line entry point: compute and emit a word-based distance matrix.

    Steps, in order:
      1. Build the argument parser and validate the parsed arguments.
      2. Read the sequences from ``args.fasta``.
      3. Obtain a word pattern, either created from the sequences with
         ``args.word_size`` or read from ``args.word_pattern``.
      4. Optionally reduce the alphabet and/or merge reverse complements.
      5. Turn the pattern into frequencies, compute distances with the
         measure named by ``args.distance`` and build the matrix.
      6. Write the matrix to ``args.out`` if given, otherwise display it.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    # A truthy word size means "count words now"; otherwise a previously
    # produced pattern is loaded from args.word_pattern.
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    # Both transformations return a new pattern object, hence the rebinding.
    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)

    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises,
        # which the previous manual open()/close() pair did not guarantee.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
 def test_distance_hamming(self):
     # Expected text: a size line followed by one row per peptide id.
     expected = "\n".join((
         "   4",
         "seq1       0 22 44 37",
         "seq2       22 0 26 31",
         "seq3       44 26 0 29",
         "seq4       37 31 29 0",
     ))
     # 'hamming' measure over the peptide fixture; the literal 2 is
     # presumably the word size -- confirm against word_sets_distance.
     measure = word_sets_distance.Distance(self.pep_records, 2, 'hamming')
     result = distmatrix.create(self.pep_records.id_list, measure)
     # format(0) is called positionally, exactly as before.
     self.assertEqual(result.format(0), expected)
Example #3
0
 def test_kld_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.0932800 0.0435210",
         "seq2       0.0932800 0.0000000 0.0447391",
         "seq3       0.0435210 0.0447391 0.0000000",
     ))
     # Distance object configured with the 'kld' measure over self.freqs.
     measure = word_distance.Distance(self.freqs, 'kld')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #4
0
 def test_manhattan_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 1.2156863 0.7619048",
         "seq2       1.2156863 0.0000000 0.7899160",
         "seq3       0.7619048 0.7899160 0.0000000",
     ))
     # Distance object configured with the 'manhattan' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'manhattan')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #5
0
 def test_canberra_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 10.3372258 7.1836838",
         "seq2       10.3372258 0.0000000 6.6280959",
         "seq3       7.1836838 6.6280959 0.0000000",
     ))
     # Distance object configured with the 'canberra' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'canberra')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #6
0
 def test_distance_dna_euclidnorm(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 1.0227476 1.9351116",
         "seq2       1.0227476 0.0000000 1.4469591",
         "seq3       1.9351116 1.4469591 0.0000000",
     ))
     # No measure name is passed, so bbc.Distance falls back to its own
     # default (the test name suggests euclid_norm -- confirm in bbc).
     measure = bbc.Distance(self.vector)
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #7
0
 def test_distance_dna_google(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 73.1311144 37.1219467",
         "seq2       73.1311144 0.0000000 33.2221873",
         "seq3       37.1219467 33.2221873 0.0000000",
     ))
     # bbc distance object configured with the 'google' measure over
     # self.vector.
     measure = bbc.Distance(self.vector, 'google')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #8
0
 def test_lcc_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.6205496 0.4017554",
         "seq2       0.6205496 0.0000000 0.2550506",
         "seq3       0.4017554 0.2550506 0.0000000",
     ))
     # Distance object configured with the 'lcc' measure over self.freqs.
     measure = word_distance.Distance(self.freqs, 'lcc')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #9
0
 def test_braycurtis_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.6078431 0.3809524",
         "seq2       0.6078431 0.0000000 0.3949580",
         "seq3       0.3809524 0.3949580 0.0000000",
     ))
     # Distance object configured with the 'braycurtis' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'braycurtis')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #10
0
 def test_diff_abs_mult_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.0621975 0.0404611",
         "seq2       0.0621975 0.0000000 0.0531478",
         "seq3       0.0404611 0.0531478 0.0000000",
     ))
     # Distance object configured with the 'diff_abs_mult' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'diff_abs_mult')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #11
0
 def test_jsd_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.4608882 0.2550278",
         "seq2       0.4608882 0.0000000 0.2457790",
         "seq3       0.2550278 0.2457790 0.0000000",
     ))
     # Distance object configured with the 'jsd' measure over self.freqs.
     measure = word_distance.Distance(self.freqs, 'jsd')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #12
0
 def test_chebyshev_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.1936275 0.1250000",
         "seq2       0.1936275 0.0000000 0.1428571",
         "seq3       0.1250000 0.1428571 0.0000000",
     ))
     # Distance object configured with the 'chebyshev' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'chebyshev')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #13
0
 def test_google_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.6078431 0.3809524',
         'seq2       0.6078431 0.0000000 0.3949580',
         'seq3       0.3809524 0.3949580 0.0000000',
     ))
     # Distance object configured with the 'google' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'google')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #14
0
 def test_minkowski_freqs(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.3763512 0.2532387",
         "seq2       0.3763512 0.0000000 0.2603008",
         "seq3       0.2532387 0.2603008 0.0000000",
     ))
     # Distance object configured with the 'minkowski' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'minkowski')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #15
0
 def test_distance1(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 7.5498344 5.7445626",
         "seq2       7.5498344 0.0000000 4.2426407",
         "seq3       5.7445626 4.2426407 0.0000000",
     ))
     # Vectors are built from the DNA fixture with second argument 2
     # (presumably the word size -- confirm in fcgr); no measure name is
     # given, so fcgr.Distance uses its default.
     vectors = fcgr.create_vectors(self.dna_records, 2)
     measure = fcgr.Distance(vectors)
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #16
0
 def test_distance_d(self):
     # Expected text: a size line followed by one row per peptide id,
     # rendered with zero decimal places.
     expected = "\n".join((
         "   4",
         "seq1       0 9 15 20",
         "seq2       9 0 10 18",
         "seq3       15 10 0 17",
         "seq4       20 18 17 0",
     ))
     # self.dist is used as prepared by the fixture, without changing its
     # distance type.
     result = distmatrix.create(self.pep_records.id_list, self.dist)
     self.assertEqual(result.format(decimal_places=0), expected)
Example #17
0
 def test_distance(self):
     # Expected text: a size line followed by one row per peptide id.
     expected = "\n".join((
         "   4",
         "seq1       0.0000000 0.2698413 0.3809524 0.5238095",
         "seq2       0.2698413 0.0000000 0.2950820 0.4666667",
         "seq3       0.3809524 0.2950820 0.0000000 0.4754098",
         "seq4       0.5238095 0.4666667 0.4754098 0.0000000",
     ))
     # ncd.Distance is built directly from the peptide records.
     measure = ncd.Distance(self.pep_records)
     result = distmatrix.create(self.pep_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #18
0
 def test_euclid_squared_counts(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 57.0000000 30.0000000',
         'seq2       57.0000000 0.0000000 19.0000000',
         'seq3       30.0000000 19.0000000 0.0000000',
     ))
     # 'euclid_squared' measure applied to self.counts rather than to
     # frequencies.
     measure = distance.Distance(self.counts, 'euclid_squared')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #19
0
def calc_distances(seqs):
    """Return the ``data`` of an NCD distance matrix built from *seqs*.

    Every item of ``seqs`` must expose ``name`` and ``sequence``. The
    pieces of ``sequence`` are concatenated into one string and stored
    under ``name`` in a fresh ``seqrecords.SeqRecords`` container, which is
    then handed to ``ncd.Distance`` and ``distmatrix.create``.
    """
    records = seqrecords.SeqRecords()
    for entry in seqs:
        label = entry.name
        # The sequence is joined into one plain string before being stored.
        joined = "".join(entry.sequence)
        records.add(label, joined)

    ncd_measure = ncd.Distance(records)
    return distmatrix.create(records.id_list, ncd_measure).data
Example #20
0
 def test_euclid_seqlen2_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.0072101 0.0038263',
         'seq2       0.0072101 0.0000000 0.0039866',
         'seq3       0.0038263 0.0039866 0.0000000',
     ))
     # Distance object configured with the 'euclid_seqlen2' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'euclid_seqlen2')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #21
0
 def test_distance2(self):
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         "   3",
         "seq1       0.0000000 0.5833333 0.5416667",
         "seq2       0.5833333 0.0000000 0.4210526",
         "seq3       0.5416667 0.4210526 0.0000000",
     ))
     # Vectors are built from the DNA fixture with second argument 2
     # (presumably the word size -- confirm in fcgr) and compared with
     # the 'google' measure.
     vectors = fcgr.create_vectors(self.dna_records, 2)
     measure = fcgr.Distance(vectors, 'google')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #22
0
 def test_diff_abs_add_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.0810458 0.0507937',
         'seq2       0.0810458 0.0000000 0.0526611',
         'seq3       0.0507937 0.0526611 0.0000000',
     ))
     # Distance object configured with the 'diff_abs_add' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'diff_abs_add')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #23
0
 def test_diff_abs_mult2_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.0621975 0.0404611',
         'seq2       0.0621975 0.0000000 0.0531478',
         'seq3       0.0404611 0.0531478 0.0000000',
     ))
     # Distance object configured with the 'diff_abs_mult2' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'diff_abs_mult2')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #24
0
 def test_euclid_norm_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.3763512 0.2532387',
         'seq2       0.3763512 0.0000000 0.2603008',
         'seq3       0.2532387 0.2603008 0.0000000',
     ))
     # Distance object configured with the 'euclid_norm' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'euclid_norm')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #25
0
 def test_angle_cos_evol_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.3281368 0.1625980',
         'seq2       0.3281368 0.0000000 0.1347925',
         'seq3       0.1625980 0.1347925 0.0000000',
     ))
     # Distance object configured with the 'angle_cos_evol' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'angle_cos_evol')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #26
0
 def test_euclid_norm_counts(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 7.5498344 5.4772256',
         'seq2       7.5498344 0.0000000 4.3588989',
         'seq3       5.4772256 4.3588989 0.0000000',
     ))
     # 'euclid_norm' measure applied to self.counts rather than to
     # frequencies.
     measure = word_distance.Distance(self.counts, 'euclid_norm')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #27
0
 def test_angle_cos_diss_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.2797355 0.1500672',
         'seq2       0.2797355 0.0000000 0.1261027',
         'seq3       0.1500672 0.1261027 0.0000000',
     ))
     # Distance object configured with the 'angle_cos_diss' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'angle_cos_diss')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #28
0
 def test_euclid_squared_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Expected text: a size line followed by one row per DNA record id.
     expected = "\n".join((
         '   3',
         'seq1       0.0000000 0.1416402 0.0641298',
         'seq2       0.1416402 0.0000000 0.0677565',
         'seq3       0.0641298 0.0677565 0.0000000',
     ))
     # Distance object configured with the 'euclid_squared' measure over
     # self.freqs.
     measure = word_distance.Distance(self.freqs, 'euclid_squared')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Example #29
0
 def test_distance_d1_star(self):
     # Expected text: a size line followed by one row per peptide id.
     expected = "\n".join((
         "   4",
         "seq1       0.0000000 0.3404255 0.5283019 0.5348837",
         "seq2       0.3404255 0.0000000 0.4042553 0.5121951",
         "seq3       0.5283019 0.4042553 0.0000000 0.5135135",
         "seq4       0.5348837 0.5121951 0.5135135 0.0000000",
     ))
     # Switch the fixture's distance object to 'd1_star' before the
     # matrix is created from it.
     self.dist.set_disttype('d1_star')
     result = distmatrix.create(self.pep_records.id_list, self.dist)
     self.assertEqual(result.format(), expected)
Example #30
0
 def test_distance_d1_star2(self):
     # Expected text: a size line followed by one row per peptide id.
     expected = "\n".join((
         "   4",
         "seq1       0.0000000 0.3404255 0.5436893 0.5609756",
         "seq2       0.3404255 0.0000000 0.4130435 0.5384615",
         "seq3       0.5436893 0.4130435 0.0000000 0.5205479",
         "seq4       0.5609756 0.5384615 0.5205479 0.0000000",
     ))
     # Switch the fixture's distance object to 'd1_star2' before the
     # matrix is created from it.
     self.dist.set_disttype('d1_star2')
     result = distmatrix.create(self.pep_records.id_list, self.dist)
     self.assertEqual(result.format(), expected)