Ejemplo n.º 1
0
def main():
    """Compute a word-frequency distance matrix for a FASTA file.

    Arguments are obtained via ``get_parser`` and ``validate_args``. The
    word pattern is created from the sequences when ``args.word_size`` is
    set and read from ``args.word_pattern`` otherwise; it is then passed
    through ``reduce_alphabet`` and/or ``merge_revcomp`` when the matching
    flags are set. The resulting matrix is written to ``args.out`` when
    given, otherwise handed to ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)

    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # Context manager guarantees the handle is closed even when
        # write_to_file raises part-way through.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Ejemplo n.º 2
0
 def test_minkowski_freqs(self):
     # 'minkowski' distances over the frequency vectors; the formatted
     # matrix is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.3763512 0.2532387",
         "seq2       0.3763512 0.0000000 0.2603008",
         "seq3       0.2532387 0.2603008 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'minkowski')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 3
0
 def test_manhattan_freqs(self):
     # 'manhattan' distances over the frequency vectors; the formatted
     # matrix is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 1.2156863 0.7619048",
         "seq2       1.2156863 0.0000000 0.7899160",
         "seq3       0.7619048 0.7899160 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'manhattan')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 4
0
 def test_chebyshev_freqs(self):
     # 'chebyshev' distances over the frequency vectors; the formatted
     # matrix is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.1936275 0.1250000",
         "seq2       0.1936275 0.0000000 0.1428571",
         "seq3       0.1250000 0.1428571 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'chebyshev')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 5
0
 def test_lcc_freqs(self):
     # 'lcc' distances over the frequency vectors; the formatted matrix
     # is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.6205496 0.4017554",
         "seq2       0.6205496 0.0000000 0.2550506",
         "seq3       0.4017554 0.2550506 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'lcc')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 6
0
 def test_google_freqs(self):
     # 'google' distances over the frequency vectors; the formatted
     # matrix is a '   3' header line followed by one row per sequence id.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.6078431 0.3809524',
         'seq2       0.6078431 0.0000000 0.3949580',
         'seq3       0.3809524 0.3949580 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'google')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 7
0
 def test_canberra_freqs(self):
     # 'canberra' distances over the frequency vectors; the formatted
     # matrix is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 10.3372258 7.1836838",
         "seq2       10.3372258 0.0000000 6.6280959",
         "seq3       7.1836838 6.6280959 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'canberra')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 8
0
 def test_kld_freqs(self):
     # 'kld' distances over the frequency vectors; the formatted matrix
     # is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.0932800 0.0435210",
         "seq2       0.0932800 0.0000000 0.0447391",
         "seq3       0.0435210 0.0447391 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'kld')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 9
0
 def test_diff_abs_mult_freqs(self):
     # 'diff_abs_mult' distances over the frequency vectors; the formatted
     # matrix is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.0621975 0.0404611",
         "seq2       0.0621975 0.0000000 0.0531478",
         "seq3       0.0404611 0.0531478 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'diff_abs_mult')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 10
0
 def test_braycurtis_freqs(self):
     # 'braycurtis' distances over the frequency vectors; the formatted
     # matrix is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.6078431 0.3809524",
         "seq2       0.6078431 0.0000000 0.3949580",
         "seq3       0.3809524 0.3949580 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'braycurtis')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 11
0
 def test_jsd_freqs(self):
     # 'jsd' distances over the frequency vectors; the formatted matrix
     # is a "   3" header line followed by one row per sequence id.
     expected = "\n".join([
         "   3",
         "seq1       0.0000000 0.4608882 0.2550278",
         "seq2       0.4608882 0.0000000 0.2457790",
         "seq3       0.2550278 0.2457790 0.0000000",
     ])
     measure = word_distance.Distance(self.freqs, 'jsd')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 12
0
 def test_euclid_norm_counts(self):
     # The result of this method is identical to that from decaf+py.
     # Unlike the *_freqs tests, this one feeds self.counts to Distance.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 7.5498344 5.4772256',
         'seq2       7.5498344 0.0000000 4.3588989',
         'seq3       5.4772256 4.3588989 0.0000000',
     ])
     measure = word_distance.Distance(self.counts, 'euclid_norm')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 13
0
 def test_angle_cos_diss_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'angle_cos_diss' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.2797355 0.1500672',
         'seq2       0.2797355 0.0000000 0.1261027',
         'seq3       0.1500672 0.1261027 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'angle_cos_diss')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 14
0
 def test_euclid_squared_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'euclid_squared' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.1416402 0.0641298',
         'seq2       0.1416402 0.0000000 0.0677565',
         'seq3       0.0641298 0.0677565 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'euclid_squared')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 15
0
 def test_euclid_seqlen2_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'euclid_seqlen2' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.0072101 0.0038263',
         'seq2       0.0072101 0.0000000 0.0039866',
         'seq3       0.0038263 0.0039866 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'euclid_seqlen2')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 16
0
 def test_euclid_norm_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'euclid_norm' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.3763512 0.2532387',
         'seq2       0.3763512 0.0000000 0.2603008',
         'seq3       0.2532387 0.2603008 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'euclid_norm')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 17
0
 def test_diff_abs_mult2_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'diff_abs_mult2' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.0621975 0.0404611',
         'seq2       0.0621975 0.0000000 0.0531478',
         'seq3       0.0404611 0.0531478 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'diff_abs_mult2')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 18
0
 def test_angle_cos_evol_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'angle_cos_evol' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.3281368 0.1625980',
         'seq2       0.3281368 0.0000000 0.1347925',
         'seq3       0.1625980 0.1347925 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'angle_cos_evol')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 19
0
 def test_diff_abs_add_freqs(self):
     # The result of this method is identical to that from decaf+py.
     # Checks the formatted 'diff_abs_add' matrix over self.freqs.
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 0.0810458 0.0507937',
         'seq2       0.0810458 0.0000000 0.0526611',
         'seq3       0.0507937 0.0526611 0.0000000',
     ])
     measure = word_distance.Distance(self.freqs, 'diff_abs_add')
     result = distmatrix.create(self.dna_records.id_list, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 20
0
 def test_create_matrix(self):
     # Here Distance receives a plain NumPy array (three rows of sixteen
     # integers) instead of one of the fixture vectors.
     rows = np.array([
         [3, 6, 4, 1, 3, 4, 3, 0, 1, 1, 6, 4, 5, 0, 3, 4],
         [0, 3, 0, 3, 0, 0, 0, 2, 9, 0, 3, 3, 0, 6, 3, 6],
         [9, 0, 0, 3, 0, 0, 0, 2, 6, 0, 3, 3, 0, 3, 3, 3],
     ])
     seq_ids = ['seq1', 'seq2', 'seq3']
     expected = "\n".join([
         '   3',
         'seq1       0.0000000 14.6969385 14.1774469',
         'seq2       14.6969385 0.0000000 10.8166538',
         'seq3       14.1774469 10.8166538 0.0000000',
     ])
     measure = word_distance.Distance(rows, 'minkowski')
     result = distmatrix.create(seq_ids, measure)
     self.assertEqual(result.format(), expected)
Ejemplo n.º 21
0
def main():
    """Compute a word-vector distance matrix for a FASTA file.

    Arguments are obtained via ``get_parser`` and ``validate_args``. The
    word pattern is created from the sequences when ``args.word_size`` is
    set and read from ``args.word_pattern`` otherwise.

    The vector class depends on ``args.vector``:

    * ``'counts'`` / ``'freqs'`` -- ``Counts`` / ``Freqs``, or their
      ``*Weight`` variants when ``args.char_weights`` is not ``None``;
    * anything else -- ``FreqsStd`` with an ``EqualFreqs`` model when
      ``args.alphabet_size`` is set, else ``EquilibriumFreqs`` built from
      ``args.char_freqs``.

    The resulting matrix is written to ``args.out`` when given, otherwise
    handed to ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    # Dispatch tables keyed by the --vector choice.
    veccls = {'counts': word_vector.Counts, 'freqs': word_vector.Freqs}
    vecclsw = {
        'counts': word_vector.CountsWeight,
        'freqs': word_vector.FreqsWeight
    }

    if args.vector in veccls:
        if args.char_weights is None:
            vec = veccls[args.vector](seq_records.length_list, p)
        else:
            weightmodel = word_vector.WeightModel(
                char_weights=args.char_weights)
            vec = vecclsw[args.vector](seq_records.length_list, p, weightmodel)
    else:
        if args.alphabet_size:
            freqmodel = word_vector.EqualFreqs(
                alphabet_size=args.alphabet_size)
        else:
            freqmodel = word_vector.EquilibriumFreqs(args.char_freqs)
        vec = word_vector.FreqsStd(seq_records.length_list, p, freqmodel)

    dist = word_distance.Distance(vec, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # Context manager guarantees the handle is closed even when
        # write_to_file raises part-way through.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Ejemplo n.º 22
0
def main():
    """Compute an 'angle_cos_diss' distance matrix from a composition vector.

    Arguments are obtained via ``get_parser`` and ``validate_args``. When
    ``args.word_patterns`` is set it is used as-is; otherwise three word
    patterns are created from the sequences, for word sizes
    ``args.word_size``, ``args.word_size - 1`` and ``args.word_size - 2``
    (in that order). The patterns are passed to ``word_vector.Composition``.

    The resulting matrix is written to ``args.out`` when given, otherwise
    handed to ``matrix.display``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_patterns:
        patterns = args.word_patterns
    else:
        # Descending word sizes: k, k-1, k-2.
        patterns = [
            word_pattern.create(seq_records.seq_list, size)
            for size in range(args.word_size, args.word_size - 3, -1)
        ]

    compos = word_vector.Composition(seq_records.length_list, *patterns)
    dist = word_distance.Distance(compos, 'angle_cos_diss')
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # Context manager guarantees the handle is closed even when
        # write_to_file raises part-way through.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Ejemplo n.º 23
0
 def test_minkowski_throws_exception(self):
     # Calling pwdist_minkowski with 0.2 as its third argument (presumably
     # the exponent p) must raise, and the message must state the bound.
     measure = word_distance.Distance(self.freqs, 'minkowski')
     with self.assertRaises(Exception) as raised:
         measure.pwdist_minkowski(0, 1, 0.2)
     message = str(raised.exception)
     self.assertIn('p must be at least 1', message)