Exemple #1
0
 def test_counts_from1_to1(self):
     # Only the first entry of self.counts is handed to the d2 distance,
     # and the matrix is rendered without decimal places.
     first_only = [self.counts[0]]
     d2 = word_d2.Distance(first_only)
     result = distmatrix.create(self.pep_records.id_list, d2)
     expected = "\n".join((
         '   4',
         'seq1       0 37 57 140',
         'seq2       37 0 28 137',
         'seq3       57 28 0 111',
         'seq4       140 137 111 0',
     ))
     self.assertEqual(result.format(decimal_places=0), expected)
Exemple #2
0
 def test_counts_from1_to4(self):
     # Every entry of self.counts contributes to the d2 distance here;
     # the matrix is rendered without decimal places.
     d2 = word_d2.Distance(self.counts)
     seq_ids = self.pep_records.id_list
     rendered = distmatrix.create(seq_ids, d2).format(decimal_places=0)
     expected_rows = (
         '   4',
         'seq1       0 130 236 286',
         'seq2       130 0 142 258',
         'seq3       236 142 0 212',
         'seq4       286 258 212 0',
     )
     self.assertEqual(rendered, "\n".join(expected_rows))
Exemple #3
0
    def test_freqs_from1_to4(self):
        # d2 distance over all of self.freqs, rendered with the matrix's
        # default formatting (no arguments passed to format()).
        d2 = word_d2.Distance(self.freqs)
        result = distmatrix.create(self.pep_records.id_list, d2)
        expected = "\n".join((
            '   4',
            'seq1       0.0000000 0.0313590 0.0573154 0.1020235',
            'seq2       0.0313590 0.0000000 0.0373677 0.0907196',
            'seq3       0.0573154 0.0373677 0.0000000 0.0870581',
            'seq4       0.1020235 0.0907196 0.0870581 0.0000000',
        ))
        self.assertEqual(result.format(), expected)
Exemple #4
0
    def test_freqs_from1_to4_d2_squareroot(self):
        # Same input as the plain freqs test, but the distance object is
        # switched to the 'd2_squareroot' type before the matrix is built.
        d2 = word_d2.Distance(self.freqs)
        d2.set_disttype('d2_squareroot')
        seq_ids = self.pep_records.id_list
        rendered = distmatrix.create(seq_ids, d2).format()
        expected_rows = (
            "   4",
            "seq1       0.0000000 0.1770847 0.2394063 0.3194113",
            "seq2       0.1770847 0.0000000 0.1933073 0.3011969",
            "seq3       0.2394063 0.1933073 0.0000000 0.2950560",
            "seq4       0.3194113 0.3011969 0.2950560 0.0000000",
        )
        self.assertEqual(rendered, "\n".join(expected_rows))
Exemple #5
0
def main():
    """Command-line entry point: build a d2 distance matrix from FASTA input.

    Parses and validates the command-line arguments, reads the sequences
    from ``args.fasta``, creates one word pattern per word size in
    ``args.min_word_size .. args.max_word_size`` (inclusive), converts each
    pattern into a count or frequency vector (the weighted variants are used
    when ``args.char_weights`` is given), and computes the distance matrix.
    The matrix is written to ``args.out`` when that is set, otherwise it is
    passed to ``matrix.display``.

    Raises:
        KeyError: if ``args.vector`` is neither ``'counts'`` nor ``'freqs'``.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    # One pattern per word size; the upper bound is inclusive.
    word_sizes = range(args.min_word_size, args.max_word_size + 1)
    patterns = [
        word_pattern.create(seq_records.seq_list, size) for size in word_sizes
    ]

    # Both vector families take the sequence lengths; only the weighted
    # family additionally needs a weight model.
    kwargs = {'seq_lengths': seq_records.length_list}
    if args.char_weights is not None:
        kwargs['weightmodel'] = word_vector.WeightModel(
            char_weights=args.char_weights)
        vector_classes = {
            'counts': word_vector.CountsWeight,
            'freqs': word_vector.FreqsWeight,
        }
    else:
        vector_classes = {
            'counts': word_vector.Counts,
            'freqs': word_vector.Freqs,
        }
    vecklas = vector_classes[args.vector]
    vecs = [vecklas(patterns=p, **kwargs) for p in patterns]

    dist = word_d2.Distance(vecs)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Exemple #6
0
    # Tail of an earlier branch; its `if`/`elif` header lies outside this excerpt.
    matrix.display()

# d2 branch: build word patterns for word sizes 1 through 5 and display the
# distance matrix computed from the weighted count vectors.
elif method == "d2":
    patterns = []
    for i in range(1, 5 + 1):
        p = word_pattern.create(seq_records.seq_list, i)
        patterns.append(p)

    # NOTE(review): `counts` is filled here but not read again anywhere in
    # the visible part of this branch - confirm whether it is still needed.
    counts = []
    for p in patterns:
        c = word_vector.Counts(seq_records.length_list, p)
        counts.append(c)

    countsweight = []
    weights = seqcontent.get_weights('protein')
    weightmodel = word_vector.WeightModel(weights)
    for p in patterns:
        # NOTE(review): Counts above receives seq_records.length_list, while
        # CountsWeight receives seq_records itself - verify against the
        # CountsWeight signature.
        c = word_vector.CountsWeight(seq_records, p, weightmodel)
        countsweight.append(c)
    # Only the weighted vectors are passed to the distance object.
    dist = word_d2.Distance(countsweight)
    matrix = distmatrix.create(seq_records.id_list, dist)
    matrix.display()

# lempelziv branch: reuse one Distance object, switching its distance type
# and displaying a fresh matrix for each name in the list.
elif method == "lempelziv":
    distance = lempelziv.Distance(seq_records)
    l = ['d', 'd_star', 'd1', 'd1_star', 'd1_star2']
    for el in l:
        distance.set_disttype(el)
        matrix = distmatrix.create(seq_records.id_list, distance)
        matrix.display()
Exemple #7
0
 def test_set_disttype_throws_exception(self):
     # Setting an unrecognised distance type must raise, and the error
     # text must contain 'unknown disttype'.
     d2 = word_d2.Distance(self.freqs)
     with self.assertRaises(Exception) as raised:
         d2.set_disttype('nonexistent')
     message = str(raised.exception)
     self.assertIn('unknown disttype', message)