Example #1
0
def main():
    """Build word-frequency vectors for the input and output a distance matrix.

    The word pattern is created from the sequences when ``args.word_size`` is
    set and read from ``args.word_pattern`` otherwise.  It is optionally
    passed through ``reduce_alphabet`` and ``merge_revcomp`` before the
    frequency vector is built.  The matrix is written to ``args.out`` when
    given, otherwise it is displayed.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    if args.reduce_alphabet:
        p = p.reduce_alphabet(seqcontent.get_reduced_alphabet(args.molecule))
    if args.merge_revcomp:
        p = p.merge_revcomp()

    freqs = word_vector.Freqs(seq_records.length_list, p)

    dist = word_distance.Distance(freqs, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #2
0
 def __init__(self, *args, **kwargs):
     """Prepare a 2-letter DNA word pattern and its count/frequency vectors."""
     super(DistanceTest, self).__init__(*args, **kwargs)
     utils.ModulesCommonTest.set_test_data()
     records = self.dna_records
     pattern = word_pattern.create(records.seq_list, 2)
     self.pattern = pattern
     self.counts = word_vector.Counts(records.length_list, pattern)
     self.freqs = word_vector.Freqs(records.length_list, pattern)
Example #3
0
 def test_word_pattern_create_wordsize1_wordposFalse(self):
     # Word size 1 without positions: the default format() output must
     # match these four rows exactly, in this order.
     expected = "\n".join((
         "18\t3\tA 0:8 1:4 2:6",
         "15\t3\tG 0:6 1:6 2:3",
         "13\t3\tC 0:6 1:3 2:4",
         "12\t3\tT 0:5 1:5 2:2",
     ))
     pattern = word_pattern.create(self.dna_records.seq_list,
                                   word_size=1,
                                   wordpos=False)
     self.assertEqual(pattern.format(), expected)
Example #4
0
    def test_reduce_alphabet_wordsize1(self):
        # Map A/G to 'R' and C/T to 'Y', then check the formatted output of
        # the reduced single-letter pattern.
        letter_map = {'A': 'R', 'C': 'Y', 'T': 'Y', 'G': 'R'}
        original = word_pattern.create(self.dna_records.seq_list,
                                       word_size=1,
                                       wordpos=False)
        expected = "\n".join((
            '33\t3\tR 0:14 1:10 2:9',
            '25\t3\tY 0:11 1:8 2:6',
        ))
        reduced = original.reduce_alphabet(letter_map)
        self.assertEqual(reduced.format(), expected)
Example #5
0
 def __init__(self, *args, **kwargs):
     """Prepare peptide word patterns for word sizes 1-4 with their vectors.

     ``patterns``, ``counts`` and ``freqs`` are parallel lists: index ``k``
     holds the objects built for word size ``k + 1``.
     """
     super(DistanceTest, self).__init__(*args, **kwargs)
     utils.ModulesCommonTest.set_test_data()
     self.patterns, self.counts, self.freqs = [], [], []
     for word_size in range(1, 5):
         pattern = word_pattern.create(self.pep_records.seq_list, word_size)
         self.patterns.append(pattern)
         self.counts.append(
             word_vector.Counts(self.pep_records.length_list, pattern))
         self.freqs.append(
             word_vector.Freqs(self.pep_records.length_list, pattern))
Example #6
0
 def test_reduce_alphabet_wordsize2(self):
     # Map A/G to 'R' and C/T to 'Y', then check the formatted output of
     # the reduced two-letter pattern.
     letter_map = {'A': 'R', 'C': 'Y', 'T': 'Y', 'G': 'R'}
     original = word_pattern.create(self.dna_records.seq_list,
                                    word_size=2,
                                    wordpos=False)
     expected = "\n".join((
         '17\t3\tRR 0:5 1:7 2:5',
         '15\t3\tYR 0:8 1:3 2:4',
         '13\t3\tRY 0:8 1:2 2:3',
         '10\t3\tYY 0:3 1:5 2:2',
     ))
     reduced = original.reduce_alphabet(letter_map)
     self.assertEqual(reduced.format(), expected)
Example #7
0
    def test_input_output_file_pattern(self):
        # Round trip: a pattern written with format() and parsed back with
        # word_pattern.read() must format identically, both with and
        # without word positions.
        path = utils.get_test_data('pattern.txt')
        try:
            for wordpos in [True, False]:
                p1 = word_pattern.create(self.dna_records.seq_list,
                                         word_size=1,
                                         wordpos=wordpos)
                # Context managers close the handles even if writing or
                # parsing raises.
                with open(path, 'w') as oh:
                    oh.write(p1.format())
                with open(path) as fh:
                    p2 = word_pattern.read(fh)
                self.assertEqual(p1.format(), p2.format())
        finally:
            # Clean up the scratch file on failure too; the existence check
            # avoids masking an earlier error if it was never created.
            if os.path.exists(path):
                os.remove(path)
Example #8
0
 def test_word_pattern_format_teiresias(self):
     # Word size 1 with positions: format('teiresias') must produce these
     # four rows exactly, in this order.
     row_a = ('18\t3\tA 0 0 0 1 0 5 0 8 0 12 0 13 0 17 0 22 1 2 1 7 1 11 1 '
              '15 2 2 2 6 2 7 2 9 2 11 2 14')
     row_g = ('15\t3\tG 0 3 0 11 0 15 0 20 0 23 0 24 1 3 1 4 1 5 1 6 1 16 1 '
              '17 2 3 2 4 2 5')
     row_c = ('13\t3\tC 0 2 0 6 0 7 0 14 0 18 0 19 1 0 1 8 1 13 2 0 2 8 2 12 '
              '2 13')
     row_t = ('12\t3\tT 0 4 0 9 0 10 0 16 0 21 1 1 1 9 1 10 1 12 1 14 2 '
              '1 2 10')
     expected = "\n".join([row_a, row_g, row_c, row_t])
     pattern = word_pattern.create(self.dna_records.seq_list,
                                   word_size=1,
                                   wordpos=True)
     self.assertEqual(pattern.format('teiresias'), expected)
Example #9
0
 def test_equilibrium_freqs_pattern2(self):
     # FreqsStd built with an EquilibriumFreqs model over fixed per-letter
     # frequencies must format to exactly these rows, in this order.
     pattern = word_pattern.create(self.dna_records.seq_list, 2, True)
     letter_freqs = {'A': 0.24, 'C': 0.26, 'G': 0.23, 'T': 0.27}
     model = word_vector.EquilibriumFreqs(letter_freqs)
     standardized = word_vector.FreqsStd(self.dna_records.length_list,
                                         pattern, model)
     expected = "\n".join((
         "TA\t0.111 0.186 0.166",
         "GG\t0.033 0.219 0.147",
         "AC\t0.151 0.063 0.169",
         "CT\t0.000 0.181 0.081",
         "AG\t0.040 0.132 0.089",
         "CA\t0.038 0.000 0.169",
         "GA\t0.040 0.066 0.089",
         "AT\t0.037 0.062 0.083",
         "AA\t0.062 0.000 0.070",
         "CC\t0.057 0.000 0.065",
         "CG\t0.115 0.000 0.000",
         "GT\t0.113 0.000 0.000",
         "TT\t0.028 0.046 0.000",
         "TC\t0.000 0.060 0.000",
         "TG\t0.038 0.000 0.000",
     ))
     self.assertEqual(standardized.format(), expected)
Example #10
0
 def test_equal_freqs_pattern2(self):
     # The result of this method is identical to that from decaf+py.
     #
     # FreqsStd built with an EqualFreqs model (alphabet size 4) must
     # format to exactly the rows below, in this order.  The unused plain
     # Freqs vector the test used to build has been dropped; nothing read it.
     p = word_pattern.create(self.dna_records.seq_list, 2, True)
     freqmodel = word_vector.EqualFreqs(alphabet_size=4)
     freqs_std = word_vector.FreqsStd(self.dna_records.length_list, p,
                                      freqmodel)
     exp = [
         "TA\t0.113 0.189 0.169", "AC\t0.150 0.063 0.169",
         "GG\t0.030 0.201 0.135", "CT\t0.000 0.189 0.084",
         "AG\t0.038 0.126 0.084", "CA\t0.038 0.000 0.169",
         "AT\t0.038 0.063 0.084", "GA\t0.038 0.063 0.084",
         "AA\t0.060 0.000 0.067", "CC\t0.060 0.000 0.067",
         "CG\t0.113 0.000 0.000", "GT\t0.113 0.000 0.000",
         "TT\t0.030 0.050 0.000", "TC\t0.000 0.063 0.000",
         "TG\t0.038 0.000 0.000"
     ]
     self.assertEqual(freqs_std.format(), "\n".join(exp))
Example #11
0
 def test_reduce_alphabet_wordsize1(self):
     # Compares merge_revcomp() applied to the two-letter DNA pattern with
     # a hand-built Pattern holding the expected words and per-sequence
     # occurrence counts.
     # NOTE(review): the method name mentions reduce_alphabet, but the body
     # exercises merge_revcomp -- confirm whether a rename is wanted.
     merged = word_pattern.create(self.dna_records.seq_list,
                                  word_size=2,
                                  wordpos=False).merge_revcomp()
     words = ['AA', 'AC', 'AG', 'CC', 'CA', 'CG', 'AT', 'GA', 'TA']
     occurrences = [
         {0: 3, 1: 1, 2: 1},
         {0: 7, 1: 1, 2: 2},
         {0: 1, 1: 5, 2: 2},
         {0: 3, 1: 4, 2: 3},
         {0: 2, 2: 2},
         {0: 3},
         {0: 1, 1: 1, 2: 1},
         {0: 1, 1: 2, 2: 1},
         {0: 3, 1: 3, 2: 2},
     ]
     reference = word_pattern.Pattern(words, occurrences, [])
     self.assertEqual(merged.format(), reference.format())
Example #12
0
def main():
    """Build ``word_vector.Bools`` for the input and output a distance matrix.

    The word pattern is created from the sequences when ``args.word_size`` is
    set and read from ``args.word_pattern`` otherwise.  The matrix is written
    to ``args.out`` when given, otherwise it is displayed.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    bools = word_vector.Bools(seq_records.length_list, p)
    dist = word_bool_distance.Distance(bools, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #13
0
def main():
    """Build the requested word vector for the input and output a distance matrix.

    Vector selection:

    * ``args.vector`` of ``'counts'`` or ``'freqs'`` uses the plain class, or
      the weighted class when ``args.char_weights`` is not ``None``.
    * any other value uses ``FreqsStd`` with an ``EqualFreqs`` model when
      ``args.alphabet_size`` is set, else an ``EquilibriumFreqs`` model built
      from ``args.char_freqs``.

    The matrix is written to ``args.out`` when given, otherwise displayed.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    veccls = {'counts': word_vector.Counts, 'freqs': word_vector.Freqs}
    vecclsw = {
        'counts': word_vector.CountsWeight,
        'freqs': word_vector.FreqsWeight
    }

    if args.vector in ('counts', 'freqs'):
        if args.char_weights is None:
            vec = veccls[args.vector](seq_records.length_list, p)
        else:
            weightmodel = word_vector.WeightModel(
                char_weights=args.char_weights)
            vec = vecclsw[args.vector](seq_records.length_list, p, weightmodel)
    else:
        if args.alphabet_size:
            freqmodel = word_vector.EqualFreqs(
                alphabet_size=args.alphabet_size)
        else:
            freqmodel = word_vector.EquilibriumFreqs(args.char_freqs)
        vec = word_vector.FreqsStd(seq_records.length_list, p, freqmodel)

    dist = word_distance.Distance(vec, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #14
0
def main():
    """Build one word vector per word size and output a ``word_d2`` distance matrix.

    A pattern is created for every word size from ``args.min_word_size`` to
    ``args.max_word_size`` inclusive.  The vector class is chosen by
    ``args.vector`` (``'counts'`` or ``'freqs'``); the weighted variant is
    used when ``args.char_weights`` is not ``None``.  The matrix is written to
    ``args.out`` when given, otherwise it is displayed.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    patterns = [
        word_pattern.create(seq_records.seq_list, i)
        for i in range(args.min_word_size, args.max_word_size + 1)
    ]

    if args.char_weights is not None:
        weightmodel = word_vector.WeightModel(char_weights=args.char_weights)
        vecklas = {
            'counts': word_vector.CountsWeight,
            'freqs': word_vector.FreqsWeight
        }[args.vector]
        kwargs = {
            'seq_lengths': seq_records.length_list,
            'weightmodel': weightmodel
        }
    else:
        vecklas = {
            'counts': word_vector.Counts,
            'freqs': word_vector.Freqs
        }[args.vector]
        kwargs = {'seq_lengths': seq_records.length_list}
    vecs = [vecklas(patterns=p, **kwargs) for p in patterns]

    dist = word_d2.Distance(vecs)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #15
0
def main():
    """Build a ``word_rtd`` vector for the input and output a distance matrix.

    When ``args.word_size`` is set, the pattern is created with ``True`` as
    the third argument to ``word_pattern.create``; otherwise
    ``args.word_pattern`` is used as-is.  The matrix is written to
    ``args.out`` when given, otherwise it is displayed.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)
    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size, True)
    else:
        # NOTE(review): used directly, without word_pattern.read --
        # presumably validate_args already parsed it; confirm.
        p = args.word_pattern

    vector = word_rtd.create_vector(seq_records.count, p)
    dist = word_rtd.Distance(vector, args.distance)

    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #16
0
 def test_word_pattern_create_wordsize2_wordposFalse(self):
     # Word size 2 without positions: the default format() output must
     # match these fifteen rows exactly, in this order.
     expected = "\n".join((
         "8\t3\tTA 0:3 1:3 2:2",
         "7\t3\tAC 0:4 1:1 2:2",
         "7\t3\tGG 0:1 1:4 2:2",
         "4\t3\tAG 0:1 1:2 2:1",
         "4\t2\tCT 1:3 2:1",
         "3\t3\tAT 0:1 1:1 2:1",
         "3\t3\tGA 0:1 1:1 2:1",
         "3\t2\tAA 0:2 2:1",
         "3\t2\tCA 0:1 2:2",
         "3\t2\tCC 0:2 2:1",
         "3\t1\tCG 0:3",
         "3\t1\tGT 0:3",
         "2\t2\tTT 0:1 1:1",
         "1\t1\tTC 1:1",
         "1\t1\tTG 0:1",
     ))
     pattern = word_pattern.create(self.dna_records.seq_list,
                                   word_size=2,
                                   wordpos=False)
     self.assertEqual(pattern.format(), expected)
Example #17
0
def main():
    """Create a word pattern for the input and output its formatted form.

    With ``args.teiresias`` set, the pattern comes from
    ``word_pattern.run_teiresias`` called with the input file's name;
    otherwise the sequences are read and ``word_pattern.create`` is used.
    ``p.format()`` is written to ``args.out`` when given, otherwise printed.
    """
    parser = get_parser()
    args = validate_args(parser)

    if args.teiresias:
        # The handle is closed first; run_teiresias receives only the name.
        args.fasta.close()
        p = word_pattern.run_teiresias(args.fasta.name,
                                       w=args.word_size,
                                       l=args.l,
                                       k=args.k,
                                       output_filename=args.out)
    else:
        seq_records = seqrecords.read_fasta(args.fasta)
        args.fasta.close()
        p = word_pattern.create(seq_records.seq_list, args.word_size,
                                args.word_position)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            oh.write(p.format())
    else:
        print(p.format())
Example #18
0
def main():
    """Build a ``word_vector.Composition`` and output an 'angle_cos_diss' matrix.

    ``args.word_patterns`` is used when truthy.  Otherwise three patterns are
    created for word sizes ``args.word_size``, ``args.word_size - 1`` and
    ``args.word_size - 2``, in that order.  The matrix is written to
    ``args.out`` when given, otherwise it is displayed.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_patterns:
        patterns = args.word_patterns
    else:
        # Descending word sizes: k, k-1, k-2.
        patterns = [
            word_pattern.create(seq_records.seq_list, i)
            for i in range(args.word_size, args.word_size - 3, -1)
        ]

    compos = word_vector.Composition(seq_records.length_list, *patterns)
    dist = word_distance.Distance(compos, 'angle_cos_diss')
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the handle even if writing raises.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Example #19
0
elif method == "ncd":
    # The distance object is built straight from the sequence records; the
    # matrix is printed with display()'s default arguments.
    dist = ncd.Distance(seq_records)
    matrix = distmatrix.create(seq_records.id_list, dist)
    matrix.display()

elif method == "wmetric":
    # `matrix` first holds the object returned by subsmat.get('blosum62')
    # and is rebound to the distance matrix two lines below.
    matrix = subsmat.get('blosum62')
    dist = wmetric.Distance(seq_records, matrix)
    matrix = distmatrix.create(seq_records.id_list, dist)
    matrix.display()

elif method == "d2":
    # One word pattern per word size from 1 to 5 inclusive.
    patterns = []
    for i in range(1, 5 + 1):
        p = word_pattern.create(seq_records.seq_list, i)
        patterns.append(p)

    # NOTE(review): `counts` is filled here but not read again in the visible
    # part of this branch -- confirm whether it is used further down.
    counts = []
    for p in patterns:
        c = word_vector.Counts(seq_records.length_list, p)
        counts.append(c)

    countsweight = []
    weights = seqcontent.get_weights('protein')
    weightmodel = word_vector.WeightModel(weights)
    for p in patterns:
        # NOTE(review): CountsWeight receives `seq_records` itself, while
        # Counts above receives `seq_records.length_list` -- confirm intended.
        c = word_vector.CountsWeight(seq_records, p, weightmodel)
        countsweight.append(c)
    # Only the weighted vectors are passed to the distance object.
    dist = word_d2.Distance(countsweight)
    matrix = distmatrix.create(seq_records.id_list, dist)
Example #20
0
 def __init__(self, *args, **kwargs):
     """Prepare DNA word patterns for word sizes 1, 2 and 3."""
     super(WordVectorTest, self).__init__(*args, **kwargs)
     utils.ModulesCommonTest.set_test_data()
     records = self.dna_records
     self.pattern1 = word_pattern.create(records.seq_list, 1)
     self.pattern2 = word_pattern.create(records.seq_list, 2)
     self.pattern3 = word_pattern.create(records.seq_list, 3)
 def __init__(self, *args, **kwargs):
     """Prepare a 2-letter word pattern over the peptide sequences."""
     super(Test, self).__init__(*args, **kwargs)
     utils.ModulesCommonTest.set_test_data()
     peptide_seqs = self.pep_records.seq_list
     self.p = word_pattern.create(peptide_seqs, 2)