Exemple #1
0
 def test_weighted_counts_pattern1(self):
     """Check weighted word counts for pattern1 with every weight set to 2.

     The formatted output must match the expected table, and every row of
     the data must sum to twice the corresponding sequence length.
     """
     model = word_vector.WeightModel({'A': 2, 'C': 2, 'G': 2, 'T': 2})
     counts = word_vector.CountsWeight(
         self.dna_records.length_list, self.pattern1, model)
     expected_rows = ("A\t16 8 12", "G\t12 12 6", "C\t12 6 8", "T\t10 10 4")
     self.assertEqual(counts.format(0), "\n".join(expected_rows))
     # All weights are 2, so each row total is the sequence length doubled.
     for idx, row in enumerate(counts.data):
         self.assertEqual(sum(row), counts.seq_lengths[idx] * 2)
Exemple #2
0
 def test_weighted_freqs_pattern1(self):
     """Check weighted word frequencies for pattern1 (every weight is 2)."""
     model = word_vector.WeightModel({'A': 2, 'C': 2, 'G': 2, 'T': 2})
     freqs = word_vector.FreqsWeight(
         self.dna_records.length_list, self.pattern1, model)
     expected_rows = (
         "A\t0.640 0.444 0.800",
         "G\t0.480 0.667 0.400",
         "C\t0.480 0.333 0.533",
         "T\t0.400 0.556 0.267",
     )
     self.assertEqual(freqs.format(), "\n".join(expected_rows))
Exemple #3
0
 def test_weighted_counts_pattern2(self):
     """Check weighted word counts for pattern2 (every weight is 2)."""
     model = word_vector.WeightModel({'A': 2, 'C': 2, 'G': 2, 'T': 2})
     counts = word_vector.CountsWeight(
         self.dna_records.length_list, self.pattern2, model)
     expected_rows = (
         "TA\t12 12 8",
         "AC\t16 4 8",
         "GG\t4 16 8",
         "AG\t4 8 4",
         "CT\t0 12 4",
         "AA\t8 0 4",
         "AT\t4 4 4",
         "CA\t4 0 8",
         "CC\t8 0 4",
         "CG\t12 0 0",
         "GA\t4 4 4",
         "GT\t12 0 0",
         "TT\t4 4 0",
         "TC\t0 4 0",
         "TG\t4 0 0",
     )
     self.assertEqual(counts.format(0), "\n".join(expected_rows))
Exemple #4
0
 def test_weighted_freqs_pattern1(self):
     """Check weighted word frequencies for pattern2 (every weight is 2)."""
     # NOTE(review): the method name says "pattern1" but the body exercises
     # self.pattern2. If the enclosing class already defines a method with
     # this name, this later definition silently replaces it -- consider
     # renaming to test_weighted_freqs_pattern2.
     model = word_vector.WeightModel({'A': 2, 'C': 2, 'G': 2, 'T': 2})
     freqs = word_vector.FreqsWeight(
         self.dna_records.length_list, self.pattern2, model)
     expected_rows = (
         "TA\t0.500 0.706 0.571",
         "GG\t0.167 0.941 0.571",
         "AC\t0.667 0.235 0.571",
         "CT\t0.000 0.706 0.286",
         "AG\t0.167 0.471 0.286",
         "CA\t0.167 0.000 0.571",
         "AT\t0.167 0.235 0.286",
         "GA\t0.167 0.235 0.286",
         "AA\t0.333 0.000 0.286",
         "CC\t0.333 0.000 0.286",
         "CG\t0.500 0.000 0.000",
         "GT\t0.500 0.000 0.000",
         "TT\t0.167 0.235 0.000",
         "TC\t0.000 0.235 0.000",
         "TG\t0.167 0.000 0.000",
     )
     self.assertEqual(freqs.format(), "\n".join(expected_rows))
Exemple #5
0
def main():
    """Compute a word-based distance matrix from the command-line arguments.

    Steps:
      1. Parse and validate the arguments.
      2. Read the sequences from ``args.fasta``.
      3. Obtain the word patterns: created for ``args.word_size`` when it is
         set, otherwise read from ``args.word_pattern``.
      4. Build the word vector selected by ``args.vector``: plain or
         weighted counts/frequencies, or standardized frequencies for any
         other value.
      5. Compute the distances with ``args.distance`` and write the matrix
         to ``args.out`` if given, otherwise display it.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    if args.word_size:
        p = word_pattern.create(seq_records.seq_list, args.word_size)
    else:
        p = word_pattern.read(args.word_pattern)

    veccls = {'counts': word_vector.Counts, 'freqs': word_vector.Freqs}
    vecclsw = {
        'counts': word_vector.CountsWeight,
        'freqs': word_vector.FreqsWeight
    }

    if args.vector in ('counts', 'freqs'):
        if args.char_weights is None:
            vec = veccls[args.vector](seq_records.length_list, p)
        else:
            weightmodel = word_vector.WeightModel(
                char_weights=args.char_weights)
            vec = vecclsw[args.vector](seq_records.length_list, p, weightmodel)
    else:
        # Any other vector type is built as standardized frequencies, which
        # need a background frequency model.
        if args.alphabet_size:
            freqmodel = word_vector.EqualFreqs(
                alphabet_size=args.alphabet_size)
        else:
            freqmodel = word_vector.EquilibriumFreqs(args.char_freqs)
        vec = word_vector.FreqsStd(seq_records.length_list, p, freqmodel)

    dist = word_distance.Distance(vec, args.distance)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Exemple #6
0
def main():
    """Compute a D2 distance matrix from the command-line arguments.

    Steps:
      1. Parse and validate the arguments.
      2. Read the sequences from ``args.fasta``.
      3. Create one word pattern per word size from ``args.min_word_size``
         to ``args.max_word_size`` (inclusive).
      4. Build one vector per pattern; the class is chosen by
         ``args.vector`` ('counts' or 'freqs') and is the weighted variant
         when ``args.char_weights`` is given.
      5. Compute the distances and write the matrix to ``args.out`` if
         given, otherwise display it.

    Raises:
        KeyError: if ``args.vector`` is neither 'counts' nor 'freqs'.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    patterns = [
        word_pattern.create(seq_records.seq_list, word_size)
        for word_size in range(args.min_word_size, args.max_word_size + 1)
    ]

    if args.char_weights is not None:
        weightmodel = word_vector.WeightModel(char_weights=args.char_weights)
        vecklas = {
            'counts': word_vector.CountsWeight,
            'freqs': word_vector.FreqsWeight
        }[args.vector]
        kwargs = {
            'seq_lengths': seq_records.length_list,
            'weightmodel': weightmodel
        }
    else:
        vecklas = {
            'counts': word_vector.Counts,
            'freqs': word_vector.Freqs
        }[args.vector]
        kwargs = {'seq_lengths': seq_records.length_list}

    vecs = [vecklas(patterns=p, **kwargs) for p in patterns]

    dist = word_d2.Distance(vecs)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # The context manager closes the file even if writing fails.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
Exemple #7
0
    matrix.display()

elif method == "d2":
    patterns = []
    for i in range(1, 5 + 1):
        p = word_pattern.create(seq_records.seq_list, i)
        patterns.append(p)

    counts = []
    for p in patterns:
        c = word_vector.Counts(seq_records.length_list, p)
        counts.append(c)

    countsweight = []
    weights = seqcontent.get_weights('protein')
    weightmodel = word_vector.WeightModel(weights)
    for p in patterns:
        c = word_vector.CountsWeight(seq_records, p, weightmodel)
        countsweight.append(c)
    dist = word_d2.Distance(countsweight)
    matrix = distmatrix.create(seq_records.id_list, dist)
    matrix.display()

elif method == "lempelziv":
    distance = lempelziv.Distance(seq_records)
    l = ['d', 'd_star', 'd1', 'd1_star', 'd1_star2']
    for el in l:
        distance.set_disttype(el)
        matrix = distmatrix.create(seq_records.id_list, distance)
        matrix.display()
Exemple #8
0
 def test_weightmodel_invalid_wtype(self):
     """WeightModel must raise for the weight type 'nonexistent'.

     The exception message has to contain 'unknown weight model'.
     """
     weights = {'A': 2, 'C': 2, 'G': 2, 'T': 2}
     with self.assertRaises(Exception) as raised:
         word_vector.WeightModel(weights, 'nonexistent')
     message = str(raised.exception)
     self.assertIn('unknown weight model', message)