def test_counts_from1_to1(self):
    """Check the integer-formatted matrix for a Distance built from one counts vector.

    Only the first entry of ``self.counts`` is passed to ``word_d2.Distance``;
    the resulting matrix is rendered with ``decimal_places=0`` and compared
    against the hard-coded reference rows.
    """
    first_only = [self.counts[0]]
    measure = word_d2.Distance(first_only)
    result = distmatrix.create(self.pep_records.id_list, measure)
    rendered = result.format(decimal_places=0)
    expected_rows = (
        ' 4',
        'seq1 0 37 57 140',
        'seq2 37 0 28 137',
        'seq3 57 28 0 111',
        'seq4 140 137 111 0',
    )
    expected = "\n".join(expected_rows)
    self.assertEqual(rendered, expected)
def test_counts_from1_to4(self):
    """Check the integer-formatted matrix for a Distance built from all counts vectors.

    The whole ``self.counts`` list is handed to ``word_d2.Distance``
    (presumably word sizes 1 through 4, going by the test name -- confirm
    against the fixture set-up).
    """
    measure = word_d2.Distance(self.counts)
    result = distmatrix.create(self.pep_records.id_list, measure)
    rendered = result.format(decimal_places=0)
    expected_rows = (
        ' 4',
        'seq1 0 130 236 286',
        'seq2 130 0 142 258',
        'seq3 236 142 0 212',
        'seq4 286 258 212 0',
    )
    expected = "\n".join(expected_rows)
    self.assertEqual(rendered, expected)
def test_freqs_from1_to4(self):
    """Check the default-formatted matrix for a Distance built from ``self.freqs``.

    ``matrix.format()`` is called without arguments, so the reference rows
    carry whatever precision the formatter uses by default.
    """
    measure = word_d2.Distance(self.freqs)
    result = distmatrix.create(self.pep_records.id_list, measure)
    rendered = result.format()
    expected_rows = (
        ' 4',
        'seq1 0.0000000 0.0313590 0.0573154 0.1020235',
        'seq2 0.0313590 0.0000000 0.0373677 0.0907196',
        'seq3 0.0573154 0.0373677 0.0000000 0.0870581',
        'seq4 0.1020235 0.0907196 0.0870581 0.0000000',
    )
    expected = "\n".join(expected_rows)
    self.assertEqual(rendered, expected)
def test_freqs_from1_to4_d2_squareroot(self):
    """Check the matrix produced after switching the distance type to 'd2_squareroot'.

    A Distance is built from ``self.freqs``, its disttype is changed via
    ``set_disttype`` before the matrix is created, and the default-formatted
    output is compared against the reference rows.
    """
    measure = word_d2.Distance(self.freqs)
    measure.set_disttype('d2_squareroot')
    result = distmatrix.create(self.pep_records.id_list, measure)
    rendered = result.format()
    expected_rows = (
        ' 4',
        'seq1 0.0000000 0.1770847 0.2394063 0.3194113',
        'seq2 0.1770847 0.0000000 0.1933073 0.3011969',
        'seq3 0.2394063 0.1933073 0.0000000 0.2950560',
        'seq4 0.3194113 0.3011969 0.2950560 0.0000000',
    )
    expected = "\n".join(expected_rows)
    self.assertEqual(rendered, expected)
def main():
    """Command-line entry point: build a word_d2 distance matrix from a FASTA file.

    Steps:
      1. Parse and validate the command-line arguments.
      2. Read the sequences from ``args.fasta``.
      3. Create one word pattern per word size in the inclusive range
         ``args.min_word_size`` .. ``args.max_word_size``.
      4. Turn each pattern into a vector. ``args.vector`` ('counts' or
         'freqs') selects the vector class; when ``args.char_weights`` is
         given, the weighted variant is used together with a WeightModel.
      5. Compute the distance matrix and either write it to ``args.out``
         or display it, using ``args.outfmt`` in both cases.

    Raises:
        KeyError: if ``args.vector`` is neither 'counts' nor 'freqs'.
    """
    parser = get_parser()
    args = validate_args(parser)

    seq_records = seqrecords.read_fasta(args.fasta)

    # "+ 1" because max_word_size is inclusive.
    patterns = [
        word_pattern.create(seq_records.seq_list, word_size)
        for word_size in range(args.min_word_size, args.max_word_size + 1)
    ]

    if args.char_weights is not None:
        weightmodel = word_vector.WeightModel(char_weights=args.char_weights)
        vecklas = {
            'counts': word_vector.CountsWeight,
            'freqs': word_vector.FreqsWeight,
        }[args.vector]
        kwargs = {
            'seq_lengths': seq_records.length_list,
            'weightmodel': weightmodel,
        }
    else:
        vecklas = {
            'counts': word_vector.Counts,
            'freqs': word_vector.Freqs,
        }[args.vector]
        kwargs = {'seq_lengths': seq_records.length_list}

    vecs = [vecklas(patterns=p, **kwargs) for p in patterns]

    dist = word_d2.Distance(vecs)
    matrix = distmatrix.create(seq_records.id_list, dist)

    if args.out:
        # Context manager guarantees the handle is closed even if
        # write_to_file raises part-way through.
        with open(args.out, 'w') as oh:
            matrix.write_to_file(oh, args.outfmt)
    else:
        matrix.display(args.outfmt)
matrix.display() elif method == "d2": patterns = [] for i in range(1, 5 + 1): p = word_pattern.create(seq_records.seq_list, i) patterns.append(p) counts = [] for p in patterns: c = word_vector.Counts(seq_records.length_list, p) counts.append(c) countsweight = [] weights = seqcontent.get_weights('protein') weightmodel = word_vector.WeightModel(weights) for p in patterns: c = word_vector.CountsWeight(seq_records, p, weightmodel) countsweight.append(c) dist = word_d2.Distance(countsweight) matrix = distmatrix.create(seq_records.id_list, dist) matrix.display() elif method == "lempelziv": distance = lempelziv.Distance(seq_records) l = ['d', 'd_star', 'd1', 'd1_star', 'd1_star2'] for el in l: distance.set_disttype(el) matrix = distmatrix.create(seq_records.id_list, distance) matrix.display()
def test_set_disttype_throws_exception(self):
    """Setting an unrecognised disttype must raise with an 'unknown disttype' message."""
    measure = word_d2.Distance(self.freqs)
    with self.assertRaises(Exception) as caught:
        measure.set_disttype('nonexistent')
    # Inspect the message only after the context manager has captured the error.
    message = str(caught.exception)
    self.assertIn('unknown disttype', message)