def test_count(self):
    # Count 8-mers from a single FASTA file and compare the written
    # profile against the counts computed by the test utilities.
    expected = utils.counts(utils.SEQUENCES, 8)
    output_path = self.empty()

    with open(self.fasta(utils.SEQUENCES)) as fasta_in, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.count([fasta_in], profile_out, 8)

    utils.test_profile_file(output_path, expected, 8)
def test_smooth(self):
    # See test_kdistlib.test_ProfileDistance_dynamic_smooth
    #
    # Smooth two 2-mer profiles with summary='min' and compare both
    # outputs against hard-coded expected counts.
    #
    # NOTE(review): another `test_smooth` with the same body appears
    # later in this file; if both live in the same class, the later
    # definition shadows this one -- confirm and drop or rename one.
    input_left = Counter([
        'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT',
        'GA', 'GC', 'GG', 'GT', 'TA', 'TG', 'TT',
    ])
    input_right = Counter([
        'AC', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA',
        'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT',
    ])
    path_left = self.empty()
    path_right = self.empty()

    with utils.open_profile(self.profile(input_left, 2)) as in_left, \
            utils.open_profile(self.profile(input_right, 2)) as in_right, \
            utils.open_profile(path_left, 'w') as out_left, \
            utils.open_profile(path_right, 'w') as out_right:
        kmer.smooth(in_left, in_right, out_left, out_right, summary='min')

    expected_left = Counter([
        'AA', 'AA', 'AA', 'CA', 'CC', 'CG', 'CT',
        'GA', 'GC', 'GG', 'GT', 'TA', 'TA', 'TA',
    ])
    expected_right = Counter([
        'AA', 'AA', 'CA', 'CC', 'CG', 'CT', 'GA',
        'GC', 'GG', 'GT', 'TA', 'TA', 'TA', 'TA',
    ])
    utils.test_profile_file(path_left, expected_left, 2)
    utils.test_profile_file(path_right, expected_right, 2)
def test_scale(self):
    # Scale two 8-mer profiles against each other and verify both
    # outputs. The expected values are computed below: the profile with
    # the smaller total is multiplied by the ratio of the totals, the
    # other one by 1.0.
    counts_left = utils.counts(utils.SEQUENCES_LEFT, 8)
    counts_right = utils.counts(utils.SEQUENCES_RIGHT, 8)
    path_left = self.empty()
    path_right = self.empty()

    with utils.open_profile(self.profile(counts_left, 8)) as in_left, \
            utils.open_profile(self.profile(counts_right, 8)) as in_right, \
            utils.open_profile(path_left, 'w') as out_left, \
            utils.open_profile(path_right, 'w') as out_right:
        kmer.scale(in_left, in_right, out_left, out_right)

    total_left = sum(counts_left.values())
    total_right = sum(counts_right.values())
    if total_left < total_right:
        factor_left, factor_right = total_right / total_left, 1.0
    else:
        factor_left, factor_right = 1.0, total_left / total_right

    # Apply the factors in place so the Counters hold the expected values.
    for expected, factor in ((counts_left, factor_left),
                             (counts_right, factor_right)):
        for s in expected:
            expected[s] *= factor

    utils.test_profile_file(path_left, counts_left, 8)
    utils.test_profile_file(path_right, counts_right, 8)
def test_convert(self):
    # Convert an old-format 8-mer profile and check that the converted
    # file holds the same counts.
    expected = utils.counts(utils.SEQUENCES, 8)
    output_path = self.empty()

    with open(self.profile_old_format(expected, 8)) as old_profile, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.convert([old_profile], profile_out)

    utils.test_profile_file(output_path, expected, 8)
def test_balance(self):
    # Balance an 8-mer profile. The expected result adds, for every
    # k-mer, its count to the count of its reverse complement.
    counts = utils.counts(utils.SEQUENCES, 8)
    output_path = self.empty()

    with utils.open_profile(self.profile(counts, 8)) as profile_in, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.balance(profile_in, profile_out)

    # Build the complement mapping first; Counter.update then adds it to
    # the existing counts rather than replacing them.
    complement_counts = {utils.reverse_complement(s): c
                         for s, c in counts.items()}
    counts.update(complement_counts)

    utils.test_profile_file(output_path, counts, 8)
def test_count_by_record(self):
    # Count 8-mers per FASTA record (by_record=True) and check that each
    # record ends up as its own named profile in the output file.
    expected_per_record = [utils.counts(record, 8)
                           for record in utils.SEQUENCES]
    # Records are named by their position: '0', '1', ...
    names = [str(index) for index in range(len(expected_per_record))]
    output_path = self.empty()

    with open(self.fasta(utils.SEQUENCES, names=names)) as fasta_in, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.count([fasta_in], profile_out, 8, by_record=True)

    for name, expected in zip(names, expected_per_record):
        utils.test_profile_file(output_path, expected, 8, name=name)
def test_profile_save(self):
    # Build a 4-mer Profile from an array of counts, save it, and check
    # the file that was written.
    expected = utils.counts(utils.SEQUENCES, 4)
    count_array = utils.as_array(expected, 4)
    profile = klib.Profile(count_array)
    output_path = self.empty()

    with utils.open_profile(output_path, 'w') as profile_out:
        profile.save(profile_out)

    utils.test_profile_file(output_path, expected, 4)
def test_merge(self):
    # Merge two 8-mer profiles with the default merger; the expected
    # result is the element-wise sum of both count sets.
    left = utils.counts(utils.SEQUENCES_LEFT, 8)
    right = utils.counts(utils.SEQUENCES_RIGHT, 8)
    output_path = self.empty()

    with utils.open_profile(self.profile(left, 8)) as in_left, \
            utils.open_profile(self.profile(right, 8)) as in_right, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.merge(in_left, in_right, profile_out)

    expected = left + right
    utils.test_profile_file(output_path, expected, 8)
def test_count_multi(self):
    # Count 8-mers from two FASTA files into one output file, storing
    # them as profiles named 'a' and 'b'.
    expected_a = utils.counts(utils.SEQUENCES_LEFT, 8)
    expected_b = utils.counts(utils.SEQUENCES_RIGHT, 8)
    output_path = self.empty()

    with open(self.fasta(utils.SEQUENCES_LEFT)) as fasta_a, \
            open(self.fasta(utils.SEQUENCES_RIGHT)) as fasta_b, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.count([fasta_a, fasta_b], profile_out, 8, names=['a', 'b'])

    utils.test_profile_file(output_path, expected_a, 8, name='a')
    utils.test_profile_file(output_path, expected_b, 8, name='b')
def test_cat_prefixes(self):
    # Concatenate two profile files that both contain a profile named
    # 'X'; with prefixes 'a_' and 'b_' they are expected to be stored as
    # 'a_X' and 'b_X' respectively.
    expected_a = utils.counts(utils.SEQUENCES_LEFT, 8)
    expected_b = utils.counts(utils.SEQUENCES_RIGHT, 8)
    output_path = self.empty()

    with utils.open_profile(self.profile(expected_a, 8, name='X')) as in_a, \
            utils.open_profile(self.profile(expected_b, 8, name='X')) as in_b, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.cat([in_a, in_b], profile_out, prefixes=['a_', 'b_'])

    utils.test_profile_file(output_path, expected_a, 8, name='a_X')
    utils.test_profile_file(output_path, expected_b, 8, name='b_X')
def test_shrink(self):
    # Shrink an 8-mer profile by one position; the expected 7-mer count
    # is the sum of the counts of all 8-mers sharing that 7-mer prefix.
    counts = utils.counts(utils.SEQUENCES, 8)
    output_path = self.empty()

    with utils.open_profile(self.profile(counts, 8)) as profile_in, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.shrink(profile_in, profile_out, 1)

    prefixes = set(s[:-1] for s in counts)
    shrunk = dict(
        (prefix, sum(counts[s] for s in counts if s.startswith(prefix)))
        for prefix in prefixes)
    counts = Counter(shrunk)

    utils.test_profile_file(output_path, counts, 7)
def test_shuffle(self):
    # See test_klib.profile_shuffle
    #
    # Shuffle a 2-mer profile with a fixed random seed and compare the
    # result against hard-coded counts recorded for that seed.
    original = utils.counts(utils.SEQUENCES, 2)
    output_path = self.empty()

    with utils.open_profile(self.profile(original, 2)) as profile_in, \
            utils.open_profile(output_path, 'w') as profile_out:
        # Seed immediately before shuffling so the outcome is repeatable.
        np.random.seed(100)
        kmer.shuffle(profile_in, profile_out)

    # All sixteen 2-mers in the order produced by itertools.product,
    # paired with the counts expected for seed 100.
    dinucleotides = [''.join(s)
                     for s in itertools.product('ACGT', repeat=2)]
    shuffled_counts = [13, 7, 6, 18, 12, 1, 13, 17,
                       16, 12, 23, 27, 24, 17, 18, 12]
    expected = dict(zip(dinucleotides, shuffled_counts))

    utils.test_profile_file(output_path, expected, 2)
def test_count_multi_by_record(self):
    # Count 8-mers per record from two FASTA files at once. Each profile
    # is expected under '<file name>_<record name>', with file names 'a'
    # and 'b' and records named by their position.
    expected_left = [utils.counts(record, 8)
                     for record in utils.SEQUENCES_LEFT]
    expected_right = [utils.counts(record, 8)
                      for record in utils.SEQUENCES_RIGHT]
    names_left = [str(index) for index in range(len(expected_left))]
    names_right = [str(index) for index in range(len(expected_right))]
    output_path = self.empty()

    with open(self.fasta(utils.SEQUENCES_LEFT, names=names_left)) as fasta_a, \
            open(self.fasta(utils.SEQUENCES_RIGHT, names=names_right)) as fasta_b, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.count([fasta_a, fasta_b], profile_out, 8,
                   names=['a', 'b'], by_record=True)

    for record_name, expected in zip(names_left, expected_left):
        utils.test_profile_file(output_path, expected, 8,
                                name='a_' + record_name)
    for record_name, expected in zip(names_right, expected_right):
        utils.test_profile_file(output_path, expected, 8,
                                name='b_' + record_name)
def test_merge_custom_name(self):
    # Merge two 8-mer profiles with a merger given by name
    # ('numpy.multiply'); the expected result is the product of the
    # counts for every k-mer present in both inputs.
    left = utils.counts(utils.SEQUENCES_LEFT, 8)
    right = utils.counts(utils.SEQUENCES_RIGHT, 8)
    output_path = self.empty()

    with utils.open_profile(self.profile(left, 8)) as in_left, \
            utils.open_profile(self.profile(right, 8)) as in_right, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.merge(in_left, in_right, profile_out,
                   custom_merger='numpy.multiply')

    shared = set(left) & set(right)
    expected = Counter(dict((s, left[s] * right[s]) for s in shared))

    utils.test_profile_file(output_path, expected, 8)
def test_merge_custom_expr(self):
    # Merge two 8-mer profiles with a merger given as an expression over
    # `left` and `right`. The expected result keeps the summed counts
    # only for k-mers that occur in exactly one of the two inputs.
    left = utils.counts(utils.SEQUENCES_LEFT, 8)
    right = utils.counts(utils.SEQUENCES_RIGHT, 8)
    output_path = self.empty()

    with utils.open_profile(self.profile(left, 8)) as in_left, \
            utils.open_profile(self.profile(right, 8)) as in_right, \
            utils.open_profile(output_path, 'w') as profile_out:
        kmer.merge(
            in_left, in_right, profile_out,
            custom_merger='(left + right) * np.logical_xor(left, right)')

    # Start from the sum and drop every k-mer present on both sides.
    expected = left + right
    shared = set(left) & set(right)
    for s in shared:
        del expected[s]

    utils.test_profile_file(output_path, expected, 8)
def test_positive(self):
    # Run `positive` on two 8-mer profiles; each output is expected to
    # keep only the counts of k-mers that are also present in the other
    # profile.
    left = utils.counts(utils.SEQUENCES_LEFT, 8)
    right = utils.counts(utils.SEQUENCES_RIGHT, 8)
    path_left = self.empty()
    path_right = self.empty()

    with utils.open_profile(self.profile(left, 8)) as in_left, \
            utils.open_profile(self.profile(right, 8)) as in_right, \
            utils.open_profile(path_left, 'w') as out_left, \
            utils.open_profile(path_right, 'w') as out_right:
        kmer.positive(in_left, in_right, out_left, out_right)

    expected_left = Counter(s for s in left.elements() if s in right)
    utils.test_profile_file(path_left, expected_left, 8)

    expected_right = Counter(s for s in right.elements() if s in left)
    utils.test_profile_file(path_right, expected_right, 8)
def test_smooth(self):
    # See test_kdistlib.test_ProfileDistance_dynamic_smooth
    #
    # Smooth two 2-mer profiles with summary='min' and check both
    # written profiles against hard-coded expected counts.
    #
    # NOTE(review): an earlier `test_smooth` with the same body exists in
    # this file; if both are in the same class, this definition replaces
    # the earlier one -- confirm and drop or rename one.
    kmers_in_left = ['AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT',
                     'GA', 'GC', 'GG', 'GT', 'TA', 'TG', 'TT']
    kmers_in_right = ['AC', 'AT', 'CA', 'CC', 'CG', 'CT', 'GA',
                      'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT']
    source_left = Counter(kmers_in_left)
    source_right = Counter(kmers_in_right)
    target_left = self.empty()
    target_right = self.empty()

    with utils.open_profile(self.profile(source_left, 2)) as handle_left, \
            utils.open_profile(self.profile(source_right, 2)) as handle_right, \
            utils.open_profile(target_left, 'w') as out_left, \
            utils.open_profile(target_right, 'w') as out_right:
        kmer.smooth(handle_left, handle_right, out_left, out_right,
                    summary='min')

    kmers_out_left = ['AA', 'AA', 'AA', 'CA', 'CC', 'CG', 'CT',
                      'GA', 'GC', 'GG', 'GT', 'TA', 'TA', 'TA']
    kmers_out_right = ['AA', 'AA', 'CA', 'CC', 'CG', 'CT', 'GA',
                       'GC', 'GG', 'GT', 'TA', 'TA', 'TA', 'TA']
    smoothed_left = Counter(kmers_out_left)
    smoothed_right = Counter(kmers_out_right)

    for target, smoothed in ((target_left, smoothed_left),
                             (target_right, smoothed_right)):
        utils.test_profile_file(target, smoothed, 2)