def test_profile_from_file(self):
    """Load a profile (k=4) from a profile file and check its counts."""
    k = 4
    expected = utils.counts(utils.SEQUENCES, k)
    path = self.profile(expected, k)
    with utils.open_profile(path, 'r') as handle:
        loaded = klib.Profile.from_file(handle)
        # NOTE(review): compared while the handle is still open; this is
        # harmless if from_file reads eagerly -- confirm intended nesting.
        utils.test_profile(loaded, expected, k)
def test_profile_balance(self):
    """Balance a profile (k=8) and compare it with the updated counts."""
    k = 8
    expected = utils.counts(utils.SEQUENCES, k)
    profile = klib.Profile(utils.as_array(expected, k))
    profile.balance()

    # The reverse-complement mapping is built completely before the update,
    # so `expected` is never mutated while it is being iterated.
    mirrored = {
        utils.reverse_complement(kmer): count
        for kmer, count in expected.items()
    }
    # NOTE(review): presumably `expected` is a Counter, in which case
    # update() adds to existing counts rather than replacing them -- confirm.
    expected.update(mirrored)

    utils.test_profile(profile, expected, k)
def test_profile_balance_palindrome(self):
    """Balance a profile (k=4) built from the single sequence 'AATT'."""
    k = 4
    expected = utils.counts(['AATT'], k)
    profile = klib.Profile(utils.as_array(expected, k))
    profile.balance()

    # Materialise the reverse-complement mapping first; updating while
    # iterating over the same mapping would not be safe.
    mirrored = {
        utils.reverse_complement(kmer): count
        for kmer, count in expected.items()
    }
    # NOTE(review): presumably `expected` is a Counter, so update() adds
    # counts instead of overwriting them -- confirm against utils.counts.
    expected.update(mirrored)

    utils.test_profile(profile, expected, k)
def _test_from_fasta_by_record(self, sequences, k, prefix=None):
    """Check the per-record profiles created from a FASTA file.

    One expected count table is computed per entry of `sequences`; records
    are named '0', '1', ... in order. When `prefix` is truthy, the expected
    profile name is `prefix + '_' + name`, otherwise just the record name.
    """
    expected_per_record = [utils.counts(sequence, k) for sequence in sequences]
    names = list(map(str, range(len(expected_per_record))))

    with open(self.fasta(sequences, names=names)) as fasta_handle:
        profiles = klib.Profile.from_fasta_by_record(
            fasta_handle, k, prefix=prefix)
        # NOTE(review): iterate while the file is open; this stays correct
        # if from_fasta_by_record produces its profiles lazily.
        for name, expected, profile in zip(names, expected_per_record,
                                           profiles):
            if prefix:
                expected_name = prefix + '_' + name
            else:
                expected_name = name
            utils.test_profile(profile, expected, k, name=expected_name)
def test_profile_balance_palindrome(self):
    """Check balance() on a profile (k=4) counted from ['AATT'] only."""
    k = 4
    kmer_counts = utils.counts(['AATT'], k)
    profile = klib.Profile(utils.as_array(kmer_counts, k))
    profile.balance()

    # Build the complete reverse-complement table before touching
    # `kmer_counts`, so the mapping is not changed during iteration.
    reverse_counts = {
        utils.reverse_complement(kmer): count
        for kmer, count in kmer_counts.items()
    }
    # NOTE(review): assumes `kmer_counts` is a Counter (update() then sums
    # the counts) -- confirm against utils.counts.
    kmer_counts.update(reverse_counts)

    utils.test_profile(profile, kmer_counts, k)
def test_profile_balance(self):
    """Check balance() on a profile (k=8) counted from utils.SEQUENCES."""
    k = 8
    kmer_counts = utils.counts(utils.SEQUENCES, k)
    profile = klib.Profile(utils.as_array(kmer_counts, k))
    profile.balance()

    # Build the complete reverse-complement table before touching
    # `kmer_counts`, so the mapping is not changed during iteration.
    reverse_counts = {
        utils.reverse_complement(kmer): count
        for kmer, count in kmer_counts.items()
    }
    # NOTE(review): assumes `kmer_counts` is a Counter (update() then sums
    # the counts) -- confirm against utils.counts.
    kmer_counts.update(reverse_counts)

    utils.test_profile(profile, kmer_counts, k)
def test_profile_shrink_max(self):
    """Shrink a profile (k=4) by 3 and compare with counts summed by prefix."""
    full_counts = utils.counts(utils.SEQUENCES, 4)
    profile = klib.Profile(utils.as_array(full_counts, 4))
    profile.shrink(3)

    # Every key with its last three characters dropped.
    prefixes = {kmer[:-3] for kmer in full_counts}
    # For each prefix, add up the counts of all keys starting with it.
    shrunk_counts = Counter({
        prefix: sum(full_counts[kmer]
                    for kmer in full_counts
                    if kmer.startswith(prefix))
        for prefix in prefixes
    })

    utils.test_profile(profile, shrunk_counts, 1)
def test_profile_shrink_max(self):
    """Check shrink(3) on a profile (k=4) against prefix-aggregated counts."""
    original = utils.counts(utils.SEQUENCES, 4)
    profile = klib.Profile(utils.as_array(original, 4))
    profile.shrink(3)

    # Keys of the expected table: each original key minus its last 3 chars.
    heads = {key[:-3] for key in original}
    # Expected count per head: total over the original keys it prefixes.
    aggregated = Counter({
        head: sum(original[key] for key in original if key.startswith(head))
        for head in heads
    })

    utils.test_profile(profile, aggregated, 1)
def test_profile_merge(self):
    """Merge the right profile into the left one and check the summed counts."""
    k = 8
    left_counts = utils.counts(utils.SEQUENCES_LEFT, k)
    right_counts = utils.counts(utils.SEQUENCES_RIGHT, k)

    left = klib.Profile(utils.as_array(left_counts, k))
    right = klib.Profile(utils.as_array(right_counts, k))
    left.merge(right)

    # The merged (left) profile is compared with the sum of both tables.
    combined = left_counts + right_counts
    utils.test_profile(left, combined, k)
def test_profile_shuffle(self):
    """Shuffle a profile (k=2) after seeding NumPy with 100; check pinned counts."""
    initial = utils.counts(utils.SEQUENCES, 2)
    profile = klib.Profile(utils.as_array(initial, 2))

    # NOTE(review): presumably shuffle() draws from NumPy's global RNG, which
    # is why the seed is fixed right before the call -- confirm.
    np.random.seed(100)
    profile.shuffle()

    # All 2-letter strings over 'ACGT' in itertools.product order, paired
    # with the hard-coded counts expected after the shuffle.
    kmers = [''.join(letters)
             for letters in itertools.product('ACGT', repeat=2)]
    pinned = [13, 7, 6, 18, 12, 1, 13, 17, 16, 12, 23, 27, 24, 17, 18, 12]
    expected = dict(zip(kmers, pinned))

    utils.test_profile(profile, expected, 2)
def test_profile_shuffle(self):
    """Check shuffle() on a profile (k=2) against counts pinned for seed 100."""
    k = 2
    source_counts = utils.counts(utils.SEQUENCES, k)
    profile = klib.Profile(utils.as_array(source_counts, k))

    # NOTE(review): the seed is set immediately before shuffle(), presumably
    # because shuffle() relies on NumPy's global random state -- confirm.
    np.random.seed(100)
    profile.shuffle()

    # Keys are every string of length 2 over 'ACGT', in product order; the
    # values are the hard-coded counts the shuffled profile must match.
    keys = [''.join(combo) for combo in itertools.product('ACGT', repeat=k)]
    values = [13, 7, 6, 18, 12, 1, 13, 17, 16, 12, 23, 27, 24, 17, 18, 12]
    shuffled_counts = dict(zip(keys, values))

    utils.test_profile(profile, shuffled_counts, k)
def test_profile_save_from_file(self):
    """Save a profile (k=4) to a file, load it back, and check its counts."""
    k = 4
    expected = utils.counts(utils.SEQUENCES, k)
    original = klib.Profile(utils.as_array(expected, k))
    path = self.empty()

    # Write in its own block so the handle is closed before reading back.
    with utils.open_profile(path, 'w') as out_handle:
        original.save(out_handle)

    with utils.open_profile(path, 'r') as in_handle:
        restored = klib.Profile.from_file(in_handle)
        # NOTE(review): compared while the handle is still open; this is
        # harmless if from_file reads eagerly -- confirm intended nesting.
        utils.test_profile(restored, expected, k)
def test_ProfileDistance_distance_unmodified(self):
    """Check both profiles still match their counts after distance().

    The distance is computed with `do_balance=True`; its result is ignored
    and only the two input profiles are checked afterwards.
    """
    k = 8
    left_counts = utils.counts(utils.SEQUENCES_LEFT, k)
    right_counts = utils.counts(utils.SEQUENCES_RIGHT, k)
    left = klib.Profile(utils.as_array(left_counts, k))
    right = klib.Profile(utils.as_array(right_counts, k))

    calculator = kdistlib.ProfileDistance(do_balance=True)
    calculator.distance(left, right)

    utils.test_profile(left, left_counts, k)
    utils.test_profile(right, right_counts, k)
def _test_from_fasta_by_record(self, sequences, k, prefix=None):
    """Compare each profile read per FASTA record with its expected counts.

    Expected counts are computed separately for every entry of `sequences`.
    Record names are the stringified positions '0', '1', ...; a truthy
    `prefix` is joined to the name with an underscore for the name check.
    """
    per_record_counts = [utils.counts(sequence, k) for sequence in sequences]
    record_names = list(map(str, range(len(per_record_counts))))

    with open(self.fasta(sequences, names=record_names)) as handle:
        profiles = klib.Profile.from_fasta_by_record(handle, k, prefix=prefix)
        # NOTE(review): the profiles are consumed while the file is open,
        # which also works if from_fasta_by_record yields them lazily.
        triples = zip(record_names, per_record_counts, profiles)
        for record_name, record_counts, profile in triples:
            if prefix:
                full_name = prefix + '_' + record_name
            else:
                full_name = record_name
            utils.test_profile(profile, record_counts, k, name=full_name)
def test_profile_from_file_old_format(self):
    """Load a profile (k=4) from an old-format file and check its counts."""
    k = 4
    expected = utils.counts(utils.SEQUENCES, k)
    path = self.profile_old_format(expected, k)
    with open(path) as old_handle:
        loaded = klib.Profile.from_file_old_format(old_handle)
        # NOTE(review): compared while the file is still open; this is
        # harmless if the loader reads eagerly -- confirm intended nesting.
        utils.test_profile(loaded, expected, k)
def _test_from_fasta(self, sequences, k, name=None):
    """Build one profile from a FASTA file of `sequences` and check it.

    The expected counts come from `utils.counts(sequences, k)`; `name` is
    passed both to `Profile.from_fasta` and to the final check.
    """
    expected = utils.counts(sequences, k)
    fasta_path = self.fasta(sequences)
    with open(fasta_path) as handle:
        built = klib.Profile.from_fasta(handle, k, name=name)
        # NOTE(review): compared while the file is still open; this is
        # harmless if from_fasta reads eagerly -- confirm intended nesting.
        utils.test_profile(built, expected, k, name=name)
def test_profile(self):
    """Construct a profile (k=8) directly from a count array and check it."""
    k = 8
    expected = utils.counts(utils.SEQUENCES, k)
    count_array = utils.as_array(expected, k)
    profile = klib.Profile(count_array)
    utils.test_profile(profile, expected, k)