def test_gather(self):
    """Check that gathering by query yields one group keyed by the gene id.

    Every entry in that group must be a ``refinement.HMMResult``.
    """
    # fixture sanity check: every HSP must already belong to the gene under
    # test, otherwise the assertions below would prove nothing
    assert all(
        hsp.query_id == self.gene_id
        for hit in self.results
        for hsp in hit.hsps
    )

    by_query = refinement.gather_by_query(self.results)

    # the gene of interest must be the one and only key
    assert len(by_query) == 1
    assert [self.gene_id] == list(by_query.keys())

    # and everything gathered for it must be an HMMResult
    assert all(
        isinstance(entry, refinement.HMMResult)
        for entry in by_query[self.gene_id]
    )
def test_merges(self):
    """Check that merging leaves one hit per profile and keeps the input intact."""
    by_start = sorted(
        refinement.gather_by_query(self.results)[self.gene_id],
        key=lambda hit: hit.query_start,
    )

    unique_ids = {hit.hit_id for hit in by_start}
    assert len(unique_ids) == 3
    assert unique_ids == self.hit_ids
    # 2 hits each for 1048 and 1237, for 5 hits total
    assert len(by_start) == 5

    merged = refinement._merge_domain_list(by_start, self.hmm_lengths)

    # one hit per distinct profile is left after merging
    assert len(merged) == 3
    # the list handed in must not have been modified
    assert len(by_start) == 5
def test_incomplete_regulator(self):
    """Check that a tiny regulator hit is kept when thresholds are unreachable.

    With threshold and fallback both set to 2.0, the regulator hit must be
    the only one returned by ``_remove_incomplete``.
    """
    hits = sorted(
        refinement.gather_by_query(self.results)[self.gene_id],
        key=lambda hit: hit.query_start,
    )
    assert len(hits) == 5

    regulator_id = "DUMMY:some_regulator_desc"
    regulator_hit = refinement.HMMResult(regulator_id, 1, 2, 1e-10, 1)
    hits.append(regulator_hit)

    # copy the fixture lengths and register a profile length for the
    # regulator that is always far bigger than the hit itself
    lengths = dict(self.hmm_lengths)
    lengths.update({regulator_id: len(regulator_hit) * 100})

    # thresholds that cannot be reached
    kept = refinement._remove_incomplete(
        hits, lengths, threshold=2.0, fallback=2.0,
    )

    # only the tiny, but present, regulator may remain
    assert len(kept) == 1
    assert kept[0].hit_id == regulator_id
def test_incomplete_removal(self):
    """Check removal of incomplete hits and the behaviour of ``fallback``.

    With default arguments every fixture hit is removed. With a fallback
    just below the longest proportional length, or a fallback of zero,
    exactly one hit comes back and it is the proportionally longest one.
    """
    hits = sorted(
        refinement.gather_by_query(self.results)[self.gene_id],
        key=lambda hit: hit.query_start,
    )
    assert len(hits) == 5

    def proportion(hit):
        # hit length relative to the length of the profile it matched
        return len(hit) / self.hmm_lengths[hit.hit_id]

    # every hit must be shorter than its profile for the test to be valid
    for hit in hits:
        assert proportion(hit) < 1

    kept = refinement._remove_incomplete(hits, self.hmm_lengths)
    # nothing survives with default arguments
    assert not kept
    # and the input list was left alone
    assert len(hits) == 5

    # largest proportional length, seeded with 0 as the lower bound
    longest = max([0] + [proportion(hit) for hit in hits])
    assert longest < 1 / 3

    # fallback behaviour: defaults still remove everything
    kept = refinement._remove_incomplete(hits, self.hmm_lengths)
    assert not kept

    # a fallback just under the longest proportion keeps only that hit
    kept = refinement._remove_incomplete(
        hits, self.hmm_lengths, fallback=longest - 0.01,
    )
    assert len(kept) == 1
    assert proportion(kept[0]) == longest

    # a fallback of zero must still return only a single hit
    kept = refinement._remove_incomplete(hits, self.hmm_lengths, fallback=0.0)
    assert len(kept) == 1
    assert proportion(kept[0]) == longest