def test_gather(self):
        """Check that gather_by_query groups the fixture under one query id.

        The gathered mapping must have self.gene_id as its only key, and
        every value stored under that key must be a refinement.HMMResult.
        """
        # precondition: every HSP in the fixture belongs to the gene under test
        assert all(hsp.query_id == self.gene_id
                   for entry in self.results
                   for hsp in entry.hsps)

        by_query = refinement.gather_by_query(self.results)
        # nothing other than the gene of interest may show up as a key
        assert len(by_query) == 1
        assert list(by_query.keys()) == [self.gene_id]

        assert all(isinstance(hit, refinement.HMMResult)
                   for hit in by_query[self.gene_id])
 def test_merges(self):
     """Check that _merge_domain_list merges hits without altering its input.

     The gathered hits for self.gene_id are ordered by query_start, then
     merged; one hit per distinct hit id is expected afterwards.
     """
     gathered = refinement.gather_by_query(self.results)
     domains = list(gathered[self.gene_id])
     domains.sort(key=lambda domain: domain.query_start)

     found_ids = {domain.hit_id for domain in domains}
     assert len(found_ids) == 3
     assert found_ids == self.hit_ids
     # 1048 and 1237 each contribute two hits, giving five hits in total
     assert len(domains) == 5

     merged = refinement._merge_domain_list(domains, self.hmm_lengths)
     # merging must leave exactly one hit per hit id
     assert len(merged) == 3
     # the list handed in must not have been modified
     assert len(domains) == 5
 def test_incomplete_regulator(self):
     """Check that a regulator hit is kept by _remove_incomplete.

     A short regulator hit is appended to the gathered hits, and both the
     threshold and the fallback are set to a value no hit can reach; only
     the regulator hit is expected to come back.
     """
     hits = list(refinement.gather_by_query(self.results)[self.gene_id])
     hits.sort(key=lambda hit: hit.query_start)
     assert len(hits) == 5

     regulator_id = "DUMMY:some_regulator_desc"
     regulator = refinement.HMMResult(regulator_id, 1, 2, 1e-10, 1)
     hits.append(regulator)

     # work on a copy so the shared fixture lengths stay unchanged
     lengths = dict(self.hmm_lengths)
     # make the recorded length far larger than the hit itself
     lengths[regulator_id] = len(regulator) * 100

     # a cutoff that none of the hits can reach
     unreachable = 2.
     survivors = refinement._remove_incomplete(
         hits, lengths, threshold=unreachable, fallback=unreachable,
     )
     # the tiny, but present, regulator must be the only hit left
     assert len(survivors) == 1
     assert survivors[0].hit_id == regulator_id
    def test_incomplete_removal(self):
        """Check which hits _remove_incomplete drops for various fallbacks.

        Without an explicit fallback every fixture hit is expected to be
        removed; with a fallback just below the longest relative length,
        or a fallback of zero, exactly one hit (the longest) must remain.
        """
        hits = list(refinement.gather_by_query(self.results)[self.gene_id])
        hits.sort(key=lambda hit: hit.query_start)
        assert len(hits) == 5

        def relative_length(hit):
            # length of the hit divided by the length recorded for its hit id
            return len(hit) / self.hmm_lengths[hit.hit_id]

        # precondition: every hit is too short to be kept
        assert all(relative_length(hit) < 1 for hit in hits)
        kept = refinement._remove_incomplete(hits, self.hmm_lengths)
        # everything must have been removed...
        assert not kept
        # ...while the list passed in stays intact
        assert len(hits) == 5

        # largest relative length among the hits, never below the starting 0
        longest = max([0, *(relative_length(hit) for hit in hits)])
        assert longest < 1. / 3.

        # without an explicit fallback, all hits are still removed
        kept = refinement._remove_incomplete(hits, self.hmm_lengths)
        assert not kept

        # a fallback just under the longest hit, and a fallback of zero,
        # must both bring back only the single longest hit
        for fallback in (longest - 0.01, 0.):
            kept = refinement._remove_incomplete(hits,
                                                 self.hmm_lengths,
                                                 fallback=fallback)
            assert len(kept) == 1
            assert relative_length(kept[0]) == longest