def _paths(finished_paths, path_so_far, start, muts):
    """Return all paths from the start position through the mutation graph.

    The graph is walked depth-first.  A path is considered finished when
    there are no mutations registered at ``start``, or when the next
    candidate mutation is not in phase with the last mutation on the path.

    :param finished_paths: list collecting the completed paths; it is
        appended to in place and is also the return value
    :param path_so_far: the accumulated mutation->mutation path so far
    :param start: the start position to traverse the muts from
    :param muts: a dictionary in the form {start_position: [Mutation]}
    :return: All paths through adjacent mutations starting with mutations at
        chromosome position start
    """
    # ``not muts`` covers an empty dict, the legacy empty-list value and None.
    if not muts or start not in muts:
        # No mutations available to continue this chain.
        finished_paths.append(path_so_far)
        return finished_paths

    # NOTE(review): if muts[start] is an empty list the loop body never runs
    # and path_so_far is not recorded -- confirm that is intended.
    for mut in muts[start]:
        if path_so_far and not PhasingUtils.is_in_phase(path_so_far[-1], mut):
            # Next mutation not in phase, so stop this path here.
            # NOTE(review): with several out-of-phase candidates the same
            # prefix is appended once per candidate -- verify callers cope
            # with the duplicates.
            finished_paths.append(path_so_far)
        else:
            # Extend a copy of the path and continue right after this mutation.
            OnpQueue._paths(finished_paths, path_so_far + [mut],
                            int(mut.end) + 1, muts)
    return finished_paths
def _paths(finished_paths, path_so_far, start, muts):
    """Enumerate the phased mutation chains that begin at ``start``.

    Each mutation registered at ``start`` is tried as the next link of the
    chain.  If it is in phase with the current tail (or the chain is still
    empty) the search recurses from the position just past that mutation;
    otherwise the chain built so far is recorded as finished.

    :param finished_paths: completed paths; this list is appended to in
        place and returned
    :param path_so_far: the accumulated mutation->mutation path so far
    :param start: the start position to traverse the muts from
    :param muts: a dictionary in the form {start_position: [Mutation]}
    :return: ``finished_paths``, holding all paths through adjacent
        mutations starting with mutations at chromosome position start
    """
    # An empty or missing container means nothing can extend the chain;
    # truthiness (rather than ``== []``) also covers an empty dict and None.
    chain_can_continue = bool(muts) and start in muts
    if not chain_can_continue:
        finished_paths.append(path_so_far)
        return finished_paths

    # path_so_far is never mutated below (extensions are built as new
    # lists), so whether it has a tail can be decided once.
    has_previous = len(path_so_far) > 0
    for candidate in muts[start]:
        if has_previous and not PhasingUtils.is_in_phase(path_so_far[-1],
                                                         candidate):
            # Candidate is out of phase with the tail: the chain ends here.
            finished_paths.append(path_so_far)
            continue
        next_start = int(candidate.end) + 1
        OnpQueue._paths(finished_paths, path_so_far + [candidate],
                        next_start, muts)
    return finished_paths
def test_phasing_check(self):
    """Test the actual phasing check.

    Every pair of mutations is checked in both argument orders and with
    ``unknown_val`` set to both True and False.
    """
    m1 = MutationData()
    m2 = MutationData()
    m3 = MutationData()
    m4 = MutationData()
    m5 = MutationData()
    m6 = MutationData()
    m7 = MutationData()

    # m1: ID only; m2/m5: same ID plus genotype; m4: genotype only;
    # m6: different ID plus genotype; m3/m7: no phasing annotations at all.
    m1.createAnnotation("phasing_id", "blah")

    m2.createAnnotation("phasing_id", "blah")
    m2.createAnnotation("phasing_genotype", "0|1")

    m4.createAnnotation("phasing_genotype", "0|1")

    m5.createAnnotation("phasing_id", "blah")
    m5.createAnnotation("phasing_genotype", "0|1")

    m6.createAnnotation("phasing_id", "blahdifferent")
    m6.createAnnotation("phasing_genotype", "0|1")

    both_settings = (True, False)

    # m1 and m2 should not be in phase, even though they share IDs, since
    # m1 is missing the genotype info
    for unknown_val in both_settings:
        self.assertFalse(PhasingUtils.is_in_phase(m1, m2, unknown_val))
        self.assertFalse(PhasingUtils.is_in_phase(m2, m1, unknown_val))

    # m2 and m4 should not be in phase, since m4 is missing the ID
    for unknown_val in both_settings:
        self.assertFalse(PhasingUtils.is_in_phase(m4, m2, unknown_val))
        self.assertFalse(PhasingUtils.is_in_phase(m2, m4, unknown_val))

    # m3 and m7 should be unknown_val, since phasing info is missing.
    # assertEqual reports both values when the check fails.
    for unknown_val in both_settings:
        self.assertEqual(PhasingUtils.is_in_phase(m3, m7, unknown_val), unknown_val)
        self.assertEqual(PhasingUtils.is_in_phase(m7, m3, unknown_val), unknown_val)

    # m2 and m5 should be in phase, regardless of the unknown_val parameter
    for unknown_val in both_settings:
        self.assertTrue(PhasingUtils.is_in_phase(m2, m5, unknown_val))
        self.assertTrue(PhasingUtils.is_in_phase(m5, m2, unknown_val))

    # m2 and m6 should not be in phase, since the ID is different,
    # regardless of the unknown_val parameter
    for unknown_val in both_settings:
        self.assertFalse(PhasingUtils.is_in_phase(m2, m6, unknown_val))
        self.assertFalse(PhasingUtils.is_in_phase(m6, m2, unknown_val))
def test_phasing_check(self):
    """Test the actual phasing check.

    Builds mutations with different combinations of the ``phasing_id`` and
    ``phasing_genotype`` annotations and verifies the result of
    ``PhasingUtils.is_in_phase`` for each pairing, in both argument orders
    and for both values of the ``unknown_val`` argument.
    """
    def check_pair(first, second, expected):
        """Assert the phasing result for a pair in both orders and settings.

        ``expected`` is True or False for a fixed outcome, or None when the
        call is expected to hand back whatever ``unknown_val`` was passed.
        """
        for unknown_val in (True, False):
            for left, right in ((first, second), (second, first)):
                result = PhasingUtils.is_in_phase(left, right, unknown_val)
                if expected is None:
                    # assertEqual shows both values if this ever fails.
                    self.assertEqual(result, unknown_val)
                elif expected:
                    self.assertTrue(result)
                else:
                    self.assertFalse(result)

    m1 = MutationData()
    m2 = MutationData()
    m3 = MutationData()
    m4 = MutationData()
    m5 = MutationData()
    m6 = MutationData()
    m7 = MutationData()

    m1.createAnnotation("phasing_id", "blah")
    m2.createAnnotation("phasing_id", "blah")
    m2.createAnnotation("phasing_genotype", "0|1")
    m4.createAnnotation("phasing_genotype", "0|1")
    m5.createAnnotation("phasing_id", "blah")
    m5.createAnnotation("phasing_genotype", "0|1")
    m6.createAnnotation("phasing_id", "blahdifferent")
    m6.createAnnotation("phasing_genotype", "0|1")

    # m1 and m2 should not be in phase, even though they share IDs, since
    # m1 is missing the genotype info
    check_pair(m1, m2, False)

    # m2 and m4 should not be in phase, since m4 is missing the ID
    check_pair(m4, m2, False)

    # m3 and m7 should be unknown_val, since phasing info is missing.
    check_pair(m3, m7, None)

    # m2 and m5 should be in phase, regardless of the unknown_val parameter
    check_pair(m2, m5, True)

    # m2 and m6 should not be in phase, since the ID is different,
    # regardless of the unknown_val parameter
    check_pair(m2, m6, False)