def make_translocation_from_deletions(self, deletion: Pattern, other_deletion: Pattern) -> TranslocationPattern:
    """
    Builds a translocation pattern out of two deletion patterns.

    The four values returned by ``sort_coords`` are treated as two spans,
    ``first..second`` and ``third..fourth``. When the first span is strictly
    shorter it becomes the source and the destination is the one-unit
    pattern ``fourth..fourth + 1``; otherwise the second span is the source
    and the destination is ``first - 1..first``.

    Both resulting patterns are placed on the chromosome of ``deletion``,
    and the supporting alignments of both deletions are carried over.
    """
    first, second, third, fourth = sort_coords(deletion, other_deletion)

    if second - first < fourth - third:
        source_bounds = (first, second)
        destination_bounds = (fourth, fourth + 1)
    else:
        source_bounds = (third, fourth)
        destination_bounds = (first - 1, first)

    source_pattern = Pattern(
        start=source_bounds[0],
        end=source_bounds[1],
        chromosome=deletion.chromosome,
    )
    destination_pattern = Pattern(
        start=destination_bounds[0],
        end=destination_bounds[1],
        chromosome=deletion.chromosome,
    )
    return TranslocationPattern(
        source=source_pattern,
        destination=destination_pattern,
        support_alignments=[
            *deletion.supporting_alignments,
            *other_deletion.supporting_alignments,
        ],
    )
def make_translocation_from_deletion_duplication(self, first: int, second: int, third: int,
                                                 deletion: Pattern,
                                                 duplication: Pattern) -> TranslocationPattern:
    """
    Builds a translocation pattern from a deletion/duplication pair.

    ``first``, ``second`` and ``third`` delimit two neighbouring spans,
    ``first..second`` and ``second..third``. When the first span is strictly
    shorter it is used as the source and the destination is
    ``third..third + 1``; otherwise ``second..third`` is the source and the
    destination is ``first - 1..first``.

    Both patterns are placed on the chromosome of ``deletion``. The support
    list holds the deletion and duplication themselves followed by the
    supporting alignments of each.
    """
    if second - first < third - second:
        source_bounds, destination_bounds = (first, second), (third, third + 1)
    else:
        source_bounds, destination_bounds = (second, third), (first - 1, first)

    source_pattern = Pattern(
        start=source_bounds[0],
        end=source_bounds[1],
        chromosome=deletion.chromosome,
    )
    destination_pattern = Pattern(
        start=destination_bounds[0],
        end=destination_bounds[1],
        chromosome=deletion.chromosome,
    )
    evidence = [
        deletion,
        duplication,
        *deletion.supporting_alignments,
        *duplication.supporting_alignments,
    ]
    return TranslocationPattern(
        source=source_pattern,
        destination=destination_pattern,
        support_alignments=evidence,
    )
def find_deletion_duplication_pattern(self, deletions: List[Pattern], duplications: List[Pattern]) \
        -> List[Pattern]:
    """
    Pairs deletions with duplications whose left or right coordinates
    coincide (within a margin of 3). Such a pair is taken as evidence of a
    translocation.

    For every deletion the duplications are scanned in order and the first
    matching one wins: the resulting translocation is appended to
    ``self.translocations`` and a pattern covering the whole left..right
    range is appended to ``self.helper_patterns``.

    Returns the deletions for which no matching duplication was found.
    """
    margin = 3
    unmatched = []

    for deletion in deletions:
        for duplication in duplications:
            # Only consider pairs where one start falls inside the other pattern.
            overlapping = (
                deletion.start <= duplication.start <= deletion.end
                or duplication.start <= deletion.start <= duplication.end
            )
            if not overlapping:
                continue

            left, inner_left, inner_right, right = sort_coords(deletion, duplication)

            if left - margin <= inner_left <= left + margin:
                # Left edges coincide, so the remaining inner coordinate is the split.
                inner = inner_right
            elif right - margin <= inner_right <= right + margin:
                # Right edges coincide.
                inner = inner_left
            else:
                continue

            translocation = self.make_translocation_from_deletion_duplication(
                left, inner, right, deletion=deletion, duplication=duplication)
            self.translocations.append(translocation)
            self.helper_patterns.append(
                Pattern(start=left, end=right, chromosome=deletion.chromosome))
            break
        else:
            # The scan finished without a match for this deletion.
            unmatched.append(deletion)

    return unmatched
def find_alignment_patterns(alignments): alignments.sort(key=lambda alignment: (alignment.chromosome, alignment.start)) insertions = [] duplications = [] others = [] for first, second in pairwise(alignments): same_chromosome = second.chromosome == first.chromosome if same_chromosome: if are_they_adjacent(first, second, margin_of_error=3): pattern = Pattern( chromosome=first.chromosome, start=first.end, end=second.start, supporting_alignments=[first, second] ) insertions.append(pattern) else: others.append(first) else: others.append(first) return { "insertions": insertions, "duplications": duplications, "others": others }
def helper_pattern(self, deletion: Pattern, other_deletion: Pattern) -> Pattern:
    """
    Creates a helper pattern covering both deletions.

    Used to filter out duplication patterns caused by translocations. The
    pattern runs from the first to the last of the four values returned by
    ``sort_coords`` and sits on the chromosome of ``deletion``.
    """
    leftmost, _, _, rightmost = sort_coords(deletion, other_deletion)
    covering = Pattern(
        start=leftmost,
        end=rightmost,
        chromosome=deletion.chromosome,
    )
    return covering
def translocation_pattern(self) -> TranslocationPattern:
    """
    Builds the translocation pattern described by this object's alignments.

    The translocation source is the range between the outermost coordinates
    of ``self.nonadjacent``, shrunk by one on each side. The destination is
    the range between the two inner coordinates of ``self.adjacent``. All
    adjacent and nonadjacent alignments are attached as support.

    NOTE(review): the original code named these two ranges the other way
    round and swapped them when building the result; the resulting
    source/destination assignment is kept exactly as it was - confirm it is
    the intended one.
    """
    outer_low, _, _, outer_high = sort_coords(*self.nonadjacent)
    _, inner_low, inner_high, _ = sort_coords(*self.adjacent)

    nonadjacent_range = Pattern(
        start=outer_low + 1,
        end=outer_high - 1,
        chromosome=self.nonadjacent[0].chromosome,
    )
    adjacent_range = Pattern(
        start=inner_low,
        end=inner_high,
        chromosome=self.adjacent[0].chromosome,
    )

    return TranslocationPattern(
        source=nonadjacent_range,
        destination=adjacent_range,
        support_alignments=[*self.adjacent, *self.nonadjacent],
    )
def load_pattern_bed(path):
    """
    Loads patterns from a BED-like, whitespace-separated file.

    The first three fields of every data line are read as chromosome, start
    and end. Blank lines, ``#`` comment lines and ``track`` / ``browser``
    header lines are skipped instead of crashing the parser.

    Raises IndexError when a data line has fewer than three fields and
    ValueError when start or end is not an integer.
    """
    patterns = []
    with open(path, "r") as file:
        for line in file:
            fields = line.split()
            # A trailing newline or an empty line splits into no fields at all.
            if not fields:
                continue
            # BED files may carry comment and header lines that hold no interval.
            if fields[0].startswith("#") or fields[0] in ("track", "browser"):
                continue
            patterns.append(
                Pattern(chromosome=fields[0], start=int(fields[1]), end=int(fields[2])))
    return patterns
def load_translocation_as_separate_patterns(path):
    """
    Loads a tab-separated translocation table as a flat list of patterns.

    Each row contributes two patterns, in this order: one built from the
    ChrA/StartA/EndA columns and one built from the ChrB/StartB/EndB
    columns. Only the part of the chromosome value before the first space
    is kept, and it is passed through ``correct_ref_name``.
    """
    table = pandas.read_csv(path, sep='\t')
    column_sets = (
        ('ChrA', 'StartA', 'EndA'),
        ('ChrB', 'StartB', 'EndB'),
    )

    patterns = []
    for _, row in table.iterrows():
        for chromosome_column, start_column, end_column in column_sets:
            reference = correct_ref_name(row[chromosome_column].split(' ', 1)[0])
            patterns.append(
                Pattern(
                    chromosome=reference,
                    start=int(row[start_column]),
                    end=int(row[end_column]),
                )
            )
    return patterns
def load_translocation(path):
    """
    Loads translocation patterns from a tab-separated table.

    For every row the source pattern comes from the ChrA/StartA/EndA
    columns. The destination pattern sits on ChrB and spans
    ``StartB - 1..StartB``. Only the part of a chromosome value before the
    first space is kept, and it is passed through ``correct_ref_name``.
    """
    table = pandas.read_csv(path, sep='\t')

    translocations = []
    for _, row in table.iterrows():
        source_reference = correct_ref_name(row['ChrA'].split(' ', 1)[0])
        source = Pattern(
            chromosome=source_reference,
            start=int(row['StartA']),
            end=int(row['EndA']),
        )

        destination_reference = correct_ref_name(row['ChrB'].split(' ', 1)[0])
        # The destination is the single position just before StartB.
        destination_low = row['StartB'] - 1
        destination_high = row['StartB']
        destination = Pattern(
            chromosome=destination_reference,
            start=int(destination_low),
            end=int(destination_high),
        )

        translocations.append(
            TranslocationPattern(source=source, destination=destination))
    return translocations
def load_regular(path):
    """
    Loads patterns from a tab-separated table with Chr, Start and End columns.

    Only the part of the Chr value before the first space is kept, and it is
    passed through ``correct_ref_name``. Start and End are converted to int.

    The Name column is not needed to build a pattern, so it is no longer
    read; tables without it load as well.
    """
    patterns = []
    data = pandas.read_csv(path, sep='\t')
    for _, row in data.iterrows():
        chromosome = correct_ref_name(row['Chr'].split(' ', 1)[0])
        patterns.append(
            Pattern(chromosome=chromosome, start=int(row['Start']), end=int(row['End'])))
    return patterns
def translocation_pattern(self, breakpoint: Contig, deletion: Pattern):
    """
    Builds a translocation whose source is ``deletion``.

    The destination is a one-unit pattern on the chromosome of
    ``self.nonintersecting``. When ``self.intersecting`` starts where the
    deletion starts, it is placed right after the end of the
    nonintersecting alignment; otherwise it is placed right before its
    start.

    Support consists of the nonintersecting and intersecting alignments
    followed by the deletion's supporting alignments. ``breakpoint`` is not
    used by this method.
    """
    starts_with_deletion = self.intersecting.start == deletion.start
    anchor = self.nonintersecting

    if starts_with_deletion:
        low = anchor.end
        high = low + 1
    else:
        high = anchor.start
        low = high - 1

    destination = Pattern(
        start=low,
        end=high,
        chromosome=self.nonintersecting.chromosome,
    )
    evidence = [
        self.nonintersecting,
        self.intersecting,
        *deletion.supporting_alignments,
    ]
    return TranslocationPattern(
        source=deletion,
        destination=destination,
        support_alignments=evidence,
    )
def filter_inversions(inversion_patterns): inversion_patterns.sort(key=lambda alignment: (alignment.chromosome, alignment.start)) inversions = [] skip_next = False for first_pattern, second_pattern in pairwise(inversion_patterns): same_chromosome = second_pattern.chromosome == first_pattern.chromosome intersects = second_pattern.start < first_pattern.end if same_chromosome and intersects: skip_next = True inversions.append( Pattern( chromosome=first_pattern.chromosome, start=second_pattern.start, end=first_pattern.end, supporting_alignments=first_pattern.supporting_alignments + second_pattern.supporting_alignments )) elif not skip_next: inversions.append( first_pattern ) else: skip_next = False return inversions