def extend_scaffold(contig):
    """
    Starts a new scaffold at the given contig and grows it to the right
    and then to the left by following entries of `connections`.

    Uses names from the surrounding scope: `visited`, `counter`,
    `scaffolds`, `connections`, `contig_index` and `Scaffold`.
    The new scaffold is appended to `scaffolds`; nothing is returned.
    """
    visited.add(contig)
    name = "ragout-scaffold-{0}".format(counter[0])
    counter[0] += 1
    scf = Scaffold.with_contigs(name, contig.blocks[0],
                                contig.blocks[-1], [contig])
    scaffolds.append(scf)

    #go right
    while True:
        if scf.right not in connections:
            break
        adjacent = connections[scf.right].end
        candidates = contig_index[abs(adjacent)]
        assert len(candidates) == 1
        neighbour = candidates[0]
        if neighbour in visited:
            break

        #the neighbour either starts with the adjacent block (forward)
        #or ends with its negation (reversed); anything else stops growth
        forward = neighbour.blocks[0] == adjacent
        if not forward and not (-neighbour.blocks[-1] == adjacent):
            break

        scf.contigs.append(neighbour)
        if forward:
            scf.right = neighbour.blocks[-1]
        else:
            scf.contigs[-1].sign = -1
            scf.right = -neighbour.blocks[0]
        visited.add(neighbour)

    #go left
    while True:
        if -scf.left not in connections:
            break
        adjacent = -connections[-scf.left].end
        candidates = contig_index[abs(adjacent)]
        assert len(candidates) == 1
        neighbour = candidates[0]
        if neighbour in visited:
            break

        forward = neighbour.blocks[-1] == adjacent
        if not forward and not (-neighbour.blocks[0] == adjacent):
            break

        scf.contigs.insert(0, neighbour)
        if forward:
            scf.left = neighbour.blocks[0]
        else:
            scf.contigs[0].sign = -1
            scf.left = -neighbour.blocks[-1]
        visited.add(neighbour)
def extend_scaffold(contig):
    """
    Creates a scaffold seeded with the given contig, registers it in
    `scaffolds`, and extends it first rightwards, then leftwards, while
    `connections` provides a next block and the contig owning that block
    has not been visited yet.

    Depends on `visited`, `counter`, `scaffolds`, `connections`,
    `contig_index` and `Scaffold` from the surrounding scope.
    """
    visited.add(contig)
    scaffold_id = counter[0]
    scf_name = "ragout-scaffold-{0}".format(scaffold_id)
    counter[0] += 1
    seed_left, seed_right = contig.blocks[0], contig.blocks[-1]
    scf = Scaffold.with_contigs(scf_name, seed_left, seed_right, [contig])
    scaffolds.append(scf)

    #go right
    while scf.right in connections:
        adjacent = connections[scf.right].end
        owners = contig_index[abs(adjacent)]
        assert len(owners) == 1

        nxt = owners[0]
        if nxt in visited:
            break

        if nxt.blocks[0] == adjacent:
            #joins in its stored orientation
            scf.contigs.append(nxt)
            scf.right = nxt.blocks[-1]
        elif -nxt.blocks[-1] == adjacent:
            #joins reversed: flip the sign and negate the new end block
            scf.contigs.append(nxt)
            scf.contigs[-1].sign = -1
            scf.right = -nxt.blocks[0]
        else:
            break
        visited.add(nxt)

    #go left
    while -scf.left in connections:
        adjacent = -connections[-scf.left].end
        owners = contig_index[abs(adjacent)]
        assert len(owners) == 1

        nxt = owners[0]
        if nxt in visited:
            break

        if nxt.blocks[-1] == adjacent:
            scf.contigs.insert(0, nxt)
            scf.left = nxt.blocks[0]
        elif -nxt.blocks[0] == adjacent:
            scf.contigs.insert(0, nxt)
            scf.contigs[0].sign = -1
            scf.left = -nxt.blocks[-1]
        else:
            break
        visited.add(nxt)
def _update_scaffolds(scaffolds, perm_container):
    """
    Rebuilds scaffolds so that their contigs refer to the permutations
    from perm_container.target_perms.

    Each contig is replaced by the target permutations that share its
    (chr_name, repeat_id) and start inside its [seq_start, seq_end) range.
    Contigs with no such permutation are dropped (and logged); scaffolds
    left without contigs are not returned.
    """
    perms_by_key = defaultdict(list)
    for target in perm_container.target_perms:
        perms_by_key[(target.chr_name, target.repeat_id)].append(target)

    updated = []
    for scf in scaffolds:
        rebuilt = []
        for contig in scf.contigs:
            old = contig.perm
            fragments = []
            for cand in perms_by_key[(old.chr_name, old.repeat_id)]:
                if not (old.seq_start <= cand.seq_start < old.seq_end):
                    continue
                fragments.append(cand)
                #a fragment starting inside the old range must end there too
                assert (old.seq_start < cand.seq_end <= old.seq_end)

            if not fragments:
                logger.debug("Lost: %s", str(contig.perm))
                continue

            #reversed contigs list their fragments from high to low coordinate
            fragments.sort(key=lambda p: p.seq_start, reverse=contig.sign < 0)
            for idx in range(len(fragments) - 1):
                prev_perm = fragments[idx]
                next_perm = fragments[idx + 1]
                if contig.sign > 0:
                    gap_length = next_perm.seq_start - prev_perm.seq_end
                else:
                    gap_length = prev_perm.seq_start - next_perm.seq_end
                support = [GenChrPair(prev_perm.genome_name,
                                      prev_perm.chr_name)]
                rebuilt.append(Contig.with_perm(prev_perm, contig.sign,
                                                Link(gap_length, support)))

            #the last fragment inherits a copy of the original outgoing link
            rebuilt.append(Contig.with_perm(fragments[-1], contig.sign,
                                            copy(contig.link)))

        if rebuilt:
            updated.append(Scaffold.with_contigs(scf.name, None, None,
                                                 rebuilt))
    return updated
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Updates scaffolds wrt to given permutations

    The permutations are taken from perm_container.ancestor_perms when
    `ancestral` is true and from perm_container.target_perms otherwise.
    Each contig is replaced by the permutations that share its
    (chr_name, repeat_id) and start inside its [seq_start, seq_end) range;
    contigs without any such permutation are dropped and logged.

    Scaffolds that end up with no contigs are skipped instead of being
    returned empty.
    """
    if ancestral:
        source_perms = perm_container.ancestor_perms
    else:
        source_perms = perm_container.target_perms

    perm_index = defaultdict(list)
    for perm in source_perms:
        perm_index[(perm.chr_name, perm.repeat_id)].append(perm)

    new_scaffolds = []
    for scf in scaffolds:
        new_contigs = []
        for contig in scf.contigs:
            inner_perms = []
            key = (contig.perm.chr_name, contig.perm.repeat_id)
            for new_perm in perm_index[key]:
                if (contig.perm.seq_start <= new_perm.seq_start
                        < contig.perm.seq_end):
                    inner_perms.append(new_perm)
                    #a fragment starting inside the contig must end inside it
                    assert (contig.perm.seq_start < new_perm.seq_end
                            <= contig.perm.seq_end)

            if not inner_perms:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            #reversed contigs list their fragments from high to low coordinate
            inner_perms.sort(key=lambda p: p.seq_start,
                             reverse=contig.sign < 0)

            #NOTE(review): the running gap starts at the contig's link gap and
            #is reduced by every fragment's length -- confirm this gap model
            gap_length = contig.link.gap
            for new_perm in inner_perms:
                gap_length -= new_perm.length()
                new_link = Link(gap_length, contig.link.supporting_genomes)
                new_contigs.append(
                    Contig.with_perm(new_perm, contig.sign, new_link))
            #the last fragment keeps the original outgoing link
            new_contigs[-1].link = contig.link

        #do not emit scaffolds whose contigs were all lost
        if new_contigs:
            new_scaffolds.append(
                Scaffold.with_contigs(scf.name, None, None, new_contigs))
    return new_scaffolds
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Updates scaffolds wrt to given permutations

    Reads perm_container.ancestor_perms if `ancestral` is set, otherwise
    perm_container.target_perms, and indexes them by
    (chr_name, repeat_id). Every contig of every scaffold is then replaced
    by the indexed permutations whose seq_start lies inside the contig's
    [seq_start, seq_end) range. A contig with no matching permutation is
    logged and dropped.

    A scaffold is returned only if at least one of its contigs survived,
    so the result never contains scaffolds with an empty contig list.
    """
    perms = (perm_container.ancestor_perms if ancestral
             else perm_container.target_perms)
    perm_index = defaultdict(list)
    for perm in perms:
        perm_index[(perm.chr_name, perm.repeat_id)].append(perm)

    new_scaffolds = []
    for scf in scaffolds:
        new_contigs = []
        for contig in scf.contigs:
            old_perm = contig.perm
            inner_perms = []
            for new_perm in perm_index[(old_perm.chr_name,
                                        old_perm.repeat_id)]:
                if old_perm.seq_start <= new_perm.seq_start < old_perm.seq_end:
                    inner_perms.append(new_perm)
                    #the fragment must not run past the end of the old contig
                    assert (old_perm.seq_start < new_perm.seq_end
                            <= old_perm.seq_end)

            if not inner_perms:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            #fragments of a reversed contig go from high to low coordinate
            inner_perms.sort(key=lambda p: p.seq_start,
                             reverse=contig.sign < 0)

            #NOTE(review): gap_length is decremented cumulatively by each
            #fragment's length before building its link -- verify intent
            gap_length = contig.link.gap
            for new_perm in inner_perms:
                gap_length -= new_perm.length()
                new_link = Link(gap_length, contig.link.supporting_genomes)
                new_contigs.append(Contig.with_perm(new_perm, contig.sign,
                                                    new_link))
            #restore the original outgoing link on the last fragment
            new_contigs[-1].link = contig.link

        if not new_contigs:
            #every contig of this scaffold was lost; nothing to emit
            continue
        new_scaffolds.append(Scaffold.with_contigs(scf.name, None, None,
                                                   new_contigs))
    return new_scaffolds
def _merge_consecutive_contigs(scaffolds):
    """
    Joins neighbouring contig fragments that come from the same contig

    Two consecutive fragments are joined when they have the same sign and
    chr_name and their coordinates touch (seq_end of one equals seq_start
    of the other, in the order implied by the sign). Returns new scaffolds
    built from deep copies of the permutations; the inputs' permutations
    are not modified.
    """
    merged_scaffolds = []
    total_contigs = 0
    for scf in scaffolds:
        merged = []
        sign = perm = link = None

        for cnt in scf.contigs:
            extended = False
            #`sign` is None before the first fragment, so the chr_name
            #comparison is only reached once `perm` has been set
            if sign == cnt.sign and cnt.perm.chr_name == perm.chr_name:
                if sign > 0 and perm.seq_end == cnt.perm.seq_start:
                    #forward: the fragment continues the run on the right
                    perm.seq_end = cnt.perm.seq_end
                    perm.blocks.extend(cnt.perm.blocks)
                    extended = True
                elif sign < 0 and perm.seq_start == cnt.perm.seq_end:
                    #reverse: the fragment precedes the run in coordinates
                    perm.seq_start = cnt.perm.seq_start
                    perm.blocks = cnt.perm.blocks + perm.blocks
                    extended = True

            if not extended:
                #flush the finished run and start a new one from a copy
                if perm:
                    merged.append(Contig.with_perm(perm, sign, link))
                perm = deepcopy(cnt.perm)

            #refreshed on every fragment so that a merged contig carries
            #the link of its last fragment
            sign = cnt.sign
            link = cnt.link

        if perm:
            merged.append(Contig.with_perm(perm, sign, link))
        total_contigs += len(merged)
        merged_scaffolds.append(
            Scaffold.with_contigs(scf.name, None, None, merged))

    logger.debug("Merging consequtive contigs: %d left", total_contigs)
    return merged_scaffolds
def _merge_consecutive_contigs(scaffolds):
    """
    Collapses runs of adjacent contig fragments that originate from one
    contig into single contigs

    A fragment extends the current run when its sign and chr_name match
    the run and its coordinates are contiguous with it. Each run is built
    on a deep copy of its first fragment's permutation. One output
    scaffold is produced per input scaffold.
    """
    result = []
    contigs_left = 0
    for scf in scaffolds:
        out_contigs = []
        run_sign = None
        run_perm = None
        run_link = None

        for cnt in scf.contigs:
            joined = False
            same_origin = (run_sign == cnt.sign and
                           cnt.perm.chr_name == run_perm.chr_name)
            if same_origin:
                touches_right = (run_sign > 0 and
                                 run_perm.seq_end == cnt.perm.seq_start)
                if touches_right:
                    run_perm.seq_end = cnt.perm.seq_end
                    run_perm.blocks.extend(cnt.perm.blocks)
                    joined = True
                #evaluated after the update above, as two separate checks
                touches_left = (run_sign < 0 and
                                run_perm.seq_start == cnt.perm.seq_end)
                if touches_left:
                    run_perm.seq_start = cnt.perm.seq_start
                    run_perm.blocks = cnt.perm.blocks + run_perm.blocks
                    joined = True

            if not joined:
                #close the previous run (if any) and open a new one
                if run_perm:
                    out_contigs.append(
                        Contig.with_perm(run_perm, run_sign, run_link))
                run_perm = deepcopy(cnt.perm)

            #always track the latest fragment's sign and outgoing link
            run_sign = cnt.sign
            run_link = cnt.link

        if run_perm:
            out_contigs.append(Contig.with_perm(run_perm, run_sign, run_link))
        contigs_left += len(out_contigs)
        result.append(Scaffold.with_contigs(scf.name, None, None,
                                            out_contigs))

    logger.debug("Merging consequtive contigs: {0} left".format(contigs_left))
    return result
def extend_scaffold(contig):
    """
    Builds a scaffold starting from the given contig by following
    `adjacencies` to the right and then to the left.

    Uses names from the surrounding scope: `visited`, `counter`,
    `scaffolds`, `adjacencies`, `contig_index`, `correct_distances`,
    `Scaffold` and `Link`. The scaffold is appended to `scaffolds` when
    it is already closed by an infinity adjacency onto its own left end,
    or when it ends up with more than one contig. Nothing is returned.
    """
    visited.add(contig)
    name = "ragout-scaffold-{0}".format(counter[0])
    counter[0] += 1
    scf = Scaffold.with_contigs(name, contig.left_end(),
                                contig.right_end(), [contig])

    #a single contig whose right end links back to its own left end via
    #an infinity adjacency is stored as is
    if scf.right in adjacencies:
        closing = adjacencies[scf.right]
        if closing.block == scf.left and closing.infinity:
            scaffolds.append(scf)
            return

    #go right
    while scf.right in adjacencies:
        adj = adjacencies[scf.right]
        if adj.infinity:
            break
        adj_block = adj.block
        adj_distance = adj.distance
        adj_supporting_genomes = adj.supporting_genomes

        neighbour = contig_index[abs(adj_block)]
        if neighbour in visited:
            break

        ends = [neighbour.left_end(), neighbour.right_end()]
        if adj_block not in ends:
            break

        #attach as is when entered through the left end, reversed otherwise
        if ends[0] == adj_block:
            scf.contigs.append(neighbour)
        else:
            scf.contigs.append(neighbour.reverse_copy())

        before, added = scf.contigs[-2], scf.contigs[-1]
        flank = before.right_gap() + added.left_gap()
        if correct_distances:
            gap = adj_distance - flank
        else:
            gap = adj_distance
        before.link = Link(gap, adj_supporting_genomes)

        scf.right = added.right_end()
        visited.add(neighbour)

    #go left
    while scf.left in adjacencies:
        adj = adjacencies[scf.left]
        if adj.infinity:
            break
        adj_block = adj.block
        adj_distance = adj.distance
        adj_supporting_genomes = adj.supporting_genomes

        neighbour = contig_index[abs(adj_block)]
        if neighbour in visited:
            break

        ends = [neighbour.right_end(), neighbour.left_end()]
        if adj_block not in ends:
            break

        #attach as is when entered through the right end, reversed otherwise
        if ends[0] == adj_block:
            scf.contigs.insert(0, neighbour)
        else:
            scf.contigs.insert(0, neighbour.reverse_copy())

        added, after = scf.contigs[0], scf.contigs[1]
        flank = added.right_gap() + after.left_gap()
        if correct_distances:
            gap = adj_distance - flank
        else:
            gap = adj_distance
        added.link = Link(gap, adj_supporting_genomes)

        scf.left = added.left_end()
        visited.add(neighbour)

    #single-contig scaffolds are not reported
    if len(scf.contigs) > 1:
        scaffolds.append(scf)
def extend_scaffold(contig):
    """
    Builds a scaffold starting from the given contig by following
    `adjacencies` to the right and then to the left.

    Uses names from the surrounding scope: `visited`, `counter`,
    `scaffolds`, `adjacencies`, `contig_index`, `correct_distances`,
    `Scaffold` and `Link`. The scaffold is appended to `scaffolds` when
    it is already closed by an infinity adjacency onto its own left end,
    or when it ends up with more than one contig. Nothing is returned.

    Errors raised while computing a gap are no longer caught and printed:
    the old handler fell through to building the link with an unbound or
    stale `gap`, so the exception is now left to propagate.
    """
    visited.add(contig)
    scf_name = "ragout-scaffold-{0}".format(counter[0])
    counter[0] += 1
    scf = Scaffold.with_contigs(scf_name, contig.left_end(),
                                contig.right_end(), [contig])

    already_complete = (scf.right in adjacencies and
                        adjacencies[scf.right].block == scf.left and
                        adjacencies[scf.right].infinity)
    if already_complete:
        scaffolds.append(scf)
        return

    #go right
    while scf.right in adjacencies and not adjacencies[scf.right].infinity:
        adj_block = adjacencies[scf.right].block
        adj_distance = adjacencies[scf.right].distance
        adj_supporting_genomes = adjacencies[scf.right].supporting_genomes

        #original author's note: KeyError may be thrown here
        #when ref.indels = False
        contig = contig_index[abs(adj_block)]
        if contig in visited:
            break

        if adj_block in [contig.left_end(), contig.right_end()]:
            #attach as is when entered through the left end,
            #reversed otherwise
            if contig.left_end() == adj_block:
                scf.contigs.append(contig)
            else:
                scf.contigs.append(contig.reverse_copy())

            flank = (scf.contigs[-2].right_gap() +
                     scf.contigs[-1].left_gap())
            gap = adj_distance - flank if correct_distances else adj_distance
            scf.contigs[-2].link = Link(gap, adj_supporting_genomes)

            scf.right = scf.contigs[-1].right_end()
            visited.add(contig)
            continue

        break

    #go left
    while scf.left in adjacencies and not adjacencies[scf.left].infinity:
        adj_block = adjacencies[scf.left].block
        adj_distance = adjacencies[scf.left].distance
        adj_supporting_genomes = adjacencies[scf.left].supporting_genomes

        contig = contig_index[abs(adj_block)]
        if contig in visited:
            break

        if adj_block in [contig.right_end(), contig.left_end()]:
            #attach as is when entered through the right end,
            #reversed otherwise
            if contig.right_end() == adj_block:
                scf.contigs.insert(0, contig)
            else:
                scf.contigs.insert(0, contig.reverse_copy())

            flank = scf.contigs[0].right_gap() + scf.contigs[1].left_gap()
            gap = adj_distance - flank if correct_distances else adj_distance
            scf.contigs[0].link = Link(gap, adj_supporting_genomes)

            scf.left = scf.contigs[0].left_end()
            visited.add(contig)
            continue

        break

    #single-contig scaffolds are not reported
    if len(scf.contigs) > 1:
        scaffolds.append(scf)