def _insert_from_graph_experement(graph_file, scaffolds_in, max_path_len):
    """
    Inserts contigs lying on graph paths between adjacent scaffold contigs
    (experimental variant)

    The graph is loaded from graph_file with _load_dot. For every pair of
    neighbouring contigs of a scaffold, _get_unigue_path_experiment is
    queried; when it returns a non-empty path, one contig per path node is
    created with Contig.from_sting and placed between the pair.

    Returns a list with one new Scaffold per input scaffold. The original
    contig objects are reused in the new scaffolds, not copied.
    """
    graph = _load_dot(graph_file)

    #names of all contigs that are already placed into some scaffold
    ordered_contigs = set()
    for scf in scaffolds_in:
        ordered_contigs.update(cont.name for cont in scf.contigs)

    new_scaffolds = []
    for scf in scaffolds_in:
        scaffold = Scaffold(scf.name)
        new_scaffolds.append(scaffold)

        for prev_cont, new_cont in zip(scf.contigs[:-1], scf.contigs[1:]):
            scaffold.contigs.append(prev_cont)

            #find unique path
            path_nodes = _get_unigue_path_experiment(graph, prev_cont,
                                                     new_cont, max_path_len,
                                                     ordered_contigs)
            if not path_nodes:
                continue

            #insert contigs along the path
            for node in path_nodes:
                scaffold.contigs.append(Contig.from_sting(node))

        #The loop above only emits the left contig of each pair, so the
        #last contig is added here. It is taken from the scaffold itself
        #instead of the loop variable: for a scaffold with fewer than two
        #contigs the loop body never runs and the loop variable would be
        #undefined or left over from the previous scaffold.
        if scf.contigs:
            scaffold.contigs.append(scf.contigs[-1])
    return new_scaffolds
def _update_scaffolds(scaffolds, perm_container):
    """
    Rebuilds scaffolds so that their contigs follow the target
    permutations of perm_container

    Every contig is replaced by the target permutations that share its
    (chr_name, repeat_id) and start inside its [seq_start, seq_end)
    range. Contigs without such permutations are dropped (and logged),
    scaffolds that end up without contigs are dropped as well.
    """
    perms_by_origin = defaultdict(list)
    for target in perm_container.target_perms:
        perms_by_origin[(target.chr_name, target.repeat_id)].append(target)

    updated = []
    for scaffold in scaffolds:
        pieces = []
        for contig in scaffold.contigs:
            old_perm = contig.perm
            orientation = contig.sign

            #collect the permutations that start inside the old contig
            covered = []
            origin = (old_perm.chr_name, old_perm.repeat_id)
            for candidate in perms_by_origin[origin]:
                if not (old_perm.seq_start <= candidate.seq_start
                        < old_perm.seq_end):
                    continue
                covered.append(candidate)
                assert (old_perm.seq_start < candidate.seq_end
                        <= old_perm.seq_end)

            if not covered:
                logger.debug("Lost: %s", str(old_perm))
                continue

            #walk the pieces in scaffold direction
            covered.sort(key=lambda p: p.seq_start, reverse=orientation < 0)
            for left, right in zip(covered, covered[1:]):
                if orientation > 0:
                    gap = right.seq_start - left.seq_end
                else:
                    gap = left.seq_start - right.seq_end
                evidence = [GenChrPair(left.genome_name, left.chr_name)]
                pieces.append(Contig.with_perm(left, orientation,
                                               Link(gap, evidence)))

            #the last piece inherits a copy of the old contig's link
            pieces.append(Contig.with_perm(covered[-1], orientation,
                                           copy(contig.link)))

        if pieces:
            updated.append(Scaffold.with_contigs(scaffold.name, None,
                                                 None, pieces))
    return updated
def _merge_consecutive_contigs(scaffolds):
    """
    Glues together neighbouring contig fragments that are adjacent
    pieces of one original contig

    Two fragments are glued when they have the same sign and chr_name and
    their coordinates touch (in the direction given by the sign).
    """
    merged_scaffolds = []
    total_contigs = 0
    for scaffold in scaffolds:
        merged = []

        run_sign = run_perm = run_link = None
        for fragment in scaffold.contigs:
            frag_perm = fragment.perm
            extended = False
            if (run_sign == fragment.sign
                    and frag_perm.chr_name == run_perm.chr_name):
                if run_sign > 0 and run_perm.seq_end == frag_perm.seq_start:
                    #forward strand: the fragment continues the run
                    run_perm.seq_end = frag_perm.seq_end
                    run_perm.blocks.extend(frag_perm.blocks)
                    extended = True
                elif (run_sign < 0
                        and run_perm.seq_start == frag_perm.seq_end):
                    #reverse strand: the fragment precedes the run
                    run_perm.seq_start = frag_perm.seq_start
                    run_perm.blocks = frag_perm.blocks + run_perm.blocks
                    extended = True

            if not extended:
                #flush the finished run and start a new one on a copy,
                #so that the input permutation is never modified
                if run_perm:
                    merged.append(Contig.with_perm(run_perm, run_sign,
                                                   run_link))
                run_perm = deepcopy(frag_perm)

            #the run always carries sign and link of its latest fragment
            run_sign = fragment.sign
            run_link = fragment.link

        if run_perm:
            merged.append(Contig.with_perm(run_perm, run_sign, run_link))
        total_contigs += len(merged)
        merged_scaffolds.append(Scaffold.with_contigs(scaffold.name, None,
                                                      None, merged))

    logger.debug("Merging consequtive contigs: %d left", total_contigs)
    return merged_scaffolds
def _merge_consecutive_contigs(scaffolds):
    """
    Joins runs of contig fragments that come from the same contig
    and directly follow each other
    """
    result = []
    contigs_left = 0
    for scf in scaffolds:
        joined = []

        cur_sign, cur_perm, cur_link = None, None, None
        for cnt in scf.contigs:
            same_origin = (cur_sign == cnt.sign
                           and cnt.perm.chr_name == cur_perm.chr_name)

            if (same_origin and cur_sign > 0
                    and cur_perm.seq_end == cnt.perm.seq_start):
                #grow the current run to the right
                cur_perm.seq_end = cnt.perm.seq_end
                cur_perm.blocks.extend(cnt.perm.blocks)
            elif (same_origin and cur_sign < 0
                    and cur_perm.seq_start == cnt.perm.seq_end):
                #grow the current run to the left
                cur_perm.seq_start = cnt.perm.seq_start
                cur_perm.blocks = cnt.perm.blocks + cur_perm.blocks
            else:
                #fragment does not continue the run: emit it, start anew
                if cur_perm:
                    joined.append(
                        Contig.with_perm(cur_perm, cur_sign, cur_link))
                cur_perm = deepcopy(cnt.perm)

            #sign and link follow the most recent fragment of the run
            cur_sign, cur_link = cnt.sign, cnt.link

        if cur_perm:
            joined.append(Contig.with_perm(cur_perm, cur_sign, cur_link))
        contigs_left += len(joined)
        result.append(Scaffold.with_contigs(scf.name, None, None, joined))

    logger.debug("Merging consequtive contigs: {0} left".format(contigs_left))
    return result
def _make_contigs(perm_container):
    """
    Builds a Contig for every target permutation and maps each
    block id to the contig that contains it

    Returns a (contigs, index) tuple.
    """
    contig_list = []
    block_to_contig = {}
    for perm in perm_container.target_perms:
        assert len(perm.blocks)
        contig = Contig.with_perm(perm)
        contig_list.append(contig)

        for block in perm.blocks:
            block_id = block.block_id
            #a block id must belong to exactly one contig
            assert block_id not in block_to_contig
            block_to_contig[block_id] = contig

    return contig_list, block_to_contig
def _make_contigs(perm_container, ancestral=False):
    """
    A helper function to make Contig structures

    Creates one Contig per permutation (via Contig.with_perm) and an index
    that maps every block id to the contig containing that block. The
    permutations are taken from perm_container.ancestor_perms when
    ancestral is true and from perm_container.target_perms otherwise; only
    the selected attribute is read.

    Returns a (contigs, index) tuple. Asserts that every permutation has at
    least one block and that no block id occurs twice.
    """
    #both modes share the same construction logic, only the source differs
    if ancestral:
        perms = perm_container.ancestor_perms
    else:
        perms = perm_container.target_perms

    contigs = []
    index = {}
    for perm in perms:
        assert len(perm.blocks)
        contig = Contig.with_perm(perm)
        contigs.append(contig)
        for block in perm.blocks:
            assert block.block_id not in index
            index[block.block_id] = contig
    return contigs, index
def _make_contigs(perm_container):
    """
    Creates a Contig for every filtered target permutation that has
    blocks and indexes the contigs by absolute block value

    Returns a (contigs, index) tuple where index maps abs(block) to the
    list of contigs containing that block.
    """
    made = []
    by_block = defaultdict(list)
    for perm in perm_container.target_perms_filtered:
        if len(perm.blocks) != 0:
            contig = Contig(perm.chr_id)
            made.append(contig)
            #shallow copy, so the contig owns its list of blocks
            contig.blocks = copy.copy(perm.blocks)
            for block in perm.blocks:
                by_block[abs(block)].append(contig)
    return made, by_block
def _make_contigs(perm_container):
    """
    Turns target permutations into Contig structures

    Returns the list of contigs together with a dictionary that gives,
    for a block id, the contig holding that block.
    """
    contigs, index = [], {}
    for perm in perm_container.target_perms:
        assert len(perm.blocks)
        owner = Contig.with_perm(perm)
        contigs.append(owner)
        for block_id in (block.block_id for block in perm.blocks):
            #each block is expected in one contig only
            assert block_id not in index
            index[block_id] = owner
    return contigs, index
def _make_contigs(perm_container, ancestral=False):
    """
    A helper function to make Contig structures

    For each permutation a Contig is built with Contig.with_perm, and every
    block id of the permutation is mapped to that contig. With ancestral
    set, the permutations are read from perm_container.ancestor_perms,
    otherwise from perm_container.target_perms (the other attribute is
    not accessed).

    Returns a (contigs, index) tuple. Asserts that permutations are not
    empty and that block ids are unique across all permutations.
    """
    #pick the source once instead of repeating the loop for each mode
    perms = (perm_container.ancestor_perms if ancestral
             else perm_container.target_perms)

    contigs = []
    index = {}
    for perm in perms:
        assert len(perm.blocks)
        new_contig = Contig.with_perm(perm)
        contigs.append(new_contig)
        for block in perm.blocks:
            assert block.block_id not in index
            index[block.block_id] = new_contig
    return contigs, index
def _insert_from_graph(graph, scaffolds_in, max_path_len, contigs_fasta):
    """
    Extends scaffolds with contigs taken from the assembly graph

    For each pair of adjacent scaffold contigs the nodes returned by
    _get_cut_vertices are inserted between them. A node is a string whose
    first character is the orientation ("+" for forward) and whose rest is
    the contig name; contig length is taken from contigs_fasta. The link
    of the left contig of the pair is updated in place.
    """
    placed_names = set()
    for scaffold in scaffolds_in:
        placed_names.update(cont.name() for cont in scaffold.contigs)
    backward_graph = graph.reverse()

    result = []
    for scaffold in scaffolds_in:
        extended = Scaffold(scaffold.name)
        result.append(extended)

        for left, right in zip(scaffold.contigs[:-1], scaffold.contigs[1:]):
            extended.contigs.append(left)

            #look for graph contigs lying between the pair
            between = _get_cut_vertices(graph, backward_graph, left, right,
                                        max_path_len, placed_names)
            if between:
                #every link on the path becomes assembly-supported
                genomes = left.link.supporting_genomes
                left.link.supporting_assembly = True
                left.link.gap = config.vals["min_scaffold_gap"]
                for node in between:
                    name = node[1:]
                    if node[0] == "+":
                        sign = 1
                    else:
                        sign = -1
                    inserted = Contig.with_sequence(
                        name, len(contigs_fasta[name]), sign)
                    inserted.link.supporting_assembly = True
                    inserted.link.gap = config.vals["min_scaffold_gap"]
                    inserted.link.supporting_genomes = genomes
                    extended.contigs.append(inserted)

        #the pair loop adds left contigs only, so close with the last one
        extended.contigs.append(scaffold.contigs[-1])
    return result
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Rebuilds scaffolds with respect to the given permutations

    Uses perm_container.ancestor_perms when ancestral is set and
    perm_container.target_perms otherwise. Each contig is replaced by the
    permutations with the same (chr_name, repeat_id) that start inside
    its [seq_start, seq_end) range; contigs without any are logged and
    dropped.
    """
    if ancestral:
        source_perms = perm_container.ancestor_perms
    else:
        source_perms = perm_container.target_perms

    perms_by_origin = defaultdict(list)
    for perm in source_perms:
        perms_by_origin[(perm.chr_name, perm.repeat_id)].append(perm)

    updated = []
    for scaffold in scaffolds:
        pieces = []
        for contig in scaffold.contigs:
            old_perm = contig.perm

            covered = []
            origin = (old_perm.chr_name, old_perm.repeat_id)
            for candidate in perms_by_origin[origin]:
                if not (old_perm.seq_start <= candidate.seq_start
                        < old_perm.seq_end):
                    continue
                covered.append(candidate)
                assert (old_perm.seq_start < candidate.seq_end
                        <= old_perm.seq_end)

            if not covered:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            #follow the orientation of the contig inside the scaffold
            covered.sort(key=lambda p: p.seq_start, reverse=contig.sign < 0)

            #each piece gets the old gap reduced by the lengths so far
            remaining_gap = contig.link.gap
            for piece_perm in covered:
                remaining_gap -= piece_perm.length()
                piece_link = Link(remaining_gap,
                                  contig.link.supporting_genomes)
                pieces.append(Contig.with_perm(piece_perm, contig.sign,
                                               piece_link))
            #the last piece keeps the original link object
            pieces[-1].link = contig.link

        updated.append(Scaffold.with_contigs(scaffold.name, None,
                                             None, pieces))
    return updated
def _insert_from_graph(graph, scaffolds_in, max_path_len, contigs_fasta):
    """
    Fills the space between adjacent scaffold contigs with contigs
    that _get_cut_vertices reports on the assembly graph

    Returns new Scaffold objects, one per input scaffold. Links of input
    contigs that precede an insertion are modified in place.
    """
    out_scaffolds = []

    known_names = set()
    for scf in scaffolds_in:
        known_names |= {cont.name() for cont in scf.contigs}

    rev_graph = graph.reverse()
    for scf in scaffolds_in:
        grown = Scaffold(scf.name)
        out_scaffolds.append(grown)

        adjacent_pairs = zip(scf.contigs[:-1], scf.contigs[1:])
        for prev_cont, next_cont in adjacent_pairs:
            grown.contigs.append(prev_cont)

            #ask the graph which contigs separate the pair
            nodes = _get_cut_vertices(graph, rev_graph, prev_cont,
                                      next_cont, max_path_len, known_names)
            if not nodes:
                continue

            #chain the found contigs after prev_cont
            prev_link = prev_cont.link
            shared_genomes = prev_link.supporting_genomes
            prev_link.supporting_assembly = True
            prev_link.gap = config.vals["min_scaffold_gap"]
            for node in nodes:
                #node is an orientation character followed by the name
                strand, name = node[0], node[1:]
                sign = -1 if strand != "+" else 1
                length = len(contigs_fasta[name])
                extra = Contig.with_sequence(name, length, sign)
                extra.link.supporting_assembly = True
                extra.link.gap = config.vals["min_scaffold_gap"]
                extra.link.supporting_genomes = shared_genomes
                grown.contigs.append(extra)

        #only left members of pairs were added so far
        grown.contigs.append(scf.contigs[-1])
    return out_scaffolds
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Re-expresses scaffolds in terms of the given permutations

    The permutations (ancestor_perms if ancestral, target_perms
    otherwise) are grouped by (chr_name, repeat_id). Every contig is
    substituted by the permutations of its group starting within the
    contig's coordinates, ordered along the contig's direction.
    """
    chosen = (perm_container.ancestor_perms if ancestral
              else perm_container.target_perms)
    perm_index = defaultdict(list)
    for perm in chosen:
        perm_index[(perm.chr_name, perm.repeat_id)].append(perm)

    new_scaffolds = []
    for scf in scaffolds:
        new_contigs = []
        for contig in scf.contigs:
            start, end = contig.perm.seq_start, contig.perm.seq_end
            group = perm_index[(contig.perm.chr_name, contig.perm.repeat_id)]

            #keep permutations starting inside the contig ...
            inner_perms = [p for p in group if start <= p.seq_start < end]
            #... and make sure they also end inside it
            for new_perm in inner_perms:
                assert (start < new_perm.seq_end <= end)

            if not inner_perms:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            inner_perms = sorted(inner_perms, key=lambda p: p.seq_start,
                                 reverse=contig.sign < 0)

            #running value: old gap minus the lengths of emitted pieces
            gap_length = contig.link.gap
            for new_perm in inner_perms:
                gap_length -= new_perm.length()
                new_contigs.append(Contig.with_perm(
                    new_perm, contig.sign,
                    Link(gap_length, contig.link.supporting_genomes)))
            #last piece takes over the link of the replaced contig
            new_contigs[-1].link = contig.link

        new_scaffolds.append(
            Scaffold.with_contigs(scf.name, None, None, new_contigs))
    return new_scaffolds
def _insert_from_graph(graph_file, scaffolds_in, max_path_len):
    """
    Inserts contigs lying on unique overlap graph paths between adjacent
    scaffold contigs

    The graph is loaded from graph_file with _load_dot. For each pair of
    neighbouring contigs of a scaffold, _get_unique_path is queried. A
    non-empty path is used only if none of its nodes (with the first
    character stripped) names a contig that is already present in some
    input scaffold; then one contig per node is created with
    Contig.from_sting and placed between the pair.

    Returns a list with one new Scaffold per input scaffold. The original
    contig objects are reused in the new scaffolds, not copied.
    """
    graph = _load_dot(graph_file)
    logger.debug("Loaded overlap graph with {0} nodes".format(len(graph)))

    #names of all contigs that are already placed into some scaffold
    ordered_contigs = set()
    for scf in scaffolds_in:
        ordered_contigs.update(cont.name for cont in scf.contigs)

    new_scaffolds = []
    for scf in scaffolds_in:
        scaffold = Scaffold(scf.name)
        new_scaffolds.append(scaffold)

        for prev_cont, new_cont in zip(scf.contigs[:-1], scf.contigs[1:]):
            scaffold.contigs.append(prev_cont)

            #find unique path
            path_nodes = _get_unique_path(graph, prev_cont,
                                          new_cont, max_path_len)
            if not path_nodes:
                continue

            #check path consistency: report the first node that is
            #already placed elsewhere and skip the whole path
            conflict = next((node for node in path_nodes
                             if node[1:] in ordered_contigs), None)
            if conflict is not None:
                logger.debug("Path inconsistency {0} -- {1}: {2}"
                             .format(prev_cont, new_cont, conflict))
                continue

            #insert contigs along the path
            for node in path_nodes:
                scaffold.contigs.append(Contig.from_sting(node))

        #The loop above only emits the left contig of each pair, so the
        #last contig is added here. It is taken from the scaffold itself
        #instead of the loop variable: for a scaffold with fewer than two
        #contigs the loop body never runs and the loop variable would be
        #undefined or left over from the previous scaffold.
        if scf.contigs:
            scaffold.contigs.append(scf.contigs[-1])
    return new_scaffolds
def merge(big_scaffolds, small_scaffolds):
    """
    Merges scaffolds of two iterations

    For each pair of adjacent contigs of a big scaffold, both contigs are
    looked up by name in the small scaffolds. If they belong to the same
    small scaffold, the small-scaffold contigs strictly between them are
    inserted between the pair, provided that:
      * there is at least one contig between them,
      * none of those contigs already occurs in any big scaffold,
      * the pair has the same relative orientation (equal / different
        signs) in the big and in the small scaffold.
    If the pair occurs in the opposite order in the small scaffold, the
    inserted contigs are reversed and re-created as
    Contig(name, -sign, 0).

    Returns a list of new Scaffold objects, one per big scaffold. Asserts
    that contig names are unique across the small scaffolds.
    """
    logger.info("Merging two iterations")

    #names of every contig that is already placed in a big scaffold
    big_index = {cont.name for scf in big_scaffolds for cont in scf.contigs}

    #contig name -> (small scaffold, position inside it)
    small_index = {}
    for scf in small_scaffolds:
        for pos, contig in enumerate(scf.contigs):
            assert contig.name not in small_index
            small_index[contig.name] = (scf, pos)

    new_scaffolds = []
    for scf in big_scaffolds:
        result = []
        for prev_cont, new_cont in zip(scf.contigs[:-1], scf.contigs[1:]):
            result.append(prev_cont)

            try:
                scf_prev, begin = small_index[prev_cont.name]
                scf_new, end = small_index[new_cont.name]
            except KeyError:
                #at least one contig of the pair is not in small scaffolds
                continue
            if scf_prev.name != scf_new.name:
                continue

            assert end != begin
            same_dir = begin < end
            if not same_dir:
                begin, end = end, begin

            between = scf_prev.contigs[begin + 1:end]
            if not between:
                #the pair is adjacent in the small scaffold as well
                continue
            if any(cont.name in big_index for cont in between):
                continue

            #relative orientation of the pair must agree in both scaffolds
            big_same_sign = prev_cont.sign == new_cont.sign
            small_same_sign = (scf_prev.contigs[begin].sign ==
                               scf_prev.contigs[end].sign)
            if big_same_sign != small_same_sign:
                continue

            if not same_dir:
                #walk the small scaffold backwards, flipping every contig
                between = [Contig(cont.name, -cont.sign, 0)
                           for cont in reversed(between)]
            result.extend(between)

        #The pair loop emits only left contigs, so the last contig is
        #added here. It is taken from the scaffold itself: for a scaffold
        #with fewer than two contigs the loop variable would be undefined
        #or left over from the previous scaffold.
        if scf.contigs:
            result.append(scf.contigs[-1])

        merged = Scaffold(scf.name)
        merged.contigs = result
        new_scaffolds.append(merged)
    return new_scaffolds