コード例 #1
0
def _insert_from_graph_experement(graph_file, scaffolds_in, max_path_len):
    """
    Inserts contigs found in the overlap graph between adjacent scaffold contigs

    The graph is loaded from graph_file with _load_dot. For every pair of
    neighbouring contigs in a scaffold, _get_unigue_path_experiment is asked
    for path nodes between them (it also receives max_path_len and the set of
    names of all contigs that are already placed in scaffolds_in). When it
    returns a non-empty result, each node is converted with Contig.from_sting
    and inserted between the pair.

    Returns a list with one new Scaffold per input scaffold, in input order.
    """
    graph = _load_dot(graph_file)

    #names of all contigs that already belong to some scaffold
    ordered_contigs = set()
    for scf in scaffolds_in:
        ordered_contigs.update(cnt.name for cnt in scf.contigs)

    new_scaffolds = []
    for scf in scaffolds_in:
        new_scf = Scaffold(scf.name)
        new_scaffolds.append(new_scf)

        for prev_cont, new_cont in zip(scf.contigs[:-1], scf.contigs[1:]):
            new_scf.contigs.append(prev_cont)

            #find unique path
            path_nodes = _get_unigue_path_experiment(graph, prev_cont, new_cont,
                                                     max_path_len, ordered_contigs)

            if not path_nodes:
                continue

            #insert contigs along the path
            for node in path_nodes:
                new_scf.contigs.append(Contig.from_sting(node))

        #The pairwise loop only emits the left contig of every pair, so the
        #last contig is added here. It is taken from the scaffold itself
        #rather than from the loop variable: for a single-contig scaffold the
        #loop body never runs and the variable would be unbound or would
        #still hold a contig of the previous scaffold.
        if scf.contigs:
            new_scf.contigs.append(scf.contigs[-1])
    return new_scaffolds
コード例 #2
0
ファイル: merge_iters.py プロジェクト: wangdi2014/Ragout
def _update_scaffolds(scaffolds, perm_container):
    """
    Rebuilds scaffolds so that their contigs are based on the target
    permutations of perm_container

    Every contig is replaced by the permutations that share its
    (chr_name, repeat_id) and start inside its [seq_start, seq_end) range.
    Contigs without such permutations are dropped (and logged), scaffolds
    that end up without contigs are omitted from the result.
    """
    by_sequence = defaultdict(list)
    for target_perm in perm_container.target_perms:
        key = (target_perm.chr_name, target_perm.repeat_id)
        by_sequence[key].append(target_perm)

    updated = []
    for scaffold in scaffolds:
        rebuilt = []
        for old_cnt in scaffold.contigs:
            old_perm = old_cnt.perm

            #collect permutations located inside the old contig
            covered = []
            for cand in by_sequence[(old_perm.chr_name, old_perm.repeat_id)]:
                if not (old_perm.seq_start <= cand.seq_start
                        < old_perm.seq_end):
                    continue
                covered.append(cand)
                assert (old_perm.seq_start < cand.seq_end
                        <= old_perm.seq_end)

            if not covered:
                logger.debug("Lost: %s", str(old_cnt.perm))
                continue

            #order fragments the way they are traversed in the scaffold
            backward = old_cnt.sign < 0
            covered.sort(key=lambda p: p.seq_start, reverse=backward)

            #all fragments but the last are linked by the distance
            #to their successor
            for left, right in zip(covered, covered[1:]):
                if old_cnt.sign > 0:
                    gap = right.seq_start - left.seq_end
                else:
                    gap = left.seq_start - right.seq_end
                supporters = [GenChrPair(left.genome_name, left.chr_name)]
                link = Link(gap, supporters)
                rebuilt.append(Contig.with_perm(left, old_cnt.sign, link))

            #the last fragment takes over a copy of the old contig's link
            tail_link = copy(old_cnt.link)
            rebuilt.append(Contig.with_perm(covered[-1], old_cnt.sign,
                                            tail_link))

        if rebuilt:
            updated.append(Scaffold.with_contigs(scaffold.name, None,
                                                 None, rebuilt))
    return updated
コード例 #3
0
ファイル: merge_iters.py プロジェクト: wangdi2014/Ragout
def _merge_consecutive_contigs(scaffolds):
    """
    Joins neighbouring contigs of a scaffold when they are adjacent
    fragments of one sequence

    Two neighbours are joined if they have the same sign and chr_name and
    the coordinates of their permutations touch in the direction given by
    the sign. Returns new scaffolds; merged permutations are deep copies.
    """
    merged_scaffolds = []
    total = 0
    for scaffold in scaffolds:
        merged = []

        #state of the fragment that is currently being grown
        run_sign = run_perm = run_link = None
        for piece in scaffold.contigs:
            same_origin = (run_sign == piece.sign and
                           piece.perm.chr_name == run_perm.chr_name)
            grows_forward = (same_origin and run_sign > 0 and
                             run_perm.seq_end == piece.perm.seq_start)
            grows_backward = (same_origin and run_sign < 0 and
                              run_perm.seq_start == piece.perm.seq_end)

            if grows_forward:
                run_perm.seq_end = piece.perm.seq_end
                run_perm.blocks.extend(piece.perm.blocks)
            elif grows_backward:
                run_perm.seq_start = piece.perm.seq_start
                run_perm.blocks = piece.perm.blocks + run_perm.blocks
            else:
                #the fragment can not be extended: emit it and start anew
                if run_perm:
                    merged.append(
                        Contig.with_perm(run_perm, run_sign, run_link))
                run_perm = deepcopy(piece.perm)

            #the grown fragment always carries the link of its last piece
            run_sign = piece.sign
            run_link = piece.link

        if run_perm:
            merged.append(Contig.with_perm(run_perm, run_sign, run_link))
        total += len(merged)
        merged_scaffolds.append(
            Scaffold.with_contigs(scaffold.name, None, None, merged))

    logger.debug("Merging consequtive contigs: %d left", total)
    return merged_scaffolds
コード例 #4
0
ファイル: merge_iters.py プロジェクト: ptdtan/Ragout
def _merge_consecutive_contigs(scaffolds):
    """
    Joins neighbouring contigs of a scaffold when they are adjacent
    fragments of one sequence

    Two neighbours are joined if they have the same sign and chr_name and
    the coordinates of their permutations touch in the direction given by
    the sign. Returns new scaffolds; merged permutations are deep copies.
    """
    result = []
    contigs_left = 0
    for scaffold in scaffolds:
        joined = []

        #fragment that is currently being grown
        grown_sign = grown_perm = grown_link = None
        for fragment in scaffold.contigs:
            same_origin = (grown_sign == fragment.sign and
                           fragment.perm.chr_name == grown_perm.chr_name)
            extends_right = (same_origin and grown_sign > 0 and
                             grown_perm.seq_end == fragment.perm.seq_start)
            extends_left = (same_origin and grown_sign < 0 and
                            grown_perm.seq_start == fragment.perm.seq_end)

            if extends_right:
                grown_perm.seq_end = fragment.perm.seq_end
                grown_perm.blocks.extend(fragment.perm.blocks)
            elif extends_left:
                grown_perm.seq_start = fragment.perm.seq_start
                grown_perm.blocks = fragment.perm.blocks + grown_perm.blocks
            else:
                #nothing to extend: emit what was grown so far, start anew
                if grown_perm:
                    joined.append(Contig.with_perm(grown_perm, grown_sign,
                                                   grown_link))
                grown_perm = deepcopy(fragment.perm)

            #sign and link are always taken from the most recent fragment
            grown_sign = fragment.sign
            grown_link = fragment.link

        if grown_perm:
            joined.append(Contig.with_perm(grown_perm, grown_sign,
                                           grown_link))
        contigs_left += len(joined)
        result.append(Scaffold.with_contigs(scaffold.name, None,
                                            None, joined))

    logger.debug("Merging consequtive contigs: {0} left".format(contigs_left))
    return result
コード例 #5
0
def _make_contigs(perm_container):
    """
    Wraps every target permutation into a Contig

    Returns a pair: the list of contigs (in the order of target_perms) and
    a dict that maps block_id to the contig which contains that block.
    Every permutation must be non-empty and every block_id must be unique.
    """
    made = []
    by_block = {}
    for perm in perm_container.target_perms:
        assert len(perm.blocks)
        contig = Contig.with_perm(perm)
        made.append(contig)

        for block in perm.blocks:
            block_id = block.block_id
            assert block_id not in by_block
            by_block[block_id] = contig

    return made, by_block
コード例 #6
0
ファイル: scaffolder.py プロジェクト: ptdtan/Ragout
def _make_contigs(perm_container, ancestral=False):
    """
    Wraps permutations into Contig structures

    Permutations are taken from perm_container.ancestor_perms if ancestral
    is set and from perm_container.target_perms otherwise. Returns a pair:
    the list of contigs and a dict that maps block_id to the contig which
    contains that block. Every permutation must be non-empty and every
    block_id must be unique.
    """
    if ancestral:
        perms = perm_container.ancestor_perms
    else:
        perms = perm_container.target_perms

    made = []
    by_block = {}
    for perm in perms:
        assert len(perm.blocks)
        contig = Contig.with_perm(perm)
        made.append(contig)

        for block in perm.blocks:
            block_id = block.block_id
            assert block_id not in by_block
            by_block[block_id] = contig
    return made, by_block
コード例 #7
0
def _make_contigs(perm_container):
    """
    Builds a Contig for every non-empty filtered target permutation

    Returns a pair: the list of contigs and an index that maps abs(block)
    to the list of contigs containing that block. Each contig receives
    a shallow copy of the permutation's blocks.
    """
    made = []
    by_block = defaultdict(list)
    for perm in perm_container.target_perms_filtered:
        if len(perm.blocks) == 0:
            #nothing to place for an empty permutation
            continue

        contig = Contig(perm.chr_id)
        contig.blocks = copy.copy(perm.blocks)
        made.append(contig)

        for block in perm.blocks:
            by_block[abs(block)].append(contig)

    return made, by_block
コード例 #8
0
ファイル: scaffolder.py プロジェクト: fenderglass/Ragout
def _make_contigs(perm_container):
    """
    Creates a Contig for each target permutation

    Returns a pair: the list of contigs (in the order of target_perms) and
    a dict that maps block_id to the contig which contains that block.
    Every permutation must be non-empty and every block_id must be unique.
    """
    all_contigs = []
    block_to_contig = {}
    for perm in perm_container.target_perms:
        assert len(perm.blocks)
        new_contig = Contig.with_perm(perm)
        all_contigs.append(new_contig)

        for block in perm.blocks:
            key = block.block_id
            assert key not in block_to_contig
            block_to_contig[key] = new_contig

    return all_contigs, block_to_contig
コード例 #9
0
def _make_contigs(perm_container, ancestral=False):
    """
    Creates a Contig for each permutation

    Permutations are taken from perm_container.ancestor_perms if ancestral
    is set and from perm_container.target_perms otherwise. Returns a pair:
    the list of contigs and a dict that maps block_id to the contig which
    contains that block. Every permutation must be non-empty and every
    block_id must be unique.
    """
    if ancestral:
        source_perms = perm_container.ancestor_perms
    else:
        source_perms = perm_container.target_perms

    all_contigs = []
    block_to_contig = {}
    for perm in source_perms:
        assert len(perm.blocks)
        new_contig = Contig.with_perm(perm)
        all_contigs.append(new_contig)

        for block in perm.blocks:
            key = block.block_id
            assert key not in block_to_contig
            block_to_contig[key] = new_contig
    return all_contigs, block_to_contig
コード例 #10
0
ファイル: assembly_refine.py プロジェクト: yuzhenpeng/Ragout
def _insert_from_graph(graph, scaffolds_in, max_path_len, contigs_fasta):
    """
    Extends scaffolds with contigs taken from the assembly graph

    For every pair of neighbouring contigs, _get_cut_vertices is asked for
    the nodes to place between them. Each returned node is a string whose
    first character gives the orientation ("+" means sign 1, anything else
    sign -1) and whose remainder is the contig name; the contig length is
    the length of contigs_fasta[name].

    Note that the link of the left contig of such a pair is modified in
    place. Returns a list with one new Scaffold per input scaffold.
    """
    placed_names = set()
    for scaffold in scaffolds_in:
        placed_names.update(cnt.name() for cnt in scaffold.contigs)
    graph_rev = graph.reverse()

    result = []
    for scaffold in scaffolds_in:
        extended = Scaffold(scaffold.name)
        result.append(extended)

        for left, right in zip(scaffold.contigs, scaffold.contigs[1:]):
            extended.contigs.append(left)

            #find contigs to insert
            between = _get_cut_vertices(graph, graph_rev, left, right,
                                        max_path_len, placed_names)
            if between:
                #the adjacency is now supported by the assembly graph
                genomes = left.link.supporting_genomes
                left.link.supporting_assembly = True
                left.link.gap = config.vals["min_scaffold_gap"]

                #insert contigs along the path
                for node in between:
                    orientation, name = node[0], node[1:]
                    sign = -1 if orientation != "+" else 1

                    inserted = Contig.with_sequence(
                        name, len(contigs_fasta[name]), sign)
                    inserted.link.supporting_assembly = True
                    inserted.link.gap = config.vals["min_scaffold_gap"]
                    inserted.link.supporting_genomes = genomes
                    extended.contigs.append(inserted)

        #the loop above emits only the left contig of every pair
        extended.contigs.append(scaffold.contigs[-1])

    return result
コード例 #11
0
ファイル: merge_iters.py プロジェクト: ptdtan/RagoutAPI
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Rebuilds scaffolds so that their contigs are based on the permutations
    of perm_container

    Permutations come from ancestor_perms if ancestral is set and from
    target_perms otherwise. Every contig is replaced by the permutations
    that share its (chr_name, repeat_id) and start inside its
    [seq_start, seq_end) range; contigs without such permutations are
    dropped (and logged). One scaffold is returned per input scaffold,
    even if it has no contigs left.
    """
    if ancestral:
        source_perms = perm_container.ancestor_perms
    else:
        source_perms = perm_container.target_perms

    by_sequence = defaultdict(list)
    for perm in source_perms:
        by_sequence[(perm.chr_name, perm.repeat_id)].append(perm)

    updated = []
    for scaffold in scaffolds:
        rebuilt = []
        for old_cnt in scaffold.contigs:
            old_perm = old_cnt.perm

            #collect permutations located inside the old contig
            covered = []
            for cand in by_sequence[(old_perm.chr_name, old_perm.repeat_id)]:
                if not (old_perm.seq_start <= cand.seq_start
                        < old_perm.seq_end):
                    continue
                covered.append(cand)
                assert (old_perm.seq_start < cand.seq_end
                        <= old_perm.seq_end)

            if not covered:
                logger.debug("Lost: {0}".format(old_cnt.perm))
                continue

            #order fragments the way they are traversed in the scaffold
            backward = old_cnt.sign < 0
            covered.sort(key=lambda p: p.seq_start, reverse=backward)

            #each fragment's link gets the old gap reduced by the lengths
            #of the fragments emitted so far (including itself)
            remaining_gap = old_cnt.link.gap
            for fragment in covered:
                remaining_gap -= fragment.length()
                fragment_link = Link(remaining_gap,
                                     old_cnt.link.supporting_genomes)
                rebuilt.append(Contig.with_perm(fragment, old_cnt.sign,
                                                fragment_link))
            #the last fragment keeps the link of the old contig itself
            rebuilt[-1].link = old_cnt.link

        updated.append(
            Scaffold.with_contigs(scaffold.name, None, None, rebuilt))
    return updated
コード例 #12
0
ファイル: assembly_refine.py プロジェクト: fenderglass/Ragout
def _insert_from_graph(graph, scaffolds_in, max_path_len, contigs_fasta):
    """
    Adds contigs from the assembly graph to scaffolds

    For every pair of neighbouring contigs, _get_cut_vertices is asked for
    the nodes to place between them. Each returned node is a string whose
    first character gives the orientation ("+" means sign 1, anything else
    sign -1) and whose remainder is the contig name; the contig length is
    the length of contigs_fasta[name].

    Note that the link of the left contig of such a pair is modified in
    place. Returns a list with one new Scaffold per input scaffold.
    """
    known_names = set()
    for scf in scaffolds_in:
        known_names.update(cnt.name() for cnt in scf.contigs)
    backward_graph = graph.reverse()

    out_scaffolds = []
    for scf in scaffolds_in:
        out_scf = Scaffold(scf.name)
        out_scaffolds.append(out_scf)

        for first, second in zip(scf.contigs, scf.contigs[1:]):
            out_scf.contigs.append(first)

            #find contigs to insert
            to_insert = _get_cut_vertices(graph, backward_graph, first,
                                          second, max_path_len, known_names)
            if to_insert:
                #the adjacency is now supported by the assembly graph
                shared_genomes = first.link.supporting_genomes
                first.link.supporting_assembly = True
                first.link.gap = config.vals["min_scaffold_gap"]

                #insert contigs along the path
                for node in to_insert:
                    strand, name = node[0], node[1:]
                    sign = -1 if strand != "+" else 1

                    graph_contig = Contig.with_sequence(
                        name, len(contigs_fasta[name]), sign)
                    graph_contig.link.supporting_assembly = True
                    graph_contig.link.gap = config.vals["min_scaffold_gap"]
                    graph_contig.link.supporting_genomes = shared_genomes
                    out_scf.contigs.append(graph_contig)

        #the loop above emits only the left contig of every pair
        out_scf.contigs.append(scf.contigs[-1])

    return out_scaffolds
コード例 #13
0
ファイル: merge_iters.py プロジェクト: ptdtan/Ragout
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Re-creates scaffolds on top of the permutations of perm_container

    Permutations come from ancestor_perms if ancestral is set and from
    target_perms otherwise. Every contig is replaced by the permutations
    that share its (chr_name, repeat_id) and start inside its
    [seq_start, seq_end) range; contigs without such permutations are
    dropped (and logged). One scaffold is returned per input scaffold,
    even if it has no contigs left.
    """
    if ancestral:
        perms = perm_container.ancestor_perms
    else:
        perms = perm_container.target_perms

    perms_by_seq = defaultdict(list)
    for perm in perms:
        perms_by_seq[(perm.chr_name, perm.repeat_id)].append(perm)

    result = []
    for scaffold in scaffolds:
        fresh_contigs = []
        for contig in scaffold.contigs:
            parent = contig.perm

            #permutations that lie inside the parent contig
            inside = []
            for cand in perms_by_seq[(parent.chr_name, parent.repeat_id)]:
                if not (parent.seq_start <= cand.seq_start < parent.seq_end):
                    continue
                inside.append(cand)
                assert parent.seq_start < cand.seq_end <= parent.seq_end

            if not inside:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            #follow the direction in which the contig is traversed
            is_reversed = contig.sign < 0
            inside.sort(key=lambda p: p.seq_start, reverse=is_reversed)

            #each fragment's link gets the old gap reduced by the lengths
            #of the fragments emitted so far (including itself)
            gap_left = contig.link.gap
            for piece in inside:
                gap_left -= piece.length()
                piece_link = Link(gap_left, contig.link.supporting_genomes)
                fresh_contigs.append(
                    Contig.with_perm(piece, contig.sign, piece_link))
            #the last fragment keeps the link of the old contig itself
            fresh_contigs[-1].link = contig.link

        result.append(
            Scaffold.with_contigs(scaffold.name, None, None, fresh_contigs))
    return result
コード例 #14
0
def _insert_from_graph(graph_file, scaffolds_in, max_path_len):
    """
    Inserts contigs from the overlap graph between adjacent scaffold contigs

    The graph is loaded from graph_file with _load_dot. For every pair of
    neighbouring contigs in a scaffold, _get_unique_path is asked for path
    nodes between them. A non-empty path is used only if none of its nodes
    (compared by node[1:]) is already placed in scaffolds_in; its nodes are
    then converted with Contig.from_sting and inserted between the pair.

    Returns a list with one new Scaffold per input scaffold, in input order.
    """
    graph = _load_dot(graph_file)
    logger.debug("Loaded overlap graph with {0} nodes".format(len(graph)))

    #names of all contigs that already belong to some scaffold
    ordered_contigs = set()
    for scf in scaffolds_in:
        ordered_contigs.update(cnt.name for cnt in scf.contigs)

    new_scaffolds = []
    for scf in scaffolds_in:
        new_scf = Scaffold(scf.name)
        new_scaffolds.append(new_scf)

        for prev_cont, new_cont in zip(scf.contigs[:-1], scf.contigs[1:]):
            new_scf.contigs.append(prev_cont)

            #find unique path
            path_nodes = _get_unique_path(graph, prev_cont,
                                          new_cont, max_path_len)

            if not path_nodes:
                continue

            #check path consistency: node[1:] drops the first character of
            #the node id (presumably an orientation sign - confirm)
            consistent = True
            for node in path_nodes:
                if node[1:] in ordered_contigs:
                    logger.debug("Path inconsistency {0} -- {1}: {2}"
                                 .format(prev_cont, new_cont, node))
                    consistent = False
                    break
            if not consistent:
                continue

            #insert contigs along the path
            for node in path_nodes:
                new_scf.contigs.append(Contig.from_sting(node))

        #The pairwise loop only emits the left contig of every pair, so the
        #last contig is added here. It is taken from the scaffold itself
        #rather than from the loop variable: for a single-contig scaffold the
        #loop body never runs and the variable would be unbound or would
        #still hold a contig of the previous scaffold.
        if scf.contigs:
            new_scf.contigs.append(scf.contigs[-1])
    return new_scaffolds
コード例 #15
0
def merge(big_scaffolds, small_scaffolds):
    """
    Merges scaffolds from two iterations

    The result has one scaffold per scaffold of big_scaffolds. Between two
    contigs that are adjacent in a big scaffold, the contigs that separate
    them in small_scaffolds are inserted if all of the following holds:
      * both contigs are present in small scaffolds with the same name
      * none of the contigs in between is already used in big_scaffolds
      * the two contigs have equal signs in the big scaffold exactly when
        they have equal signs in the small one

    If the pair appears in the opposite order in the small scaffold, the
    inserted contigs are taken in reverse order and re-created with negated
    sign (and 0 as the third Contig argument).

    Contig names must be unique across small_scaffolds.
    """
    logger.info("Merging two iterations")

    #names of all contigs that are already placed in big scaffolds
    big_index = set()
    for scf in big_scaffolds:
        big_index.update(cnt.name for cnt in scf.contigs)

    #contig name -> (small scaffold, position inside it)
    small_index = {}
    for scf in small_scaffolds:
        for pos, contig in enumerate(scf.contigs):
            assert contig.name not in small_index
            small_index[contig.name] = (scf, pos)

    new_scaffolds = []
    for scf in big_scaffolds:
        result = []
        for prev_cont, new_cont in zip(scf.contigs[:-1], scf.contigs[1:]):
            result.append(prev_cont)

            try:
                scf_prev, begin = small_index[prev_cont.name]
                scf_new, end = small_index[new_cont.name]
            except KeyError:
                continue
            if scf_prev.name != scf_new.name:
                continue

            assert end != begin
            same_dir = begin < end
            if not same_dir:
                begin, end = end, begin

            #nothing lies between the two contigs
            if end - begin == 1:
                continue

            between = scf_prev.contigs[begin + 1:end]
            if any(cnt.name in big_index for cnt in between):
                continue

            #relative orientation must agree in both iterations
            if ((prev_cont.sign == new_cont.sign) !=
                    (scf_prev.contigs[begin].sign ==
                     scf_prev.contigs[end].sign)):
                continue

            if not same_dir:
                between = [Contig(cnt.name, -cnt.sign, 0)
                           for cnt in reversed(between)]
            result.extend(between)

        #The pairwise loop only emits the left contig of every pair, so the
        #last contig is added here. It is taken from the scaffold itself
        #rather than from the loop variable: for a single-contig scaffold the
        #loop body never runs and the variable would be unbound or would
        #still hold a contig of the previous scaffold.
        if scf.contigs:
            result.append(scf.contigs[-1])

        merged = Scaffold(scf.name)
        merged.contigs = result
        new_scaffolds.append(merged)

    return new_scaffolds