def extend_scaffold(contig):
        visited.add(contig)
        scf_name = "ragout-scaffold-{0}".format(counter[0])
        counter[0] += 1
        scf = Scaffold.with_contigs(scf_name, contig.blocks[0],
                                    contig.blocks[-1], [contig])
        scaffolds.append(scf)

        #go right
        while scf.right in connections:
            adjacent = connections[scf.right].end

            #print adjacent, contig_index[abs(adjacent)]
            assert len(contig_index[abs(adjacent)]) == 1

            contig = contig_index[abs(adjacent)][0]
            if contig in visited:
                break

            if contig.blocks[0] == adjacent:
                scf.contigs.append(contig)
                scf.right = contig.blocks[-1]
                visited.add(contig)
                continue

            if -contig.blocks[-1] == adjacent:
                scf.contigs.append(contig)
                scf.contigs[-1].sign = -1
                scf.right = -contig.blocks[0]
                visited.add(contig)
                continue

            break

        #go left
        while -scf.left in connections:
            adjacent = -connections[-scf.left].end

            assert len(contig_index[abs(adjacent)]) == 1

            contig = contig_index[abs(adjacent)][0]
            if contig in visited:
                break

            if contig.blocks[-1] == adjacent:
                scf.contigs.insert(0, contig)
                scf.left = contig.blocks[0]
                visited.add(contig)
                continue

            if -contig.blocks[0] == adjacent:
                scf.contigs.insert(0, contig)
                scf.contigs[0].sign = -1
                scf.left = -contig.blocks[-1]
                visited.add(contig)
                continue

            break
    def extend_scaffold(contig):
        visited.add(contig)
        scf_name = "ragout-scaffold-{0}".format(counter[0])
        counter[0] += 1
        scf = Scaffold.with_contigs(scf_name, contig.blocks[0], contig.blocks[-1], [contig])
        scaffolds.append(scf)

        #go right
        while scf.right in connections:
            adjacent = connections[scf.right].end

            #print adjacent, contig_index[abs(adjacent)]
            assert len(contig_index[abs(adjacent)]) == 1

            contig = contig_index[abs(adjacent)][0]
            if contig in visited:
                break

            if contig.blocks[0] == adjacent:
                scf.contigs.append(contig)
                scf.right = contig.blocks[-1]
                visited.add(contig)
                continue

            if -contig.blocks[-1] == adjacent:
                scf.contigs.append(contig)
                scf.contigs[-1].sign = -1
                scf.right = -contig.blocks[0]
                visited.add(contig)
                continue

            break

        #go left
        while -scf.left in connections:
            adjacent = -connections[-scf.left].end

            assert len(contig_index[abs(adjacent)]) == 1

            contig = contig_index[abs(adjacent)][0]
            if contig in visited:
                break

            if contig.blocks[-1] == adjacent:
                scf.contigs.insert(0, contig)
                scf.left = contig.blocks[0]
                visited.add(contig)
                continue

            if -contig.blocks[0] == adjacent:
                scf.contigs.insert(0, contig)
                scf.contigs[0].sign = -1
                scf.left = -contig.blocks[-1]
                visited.add(contig)
                continue

            break
Example #3
0
def _update_scaffolds(scaffolds, perm_container):
    """
    Updates scaffolds wrt to given permutations
    """
    perm_index = defaultdict(list)
    for perm in perm_container.target_perms:
        perm_index[(perm.chr_name, perm.repeat_id)].append(perm)

    new_scaffolds = []
    for scf in scaffolds:
        new_contigs = []
        for contig in scf.contigs:
            inner_perms = []
            for new_perm in perm_index[(contig.perm.chr_name,
                                        contig.perm.repeat_id)]:
                if (contig.perm.seq_start <= new_perm.seq_start <
                        contig.perm.seq_end):
                    inner_perms.append(new_perm)
                    assert (contig.perm.seq_start < new_perm.seq_end <=
                            contig.perm.seq_end)

            if not inner_perms:
                logger.debug("Lost: %s", str(contig.perm))
                continue

            inner_perms.sort(key=lambda p: p.seq_start,
                             reverse=contig.sign < 0)
            for prev_perm, next_perm in zip(inner_perms[:-1], inner_perms[1:]):
                if contig.sign > 0:
                    gap_length = next_perm.seq_start - prev_perm.seq_end
                else:
                    gap_length = prev_perm.seq_start - next_perm.seq_end
                support = [
                    GenChrPair(prev_perm.genome_name, prev_perm.chr_name)
                ]
                new_contigs.append(
                    Contig.with_perm(prev_perm, contig.sign,
                                     Link(gap_length, support)))
            new_contigs.append(
                Contig.with_perm(inner_perms[-1], contig.sign,
                                 copy(contig.link)))

        if len(new_contigs):
            new_scaffolds.append(
                Scaffold.with_contigs(scf.name, None, None, new_contigs))
    return new_scaffolds
Example #4
0
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Updates scaffolds wrt to given permutations
    """
    perm_index = defaultdict(list)
    if not ancestral:
        for perm in perm_container.target_perms:
            perm_index[(perm.chr_name, perm.repeat_id)].append(perm)
    else:
        for perm in perm_container.ancestor_perms:
            perm_index[(perm.chr_name, perm.repeat_id)].append(perm)

    new_scaffolds = []
    for scf in scaffolds:
        new_contigs = []
        for contig in scf.contigs:
            inner_perms = []
            for new_perm in perm_index[(contig.perm.chr_name,
                                        contig.perm.repeat_id)]:
                if (contig.perm.seq_start <= new_perm.seq_start <
                        contig.perm.seq_end):
                    inner_perms.append(new_perm)
                    assert (contig.perm.seq_start < new_perm.seq_end <=
                            contig.perm.seq_end)

            if not inner_perms:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            inner_perms.sort(key=lambda p: p.seq_start,
                             reverse=contig.sign < 0)
            gap_length = contig.link.gap
            for new_perm in inner_perms:
                gap_length -= new_perm.length()
                new_link = Link(gap_length, contig.link.supporting_genomes)
                new_contigs.append(
                    Contig.with_perm(new_perm, contig.sign, new_link))
            new_contigs[-1].link = contig.link

        new_scaffolds.append(
            Scaffold.with_contigs(scf.name, None, None, new_contigs))
    return new_scaffolds
Example #5
0
def _update_scaffolds(scaffolds, perm_container, ancestral=False):
    """
    Updates scaffolds wrt to given permutations
    """
    perm_index = defaultdict(list)
    if not ancestral:
        for perm in perm_container.target_perms:
            perm_index[(perm.chr_name, perm.repeat_id)].append(perm)
    else:
        for perm in perm_container.ancestor_perms:
            perm_index[(perm.chr_name, perm.repeat_id)].append(perm)

    new_scaffolds = []
    for scf in scaffolds:
        new_contigs = []
        for contig in scf.contigs:
            inner_perms = []
            for new_perm in perm_index[(contig.perm.chr_name,
                                        contig.perm.repeat_id)]:
                if (contig.perm.seq_start <= new_perm.seq_start
                    < contig.perm.seq_end):
                    inner_perms.append(new_perm)
                    assert (contig.perm.seq_start < new_perm.seq_end
                            <= contig.perm.seq_end)

            if not inner_perms:
                logger.debug("Lost: {0}".format(contig.perm))
                continue

            inner_perms.sort(key=lambda p: p.seq_start, reverse=contig.sign < 0)
            gap_length = contig.link.gap
            for new_perm in inner_perms:
                gap_length -= new_perm.length()
                new_link = Link(gap_length, contig.link.supporting_genomes)
                new_contigs.append(Contig.with_perm(new_perm, contig.sign,
                                                    new_link))
            new_contigs[-1].link = contig.link

        new_scaffolds.append(Scaffold.with_contigs(scf.name, None,
                                                   None, new_contigs))
    return new_scaffolds
Example #6
0
def _merge_consecutive_contigs(scaffolds):
    """
    Merges consecutive contig fragments originating from a same contig
    """
    new_scaffolds = []
    num_contigs = 0
    for scf in scaffolds:
        new_contigs = []

        cur_sign, cur_perm, cur_link = None, None, None
        for cnt in scf.contigs:
            consistent = False
            if cur_sign == cnt.sign and cnt.perm.chr_name == cur_perm.chr_name:
                if cur_sign > 0 and cur_perm.seq_end == cnt.perm.seq_start:
                    cur_perm.seq_end = cnt.perm.seq_end
                    cur_perm.blocks.extend(cnt.perm.blocks)
                    consistent = True
                if cur_sign < 0 and cur_perm.seq_start == cnt.perm.seq_end:
                    cur_perm.seq_start = cnt.perm.seq_start
                    cur_perm.blocks = cnt.perm.blocks + cur_perm.blocks
                    consistent = True

            if not consistent:
                if cur_perm:
                    new_contigs.append(
                        Contig.with_perm(cur_perm, cur_sign, cur_link))
                cur_perm = deepcopy(cnt.perm)

            cur_sign = cnt.sign
            cur_link = cnt.link

        if cur_perm:
            new_contigs.append(Contig.with_perm(cur_perm, cur_sign, cur_link))
        num_contigs += len(new_contigs)
        new_scaffolds.append(
            Scaffold.with_contigs(scf.name, None, None, new_contigs))

    logger.debug("Merging consequtive contigs: %d left", num_contigs)
    return new_scaffolds
Example #7
0
def _merge_consecutive_contigs(scaffolds):
    """
    Merges consecutive contig fragments originating from a same contig
    """
    new_scaffolds = []
    num_contigs = 0
    for scf in scaffolds:
        new_contigs = []

        cur_sign, cur_perm, cur_link = None, None, None
        for cnt in scf.contigs:
            consistent = False
            if cur_sign == cnt.sign and cnt.perm.chr_name == cur_perm.chr_name:
                if cur_sign > 0 and cur_perm.seq_end == cnt.perm.seq_start:
                    cur_perm.seq_end = cnt.perm.seq_end
                    cur_perm.blocks.extend(cnt.perm.blocks)
                    consistent = True
                if cur_sign < 0 and cur_perm.seq_start == cnt.perm.seq_end:
                    cur_perm.seq_start = cnt.perm.seq_start
                    cur_perm.blocks = cnt.perm.blocks + cur_perm.blocks
                    consistent = True

            if not consistent:
                if cur_perm:
                    new_contigs.append(Contig.with_perm(cur_perm, cur_sign, cur_link))
                cur_perm = deepcopy(cnt.perm)

            cur_sign = cnt.sign
            cur_link = cnt.link

        if cur_perm:
            new_contigs.append(Contig.with_perm(cur_perm, cur_sign, cur_link))
        num_contigs += len(new_contigs)
        new_scaffolds.append(Scaffold.with_contigs(scf.name, None,
                                                   None, new_contigs))

    logger.debug("Merging consequtive contigs: {0} left".format(num_contigs))
    return new_scaffolds
Example #8
0
    def extend_scaffold(contig):
        visited.add(contig)
        scf_name = "ragout-scaffold-{0}".format(counter[0])
        counter[0] += 1
        scf = Scaffold.with_contigs(scf_name, contig.left_end(),
                                    contig.right_end(), [contig])

        already_complete = (scf.right in adjacencies
                            and adjacencies[scf.right].block == scf.left
                            and adjacencies[scf.right].infinity)
        if already_complete:
            scaffolds.append(scf)
            return

        #go right
        while scf.right in adjacencies and not adjacencies[scf.right].infinity:
            adj_block = adjacencies[scf.right].block
            adj_distance = adjacencies[scf.right].distance
            adj_supporting_genomes = adjacencies[scf.right].supporting_genomes

            contig = contig_index[abs(adj_block)]
            if contig in visited:
                break

            if adj_block in [contig.left_end(), contig.right_end()]:
                if contig.left_end() == adj_block:
                    scf.contigs.append(contig)
                else:
                    scf.contigs.append(contig.reverse_copy())

                flank = scf.contigs[-2].right_gap() + scf.contigs[-1].left_gap(
                )
                gap = adj_distance - flank if correct_distances else adj_distance
                scf.contigs[-2].link = Link(gap, adj_supporting_genomes)

                scf.right = scf.contigs[-1].right_end()
                visited.add(contig)
                continue

            break

        #go left
        while scf.left in adjacencies and not adjacencies[scf.left].infinity:
            adj_block = adjacencies[scf.left].block
            adj_distance = adjacencies[scf.left].distance
            adj_supporting_genomes = adjacencies[scf.left].supporting_genomes

            contig = contig_index[abs(adj_block)]
            if contig in visited:
                break

            if adj_block in [contig.right_end(), contig.left_end()]:
                if contig.right_end() == adj_block:
                    scf.contigs.insert(0, contig)
                else:
                    scf.contigs.insert(0, contig.reverse_copy())

                flank = scf.contigs[0].right_gap() + scf.contigs[1].left_gap()
                gap = adj_distance - flank if correct_distances else adj_distance
                scf.contigs[0].link = Link(gap, adj_supporting_genomes)

                scf.left = scf.contigs[0].left_end()
                visited.add(contig)
                continue

            break

        if len(scf.contigs) > 1:
            scaffolds.append(scf)
Example #9
0
    def extend_scaffold(contig):
        visited.add(contig)
        scf_name = "ragout-scaffold-{0}".format(counter[0])
        counter[0] += 1
        scf = Scaffold.with_contigs(scf_name, contig.left_end(),
                                    contig.right_end(), [contig])

        already_complete = (scf.right in adjacencies and
                            adjacencies[scf.right].block == scf.left and
                            adjacencies[scf.right].infinity)
        if already_complete:
            scaffolds.append(scf)
            return

        #go right
        while scf.right in adjacencies and not adjacencies[scf.right].infinity:
            adj_block = adjacencies[scf.right].block
            adj_distance = adjacencies[scf.right].distance
            adj_supporting_genomes = adjacencies[scf.right].supporting_genomes

            ###KeyError may be throw when ref.indels = False
            contig = contig_index[abs(adj_block)]
            if contig in visited:
                break

            if adj_block in [contig.left_end(), contig.right_end()]:
                if contig.left_end() == adj_block:
                    scf.contigs.append(contig)
                else:
                    scf.contigs.append(contig.reverse_copy())

                flank = scf.contigs[-2].right_gap() + scf.contigs[-1].left_gap()
                try:
                    gap = adj_distance - flank if correct_distances else adj_distance
                except Exception as e:
                    print scf.right
                    print adj_distance, flank
                scf.contigs[-2].link = Link(gap, adj_supporting_genomes)

                scf.right = scf.contigs[-1].right_end()
                visited.add(contig)
                continue

            break

        #go left
        while scf.left in adjacencies and not adjacencies[scf.left].infinity:
            adj_block = adjacencies[scf.left].block
            adj_distance = adjacencies[scf.left].distance
            adj_supporting_genomes = adjacencies[scf.left].supporting_genomes

            contig = contig_index[abs(adj_block)]
            if contig in visited:
                break

            if adj_block in [contig.right_end(), contig.left_end()]:
                if contig.right_end() == adj_block:
                    scf.contigs.insert(0, contig)
                else:
                    scf.contigs.insert(0, contig.reverse_copy())

                flank = scf.contigs[0].right_gap() + scf.contigs[1].left_gap()
                try:
                    gap = adj_distance - flank if correct_distances else adj_distance
                except Exception as e:
                    print adj_distance, flank
                scf.contigs[0].link = Link(gap, adj_supporting_genomes)

                scf.left = scf.contigs[0].left_end()
                visited.add(contig)
                continue

            break

        if len(scf.contigs) > 1:
            scaffolds.append(scf)