Esempio n. 1
0
def path_to_agp(g, path, object, sizes, status):
    lines = []
    for (a, ao), (b, bo) in pairwise(path):
        ao = get_orientation(ao, status)
        e = g.get_edge(a.v, b.v)
        cline = AGPLine.cline(object, a.v, sizes, ao)
        gline = AGPLine.gline(object, e.length)
        lines.append(cline)
        lines.append(gline)
    # Do not forget the last one
    z, zo = path[-1]
    zo = get_orientation(zo, status)
    cline = AGPLine.cline(object, z.v, sizes, zo)
    lines.append(cline)

    return lines
Esempio n. 2
0
def path_to_agp(g, path, object, sizes, status):
    lines = []
    for (a, ao), (b, bo) in pairwise(path):
        ao = get_orientation(ao, status)
        e = g.get_edge(a.v, b.v)
        cline = AGPLine.cline(object, a.v, sizes, ao)
        gline = AGPLine.gline(object, e.length)
        lines.append(cline)
        lines.append(gline)
    # Do not forget the last one
    z, zo = path[-1]
    zo = get_orientation(zo, status)
    cline = AGPLine.cline(object, z.v, sizes, zo)
    lines.append(cline)

    return lines
Esempio n. 3
0
    def write_agp(self, filename):
        sizes = self.sz
        agp = []
        for scaffold, lines in self.iter_scaffold():
            for a, b in pairwise(lines):
                cline = AGPLine.cline(scaffold, a.tig, sizes, a.oo)
                gline = AGPLine.gline(scaffold, a.gaps)
                agp.append(cline)
                agp.append(gline)
            a = lines[-1]
            cline = AGPLine.cline(scaffold, a.tig, sizes, a.oo)
            agp.append(cline)

        fw = open(filename, "w")
        for a in agp:
            print >> fw, a
        fw.close()

        reindex([filename, "--inplace"])
        return filename
Esempio n. 4
0
    def write_agp(self, filename):
        sizes = self.sz
        agp = []
        for scaffold, lines in self.iter_scaffold():
            for a, b in pairwise(lines):
                cline = AGPLine.cline(scaffold, a.tig, sizes, a.oo)
                gline = AGPLine.gline(scaffold, a.gaps)
                agp.append(cline)
                agp.append(gline)
            a = lines[-1]
            cline = AGPLine.cline(scaffold, a.tig, sizes, a.oo)
            agp.append(cline)

        fw = open(filename, "w")
        for a in agp:
            print >> fw, a
        fw.close()

        reindex([filename, "--inplace"])
        return filename
Esempio n. 5
0
def embed(args):
    """
    %prog embed evidencefile scaffolds.fasta contigs.fasta

    Use SSPACE evidencefile to scaffold contigs into existing scaffold
    structure, as in `scaffolds.fasta`. Contigs.fasta were used by SSPACE
    directly to scaffold.

    Rules:
    1. Only update existing structure by embedding contigs small enough to fit.
    2. Promote singleton contigs only if they are big (>= min_length).
    """
    p = OptionParser(embed.__doc__)
    p.set_mingap(default=10)
    p.add_option("--min_length", default=200, type="int", help="Minimum length to consider [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    evidencefile, scaffolds, contigs = args
    min_length = opts.min_length
    splitfasta, oagp, cagp = gaps([scaffolds, "--split", "--mingap={0}".format(opts.mingap)])

    agp = AGP(cagp)
    p = agp.graph

    ef = EvidenceFile(evidencefile, contigs)
    sizes = ef.sz
    q = ef.graph

    logging.debug("Reference graph: {0}".format(p))
    logging.debug("Patch graph: {0}".format(q))

    newagp = deepcopy(agp)

    seen = set()
    deleted = set()
    for a in agp:
        if a.is_gap:
            continue

        name = a.component_id
        object = a.object
        if name in deleted:
            print >>sys.stderr, "* Skip {0}, already embedded".format(name)
            continue

        seen.add(name)

        target_name, tag = get_target(p, name)
        path = q.get_path(name, target_name, tag=tag)
        path_size = sum([sizes[x.v] for x, t in path]) if path else None
        status = NO_UPDATE

        # Heuristic, the patch must not be too long
        if path and path_size > min_length and len(path) > 3:
            path = None

        if not path:
            print >>sys.stderr, name, target_name, path, path_size, status
            continue

        backward = False
        for x, t in path:
            if x.v in seen:
                print >>sys.stderr, "* Does not allow backward" " patch on {0}".format(x.v)
                backward = True
                break

        if backward:
            continue

        # Build the path plus the ends
        vv = q.get_node(name)
        path.appendleft((vv, tag))
        if tag == ">":
            path.reverse()
            status = INSERT_BEFORE
        elif target_name is None:
            status = INSERT_AFTER
        else:
            target = q.get_node(target_name)
            path.append((target, tag))
            status = INSERT_BETWEEN

        print >>sys.stderr, name, target_name, path, path_size, status

        # Trim the ends off from the constructed AGPLines
        lines = path_to_agp(q, path, object, sizes, status)
        if status == INSERT_BEFORE:
            lines = lines[:-1]
            td = newagp.insert_lines(name, lines, delete=True, verbose=True)
        elif status == INSERT_AFTER:
            lines = lines[1:]
            td = newagp.insert_lines(name, lines, after=True, delete=True, verbose=True)
        else:
            lines = lines[1:-1]
            td = newagp.update_between(name, target_name, lines, delete=True, verbose=True)
        deleted |= td
        seen |= td

    # Recruite big singleton contigs
    CUTOFF = opts.min_length
    for ctg, size in sizes.items():
        if ctg in seen:
            continue
        if size < CUTOFF:
            continue
        newagp.append(AGPLine.cline(ctg, ctg, sizes, "?"))

    # Write a new AGP file
    newagpfile = "embedded.agp"
    newagp.print_to_file(newagpfile, index=True)
    tidy([newagpfile, contigs])
Esempio n. 6
0
def embed(args):
    """
    %prog embed evidencefile scaffolds.fasta contigs.fasta

    Use SSPACE evidencefile to scaffold contigs into existing scaffold
    structure, as in `scaffolds.fasta`. Contigs.fasta were used by SSPACE
    directly to scaffold.

    Rules:
    1. Only update existing structure by embedding contigs small enough to fit.
    2. Promote singleton contigs only if they are big (>= min_length).
    """
    p = OptionParser(embed.__doc__)
    p.set_mingap(default=10)
    p.add_option("--min_length", default=200, type="int",
                 help="Minimum length to consider [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    evidencefile, scaffolds, contigs = args
    min_length = opts.min_length
    splitfasta, oagp, cagp = gaps([scaffolds, "--split",
                                   "--mingap={0}".format(opts.mingap)])

    agp = AGP(cagp)
    p = agp.graph

    ef = EvidenceFile(evidencefile, contigs)
    sizes = ef.sz
    q = ef.graph

    logging.debug("Reference graph: {0}".format(p))
    logging.debug("Patch graph: {0}".format(q))

    newagp = deepcopy(agp)

    seen = set()
    deleted = set()
    for a in agp:
        if a.is_gap:
            continue

        name = a.component_id
        object = a.object
        if name in deleted:
            print >> sys.stderr, "* Skip {0}, already embedded".format(name)
            continue

        seen.add(name)

        target_name, tag = get_target(p, name)
        path = q.get_path(name, target_name, tag=tag)
        path_size = sum([sizes[x.v] for x, t in path]) if path else None
        status = NO_UPDATE

        # Heuristic, the patch must not be too long
        if path and path_size > min_length and len(path) > 3:
            path = None

        if not path:
            print >> sys.stderr, name, target_name, path, path_size, status
            continue

        backward = False
        for x, t in path:
            if x.v in seen:
                print >> sys.stderr, "* Does not allow backward" \
                                     " patch on {0}".format(x.v)
                backward = True
                break

        if backward:
            continue

        # Build the path plus the ends
        vv = q.get_node(name)
        path.appendleft((vv, tag))
        if tag == ">":
            path.reverse()
            status = INSERT_BEFORE
        elif target_name is None:
            status = INSERT_AFTER
        else:
            target = q.get_node(target_name)
            path.append((target, tag))
            status = INSERT_BETWEEN

        print >> sys.stderr, name, target_name, path, path_size, status

        # Trim the ends off from the constructed AGPLines
        lines = path_to_agp(q, path, object, sizes, status)
        if status == INSERT_BEFORE:
            lines = lines[:-1]
            td = newagp.insert_lines(name, lines, \
                                 delete=True, verbose=True)
        elif status == INSERT_AFTER:
            lines = lines[1:]
            td = newagp.insert_lines(name, lines, after=True, \
                                 delete=True, verbose=True)
        else:
            lines = lines[1:-1]
            td = newagp.update_between(name, target_name, lines, \
                                 delete=True, verbose=True)
        deleted |= td
        seen |= td

    # Recruite big singleton contigs
    CUTOFF = opts.min_length
    for ctg, size in sizes.items():
        if ctg in seen:
            continue
        if size < CUTOFF:
            continue
        newagp.append(AGPLine.cline(ctg, ctg, sizes, "?"))

    # Write a new AGP file
    newagpfile = "embedded.agp"
    newagp.print_to_file(newagpfile, index=True)
    tidy([newagpfile, contigs])