Exemple #1
0
def optimize_ordering(fwtour, clm, phase, cpus):
    """
    Run a Genetic Algorithm (GA) to improve the contig ordering held by `clm`.

    Args:
        fwtour: Passed to `print_tour` as its first argument (presumably an
            open, writable handle -- confirm against callers).
        clm: Object exposing `active_contigs`, `active_sizes`, `M`, `tour`,
            `signs` and `oo`. Its `tour` attribute is overwritten with the
            GA result.
        phase: Value embedded in the progress label ("GA<phase>-<gen>").
        cpus: Forwarded unchanged to `GA_run`.

    Returns:
        The tour returned by `GA_run`.
    """
    from .chic import score_evaluate_M

    # Snapshot everything the GA and the progress reporter need from `clm`
    contig_names = clm.active_contigs
    sizes = clm.active_sizes
    contact_matrix = clm.M
    start_tour = clm.tour
    orientations = clm.signs
    oo = clm.oo

    def report(tour, gen, phase, oo):
        # Label looks like "GA<phase>-<gen>[-<first fitness value>]"
        parts = ["GA{}-{}".format(phase, gen)]
        fitness = getattr(tour, "fitness", None)
        if fitness:
            first_value = "{0}".format(fitness).split(",")[0].replace("(", "")
            parts.append(first_value)
        label = "-".join(parts)
        # Only every 20th generation is written out
        if gen % 20 == 0:
            print_tour(fwtour, tour, label, contig_names, oo,
                       signs=orientations)
        return tour

    reporter = partial(report, phase=phase, oo=oo)
    toolbox = GA_setup(start_tour)
    toolbox.register(
        "evaluate",
        score_evaluate_M,
        tour_sizes=sizes,
        tour_M=contact_matrix,
    )
    best_tour, _ = GA_run(
        toolbox,
        ngen=1000,
        npop=100,
        cpus=cpus,
        callback=reporter,
    )
    clm.tour = best_tour

    return best_tour
Exemple #2
0
def optimize_ordering(fwtour, clm, phase, cpus):
    """
    Run a Genetic Algorithm (GA) to improve the contig ordering held by `clm`.

    Args:
        fwtour: Passed to `print_tour` as its first argument (presumably an
            open, writable handle -- confirm against callers).
        clm: Object exposing `active_contigs`, `active_sizes`, `M`, `tour`,
            `signs` and `oo`. Its `tour` attribute is overwritten with the
            GA result.
        phase: Value embedded in the progress label ("GA<phase>-<gen>").
        cpus: Forwarded unchanged to `GA_run`.

    Returns:
        The tour returned by `GA_run`.
    """
    from .chic import score_evaluate_M

    # Snapshot everything the GA and the progress reporter need from `clm`
    contig_names = clm.active_contigs
    sizes = clm.active_sizes
    contact_matrix = clm.M
    start_tour = clm.tour
    orientations = clm.signs
    oo = clm.oo

    def report(tour, gen, phase, oo):
        # Label looks like "GA<phase>-<gen>[-<first fitness value>]"
        parts = ["GA{}-{}".format(phase, gen)]
        fitness = getattr(tour, "fitness", None)
        if fitness:
            first_value = "{0}".format(fitness).split(",")[0].replace("(", "")
            parts.append(first_value)
        label = "-".join(parts)
        # Only every 20th generation is written out
        if gen % 20 == 0:
            print_tour(fwtour, tour, label, contig_names, oo,
                       signs=orientations)
        return tour

    reporter = partial(report, phase=phase, oo=oo)
    toolbox = GA_setup(start_tour)
    toolbox.register(
        "evaluate",
        score_evaluate_M,
        tour_sizes=sizes,
        tour_M=contact_matrix,
    )
    best_tour, _ = GA_run(
        toolbox,
        ngen=1000,
        npop=100,
        cpus=cpus,
        callback=reporter,
    )
    clm.tour = best_tour

    return best_tour
Exemple #3
0
    def __init__(self, lgs, scaffolds, mapc, pivot, weights, sizes,
                 function=(lambda x: x.rank), linkage=min,
                 ngen=500, npop=100, cpus=8):
        """
        Order and orient `scaffolds` through repeated GA rounds.

        The inputs are stored on the instance, an initial orientation and
        order are computed, and then GA rounds are run until a round's
        fitness no longer exceeds the best fitness seen so far. The best
        tour is stored in `self.tour` as (scaffold, orientation) pairs with
        the orientation recoded to '?', '+' or '-'. `self.object` is set to
        "chr<lg>" for the first entry of `self.lgs` whose map name equals
        `pivot`; it is left unset when no entry matches.

        `ngen`, `npop` and `cpus` are forwarded to `GA_run`.
        """
        self.lgs = lgs
        self.lengths = mapc.lengths
        self.bins = mapc.bins
        self.sizes = sizes
        self.scaffolds = scaffolds
        self.pivot = pivot
        self.weights = weights
        self.function = function
        self.linkage = linkage

        self.prepare_linkage_groups()  # populate all data
        initial_signs = self.assign_orientation()
        assert len(initial_signs) == len(scaffolds)
        orientation_of = dict(zip(scaffolds, initial_signs))
        tour = [(scf, orientation_of[scf]) for scf in self.assign_order()]

        best_tour = best_fitness = None
        round_no = 0
        # Multiple EC rounds due to orientation fixes
        while True:
            logging.debug("Start EC round {0}".format(round_no))
            orientation_of = dict(tour)
            scfs, tour, ww = self.prepare_ec(scaffolds, tour, weights)
            toolbox = GA_setup(tour)
            toolbox.register(
                "evaluate", colinear_evaluate_multi, scfs=scfs, weights=ww
            )
            tour, fitness = GA_run(toolbox, ngen=ngen, npop=npop, cpus=cpus)
            # Map GA indices back to scaffold names, then re-attach the
            # orientation each scaffold had going into this round
            names = [scaffolds[idx] for idx in tour]
            tour = [(name, orientation_of[name]) for name in names]

            stalled = best_fitness and fitness <= best_fitness
            if stalled:
                message = "No fitness improvement: {0}. Exit EC."
                logging.debug(message.format(best_fitness))
                break

            best_tour, best_fitness = tour, fitness
            logging.debug("Current best fitness: {0}".format(best_fitness))
            tour = self.fix_orientation(tour)
            round_no += 1

        # Translate numeric orientations into their symbolic form
        recode = {0: '?', 1: '+', -1: '-'}
        self.tour = [(name, recode[sign]) for name, sign in best_tour]

        for entry in self.lgs:
            mapname, lg = entry.rsplit("-", 1)
            if mapname != pivot:
                continue
            self.object = "chr{0}".format(lg)
            break
Exemple #4
0
def layout(args):
    """
    %prog layout query.subject.simple query.seqids subject.seqids

    Compute optimal seqids order in a second genome, based on seqids on one
    genome, given the pairwise blocks in .simple format.
    """
    # NOTE: the docstring above doubles as the OptionParser usage text, so
    # implementation notes live in comments instead.
    #
    # `args` is the command-line argument list. After option parsing it must
    # hold exactly three positionals: the .simple file and two
    # comma-separated seqid lists. The resulting query seqid order is printed
    # to stdout as one comma-separated line; nothing is returned.
    from jcvi.algorithms.ec import GA_setup, GA_run

    p = OptionParser(layout.__doc__)
    p.set_beds()
    p.set_cpus(cpus=32)
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    simplefile, qseqids, sseqids = args
    qbed, sbed, qorder, sorder, is_self = check_beds(simplefile, p, opts)

    qseqids = qseqids.strip().split(",")
    sseqids = sseqids.strip().split(",")
    qseqids_ii = dict((s, i) for i, s in enumerate(qseqids))
    sseqids_ii = dict((s, i) for i, s in enumerate(sseqids))

    # Accumulate block scores per (subject seqid index, query seqid) pair
    blocks = SimpleFile(simplefile).blocks
    scores = defaultdict(int)
    for a, b, c, d, score, orientation, hl in blocks:
        _, q = qorder[a]
        _, s = sorder[c]
        qseqid, sseqid = q.seqid, s.seqid
        # Dict lookup instead of scanning the seqid list for every block
        if sseqid not in sseqids_ii:
            continue
        scores[sseqids_ii[sseqid], qseqid] += score

    # Sorting by key orders the entries by subject index first
    data = []
    for (_, qseqid), score in sorted(scores.items()):
        if qseqid not in qseqids_ii:
            continue
        data.append((qseqids_ii[qseqid], score))

    # Materialise the range so the GA receives a real list on Python 3 too
    tour = list(range(len(qseqids)))
    toolbox = GA_setup(tour)
    toolbox.register("evaluate", colinear_evaluate_weights, data=data)
    tour, fitness = GA_run(toolbox, ngen=100, npop=100, cpus=opts.cpus)
    tour = [qseqids[x] for x in tour]

    # Single-argument call form prints identically on Python 2 and 3
    print(",".join(tour))
Exemple #5
0
def layout(args):
    """
    %prog layout query.subject.simple query.seqids subject.seqids

    Compute optimal seqids order in a second genome, based on seqids on one
    genome, given the pairwise blocks in .simple format.
    """
    # NOTE: the docstring above doubles as the OptionParser usage text, so
    # implementation notes live in comments instead.
    #
    # `args` is the command-line argument list. After option parsing it must
    # hold exactly three positionals: the .simple file and two
    # comma-separated seqid lists. The resulting query seqid order is printed
    # to stdout as one comma-separated line; nothing is returned.
    from jcvi.algorithms.ec import GA_setup, GA_run

    p = OptionParser(layout.__doc__)
    p.set_beds()
    p.set_cpus(cpus=32)
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    simplefile, qseqids, sseqids = args
    qbed, sbed, qorder, sorder, is_self = check_beds(simplefile, p, opts)

    qseqids = qseqids.strip().split(",")
    sseqids = sseqids.strip().split(",")
    qseqids_ii = dict((s, i) for i, s in enumerate(qseqids))
    sseqids_ii = dict((s, i) for i, s in enumerate(sseqids))

    # Accumulate block scores per (subject seqid index, query seqid) pair
    blocks = SimpleFile(simplefile).blocks
    scores = defaultdict(int)
    for a, b, c, d, score, orientation, hl in blocks:
        _, q = qorder[a]
        _, s = sorder[c]
        qseqid, sseqid = q.seqid, s.seqid
        # Dict lookup instead of scanning the seqid list for every block
        if sseqid not in sseqids_ii:
            continue
        scores[sseqids_ii[sseqid], qseqid] += score

    # Sorting by key orders the entries by subject index first
    data = []
    for (_, qseqid), score in sorted(scores.items()):
        if qseqid not in qseqids_ii:
            continue
        data.append((qseqids_ii[qseqid], score))

    # Materialise the range so the GA receives a real list on Python 3 too
    tour = list(range(len(qseqids)))
    toolbox = GA_setup(tour)
    toolbox.register("evaluate", colinear_evaluate_weights, data=data)
    tour, fitness = GA_run(toolbox, ngen=100, npop=100, cpus=opts.cpus)
    tour = [qseqids[x] for x in tour]

    # Single-argument call form prints identically on Python 2 and 3
    print(",".join(tour))
Exemple #6
0
def test_ec(input_array, expected):
    """
    Run the GA on `input_array` and check the resulting tour and fitness.

    The tour returned by `GA_run` must equal `expected` when converted to a
    list, and its fitness must equal `creator.FitnessMax((200.0, ))`.
    """
    from jcvi.algorithms.ec import (
        GA_setup,
        GA_run,
        colinear_evaluate,
        creator,
        make_data,
    )

    n_points = 200
    n_scaffolds = 20
    scaffolds = make_data(n_points, n_scaffolds)

    toolbox = GA_setup(input_array)
    toolbox.register("evaluate", colinear_evaluate, scaffolds=scaffolds)

    # Attach the returned fitness to the returned tour explicitly
    result = GA_run(toolbox, cpus=8)
    tour, fitness = result
    tour.fitness = fitness
    print(tour, tour.fitness)

    assert list(tour) == expected
    assert tour.fitness == creator.FitnessMax((200.0, ))
Exemple #7
0
def score(args):
    """
    %prog score main_results/ cached_data/ contigsfasta

    Score the current LACHESIS CLM.
    """
    # NOTE: the docstring above doubles as the OptionParser usage text, so
    # implementation notes live in comments instead.
    #
    # `args` is the command-line argument list. After option parsing it must
    # hold exactly three positionals: the directory holding "*.ordering"
    # files, the directory holding "all.GLM", and the contigs FASTA. Tours
    # are written to a file named "tour" in the current directory and the
    # final tour plus fitness is printed to stdout; nothing is returned.
    p = OptionParser(score.__doc__)
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    mdir, cdir, contigsfasta = args
    orderingfiles = natsorted(iglob(mdir, "*.ordering"))
    sizes = Sizes(contigsfasta)
    contig_names = list(sizes.iter_names())
    contig_ids = dict((name, i) for (i, name) in enumerate(contig_names))

    oo = []
    # Load contact matrix
    glm = op.join(cdir, "all.GLM")
    N = len(contig_ids)
    M = np.zeros((N, N), dtype=int)
    # Context manager so the handle is closed even if a row fails to parse
    with open(glm) as fp:
        for row in fp:
            if row[0] == '#':
                continue
            x, y, z = row.split()
            # Rows whose first field is 'X' are skipped (looks like a
            # header row -- confirm against the GLM format)
            if x == 'X':
                continue
            M[int(x), int(y)] = int(z)

    # Context manager so "tour" is flushed and closed even if the GA raises
    with open("tour", "w") as fwtour:

        def callback(tour, gen, oo):
            # Label looks like "GA-<gen>[-<first fitness value>]"
            fitness = tour.fitness if hasattr(tour, "fitness") else None
            label = "GA-{0}".format(gen)
            if fitness:
                fitness = "{0}".format(fitness).split(",")[0].replace("(", "")
                label += "-" + fitness
            print_tour(fwtour, tour, label, contig_names, oo)
            return tour

        for ofile in orderingfiles:
            co = ContigOrdering(ofile)
            for x in co:
                contig_id = contig_ids[x.contig_name]
                oo.append(contig_id)
            pf = op.basename(ofile).split(".")[0]
            # Single-argument call form prints identically on Python 2 and 3
            print(pf)
            print(oo)

            tour, tour_sizes, tour_M = prepare_ec(oo, sizes, M)
            # Store INIT tour
            print_tour(fwtour, tour, "INIT", contig_names, oo)

            # Faster Cython version for evaluation
            from .chic import score_evaluate_M
            callbacki = partial(callback, oo=oo)
            toolbox = GA_setup(tour)
            toolbox.register("evaluate",
                             score_evaluate_M,
                             tour_sizes=tour_sizes,
                             tour_M=tour_M)
            tour, fitness = GA_run(toolbox,
                                   npop=100,
                                   cpus=opts.cpus,
                                   callback=callbacki)
            tour.fitness = fitness
            # Formatted so the output matches `print a, b` on Python 2 and 3
            print("{0} {1}".format(tour, tour.fitness))
            # Only the first ordering file is processed
            break
Exemple #8
0
def score(args):
    """
    %prog score main_results/ cached_data/ contigsfasta

    Score the current LACHESIS CLM.
    """
    # NOTE: the docstring above doubles as the OptionParser usage text, so
    # implementation notes live in comments instead.
    #
    # `args` is the command-line argument list. After option parsing it must
    # hold exactly three positionals: the directory holding "*.ordering"
    # files, the directory holding "all.GLM", and the contigs FASTA. Tours
    # are written to a file named "tour" in the current directory and the
    # final tour plus fitness is printed to stdout; nothing is returned.
    p = OptionParser(score.__doc__)
    p.set_cpus()
    opts, args = p.parse_args(args)

    if len(args) != 3:
        sys.exit(not p.print_help())

    mdir, cdir, contigsfasta = args
    orderingfiles = natsorted(iglob(mdir, "*.ordering"))
    sizes = Sizes(contigsfasta)
    contig_names = list(sizes.iter_names())
    contig_ids = dict((name, i) for (i, name) in enumerate(contig_names))

    oo = []
    # Load contact matrix
    glm = op.join(cdir, "all.GLM")
    N = len(contig_ids)
    M = np.zeros((N, N), dtype=int)
    # Context manager so the handle is closed even if a row fails to parse
    with open(glm) as fp:
        for row in fp:
            if row[0] == '#':
                continue
            x, y, z = row.split()
            # Rows whose first field is 'X' are skipped (looks like a
            # header row -- confirm against the GLM format)
            if x == 'X':
                continue
            M[int(x), int(y)] = int(z)

    # Context manager so "tour" is flushed and closed even if the GA raises
    with open("tour", "w") as fwtour:

        def callback(tour, gen, oo):
            # Label looks like "GA-<gen>[-<first fitness value>]"
            fitness = tour.fitness if hasattr(tour, "fitness") else None
            label = "GA-{0}".format(gen)
            if fitness:
                fitness = "{0}".format(fitness).split(",")[0].replace("(", "")
                label += "-" + fitness
            print_tour(fwtour, tour, label, contig_names, oo)
            return tour

        for ofile in orderingfiles:
            co = ContigOrdering(ofile)
            for x in co:
                contig_id = contig_ids[x.contig_name]
                oo.append(contig_id)
            pf = op.basename(ofile).split(".")[0]
            # Single-argument call form prints identically on Python 2 and 3
            print(pf)
            print(oo)

            tour, tour_sizes, tour_M = prepare_ec(oo, sizes, M)
            # Store INIT tour
            print_tour(fwtour, tour, "INIT", contig_names, oo)

            # Faster Cython version for evaluation
            from .chic import score_evaluate_M
            callbacki = partial(callback, oo=oo)
            toolbox = GA_setup(tour)
            toolbox.register("evaluate", score_evaluate_M,
                             tour_sizes=tour_sizes, tour_M=tour_M)
            tour, fitness = GA_run(toolbox, npop=100, cpus=opts.cpus,
                                   callback=callbacki)
            tour.fitness = fitness
            # Formatted so the output matches `print a, b` on Python 2 and 3
            print("{0} {1}".format(tour, tour.fitness))
            # Only the first ordering file is processed
            break