# Module-level imports inferred from the calls below; the original module may
# organize these differently (Python 2 style, matching izip/iteritems).
import os
import json
import codecs
import pickle
import shelve
import multiprocessing
from itertools import chain, izip

import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import joinedload
from progressbar import (Bar, ETA, Percentage, ProgressBar, SimpleProgress,
        Timer)

import pyorganism as pyorg
import pyorganism.regulation as pyreg

# LOGGER, pymodels, ResultManager, print_dict, continuous_exec, the load_*,
# *_jobs, *_worker, and *_result helpers, and the preparation functions looked
# up via globals() are assumed to be defined elsewhere in the original module.


def main_continuous(args):
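    """Run all incomplete continuous-control jobs recorded in the database.

    ``args`` is expected to provide ``engine`` (a database URL passed to
    SQLAlchemy's create_engine) and ``nproc`` (the worker pool size).
    """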
    glbls = globals()
    engine = create_engine(args.engine)
    pymodels.Base.metadata.bind = engine
    pymodels.Session.configure(bind=engine)
    session = pymodels.Session()
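    # Fetch every job not yet marked complete, eagerly loading the related
    # configuration rows to avoid per-job queries later.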
    tasks = session.query(pymodels.Job).\
            options(joinedload("analysis"), joinedload("control"),
            joinedload("experiment")).filter(~pymodels.Job.complete).all()
    if not tasks:
        LOGGER.warning("Nothing to do")
        return
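    # Collect the distinct values along each configuration axis; every unique
    # combination is prepared exactly once below.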
    analysis_configs = {job.analysis for job in tasks}
    control_configs = {job.control for job in tasks}
    experiments = {job.experiment for job in tasks}
    preparations = {job.preparation for job in tasks}
    sampling = {job.sampling for job in tasks}
    projections = {job.projection for job in tasks}
    LOGGER.debug("%d analysis configurations", len(analysis_configs))
    LOGGER.debug("%d control configurations", len(control_configs))
    LOGGER.debug("%d experiments", len(experiments))
    LOGGER.debug("%d setup cases", len(preparations))
    LOGGER.debug("%d sampling methods", len(sampling))
    LOGGER.debug("%d network projections", len(projections))
    num_prep = len(analysis_configs) * len(control_configs) * len(experiments)\
            * len(preparations) * len(sampling) * len(projections)
    LOGGER.debug("%d total configurations", num_prep)
    LOGGER.info("Preparing Data")
    task_args = dict()
    bar = ProgressBar(maxval=num_prep, widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
            ETA()]).start()
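    # The loop nesting keeps the expensive pickle loads in the outer levels so
    # each feature map and network is read from disk only once.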
    for anal in analysis_configs:
        LOGGER.debug(" %s:", anal.version)
        feature2node = pyorg.read_pickle(os.path.join(anal.objects, anal.map))
        for cntrl in control_configs:
            LOGGER.debug("  %s", cntrl.type)
            net = pyorg.read_pickle(os.path.join(anal.objects, cntrl.network))
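            # Project the gene-level network onto transcription units and
            # operons once per control configuration; each job's projection
            # setting selects among the three below.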
            tu_net = pyreg.to_transcription_unit_based(net)
            op_net = pyreg.to_operon_based(net)
            for exp in experiments:
                LOGGER.debug("   %s", exp.strain)
                for prep in preparations:
                    LOGGER.debug("    %s", prep)
                    series = glbls[prep](session, exp)
                    for sampl in sampling:
                        LOGGER.debug("     %s", sampl)
                        for prj in projections:
                            LOGGER.debug("      %s", prj)
                            control = pyreg.ContinuousControl()
                            if prj == "tu":
                                control.setup(tu_net, series, feature2node, sampl)
                            elif prj == "operon":
                                control.setup(op_net, series, feature2node, sampl)
                            else:
                                control.setup(net, series, feature2node, sampl)
                            if cntrl.type == "analog":
                                control.from_gpn()
                            elif cntrl.type == "digital":
                                control.from_trn()
                            else:
                                raise ValueError("unknown control type"
                                        " '{}'".format(cntrl.type))
                            task_args[(anal.id, cntrl.id, exp.id, prep, sampl,
                                prj)] = (control, series.columns)
                            bar += 1
    bar.finish()
    LOGGER.info("Running Jobs")
    tasks = [task_args[(job.analysis.id, job.control.id, job.experiment.id,
        job.preparation, job.sampling, job.projection)] + (job.measure,
        job.random_num, job.delay, job.id) for job in tasks]
    pool = multiprocessing.Pool(args.nproc)
    result_it = pool.imap_unordered(continuous_exec, tasks)
    bar = ProgressBar(maxval=len(tasks), widgets=[Timer(), " ",
            SimpleProgress(), " ", Percentage(), " ", Bar(), " ",
            ETA()]).start()
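    # Store results as workers finish; a failed job is rolled back and left
    # incomplete so that a subsequent run can retry it.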
    for (job_id, z_scores, cntrl_scores, samples, points) in result_it:
        results = list()
        try:
            job = session.query(pymodels.Job).filter_by(id=job_id).one()
            for (i, name) in enumerate(points):
                res = pymodels.Result(control=cntrl_scores[i], ctc=z_scores[i],
                        point=name, job=job)
                session.add(res)
                results.append(res)
            job.complete = True
            session.commit()
        except Exception:
            LOGGER.exception("Failed to store results for job %d", job_id)
            session.rollback()
            bar += 1
            continue
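        # Optionally persist a random subset of each result's sampled control
        # values for later inspection.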
        if job.selection > 0:
            try:
                for (i, res) in enumerate(results):
                    # use a lower-level SQLAlchemy Core insert for speed
                    session.execute(pymodels.RandomSample.__table__.insert(),
                            [{"control": val, "result_id": res.id}\
                            for val in np.random.choice(samples[i], job.selection,
                            replace=False)])
                session.commit()
            except Exception:
                LOGGER.exception("Failed to store random samples for job %d",
                        job_id)
                session.rollback()
                bar += 1
                continue
        bar += 1
    bar.finish()
    session.close()
def main(remote_client, args):
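    """Dispatch continuous or discrete analyses to an IPython.parallel cluster.

    ``args`` is expected to provide ``config`` (path to a JSON configuration
    file), ``encoding``, and ``log_level``.
    """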
    config = json.load(codecs.open(args.config, encoding=args.encoding, mode="rb"))
    if config["continuous"]:
        load_func = load_continuous
        table_key = "/Continuous"
        job_gen = continuous_jobs
        worker = continuous_worker
        result = continuous_result
    else:
        load_func = load_discrete
        table_key = "/Discrete"
        job_gen = discrete_jobs
        worker = discrete_worker
        result = discrete_result
    organism = pyorg.Organism(name=config["organism"])
    load_func(organism, config)
    LOGGER.info("Load data")
    glob_vars = globals()
    data = config["data"]
    network = config["network"]
    analysis = config["analysis"]
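    # Build a nested namespace (version -> control type -> ...) of genes,
    # networks, and prepared time series; it is shelved below and re-imported
    # into each worker's globals.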
    namespace = dict()
    namespace["genes"] = dict()
    namespace["networks"] = dict()
    namespace["prepared"] = dict()
    for version in config["versions"]:
        LOGGER.info("{0:*^78s}".format(version))
        namespace["genes"][version] = pyorg.read_pickle(os.path.join(
                data["base"], version, data["gene_path"]))
        id2gene = pyorg.read_pickle(os.path.join(data["base"], version,
                data["mapping_path"]))
        namespace["networks"][version] = dict()
        for (cntrl_type, net_file, projections) in izip(analysis["control_types"],
                network["paths"], network["projections"]):
            net = pyorg.read_pickle(os.path.join(data["base"], version, net_file))
            namespace["networks"][version][cntrl_type] = dict()
            for basis in projections:
                if basis == "gene":
                    namespace["networks"][version][cntrl_type][basis] = net
                elif basis == "tu":
                    namespace["networks"][version][cntrl_type][basis] =\
                            pyreg.to_transcription_unit_based(net)
                elif basis == "operon":
                    namespace["networks"][version][cntrl_type][basis] =\
                            pyreg.to_operon_based(net)
        namespace["prepared"][version] = dict()
        for (cntrl_type, experiments, setups) in izip(analysis["control_types"],
                 analysis["experimental_sets"], analysis["experimental_setups"]):
            LOGGER.info("{0:*^78s}".format(cntrl_type))
            namespace["prepared"][version][cntrl_type] = dict()
            for (exp_name, exp_setup) in izip(experiments, setups):
                LOGGER.info("{0:*^78s}".format(exp_name))
                df = organism.activity[exp_name]
                setup_func = glob_vars[exp_setup]
                namespace["prepared"][version][cntrl_type][exp_name] =\
                        setup_func(cntrl_type, df, id2gene)
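                # Materialize delayed and rate-based variants of the prepared
                # series only when some configured method actually needs them.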
                if any(method.startswith("delayed") for method in chain(analysis["control"], *analysis["ctc"])):
                    namespace["prepared"][version][cntrl_type][exp_name]["delayed"] = dict()
                    for delta in analysis["delays"]:
                        delayed_continuous(namespace["prepared"][version][cntrl_type][exp_name],
                                delta)
                if any(ms_name.endswith("comparison") for ms_name in chain(*analysis["measures"])):
                    rate_continuous(namespace["prepared"][version][cntrl_type][exp_name])
    LOGGER.debug("\n".join(print_dict(namespace)))
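    # Persist the prepared namespace to a shelve file so that workers can load
    # it from local disk instead of receiving it over the wire (cf. the
    # commented-out direct push below).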
    db = shelve.open(config["shelve"], protocol=pickle.HIGHEST_PROTOCOL)
    for (key, value) in namespace.iteritems():
        db[key] = value
    db.close()
    # general parallel setup using IPython.parallel
    LOGGER.info("Remote imports")
    d_view = remote_client.direct_view()
    d_view.execute("import numpy as np; "\
            "import shelve; import pickle;"\
            "import pyorganism as pyorg; import pyorganism.regulation as pyreg;"\
            "import logging; from IPython.config import Application;"\
            "LOGGER = Application.instance().log;"\
            "LOGGER.setLevel(logging.{level});".format(level=args.log_level),
            block=True)
    LOGGER.info("Transfer data")
#    d_view.push(namespace, block=True)
    d_view.execute("db = shelve.open('{shelve}', protocol=pickle.HIGHEST_PROTOCOL);"\
                    "globals().update(db);db.close()".format(shelve=config["shelve"]), block=True)
    LOGGER.info("Generate job descriptions")
    jobs = job_gen(organism, config, namespace)
    l_view = remote_client.load_balanced_view()
    bar = ProgressBar(maxval=len(jobs), widgets=[Timer(), " ", Percentage(),
            " ", Bar(), " ", ETA()]).start()
    result_mngr = ResultManager(config["output"], table_key)
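    # ordered=False yields results in completion order rather than submission
    # order, so the progress bar advances as soon as any engine finishes.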
    results_it = l_view.map(worker, jobs, ordered=False, block=False)
    for (spec, res_cntrl, res_ctc, samples) in results_it:
        LOGGER.debug(res_cntrl)
        LOGGER.debug(res_ctc)
        result(result_mngr, spec, res_cntrl, res_ctc, samples)
        bar += 1
    result_mngr.finalize()
    bar.finish()
    LOGGER.info("parallel speed-up was %.3g",
            results_it.serial_time / results_it.wall_time)
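
# A minimal, hypothetical entry point sketch for main_continuous; the original
# module's CLI is not shown here, so the argument names below are assumptions
# that merely match the attributes the function reads (args.engine,
# args.nproc).
if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("engine",
            help="SQLAlchemy database URL, e.g., sqlite:///jobs.db")
    parser.add_argument("--nproc", type=int,
            default=multiprocessing.cpu_count(),
            help="number of worker processes")
    main_continuous(parser.parse_args())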