Python commit Examples

Programming Language: Python

Namespace/Package Name: nprlib.db

Method/Function: commit

Examples at hotexamples.com: 4

Python commit - 4 examples found. These are the top-rated real-world Python examples of nprlib.db.commit, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: scheduler.py Project: jhcepas/npr

def control_c(_signal, _frame):    
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    db.commit()
    
    ver = {28: "0", 26: "1", 24: "2", 22: "3", 20: "4", 10: "5"}
    ver_level = log.level
    
    print '\n\nYou pressed Ctrl+C!'
    print 'q) quit'
    print 'v) change verbosity level:', ver.get(ver_level, ver_level)
    print 'd) enter debug mode'
    print 'c) continue execution'
    key = ask("   Choose:", ["q", "v", "d", "c"])
    if key == "q":
        raise KeyboardInterrupt
    elif key == "d":
        signal.signal(signal.SIGALRM, debug)
        signal.alarm(1)
        return
    elif key == "v":
        vl = ask("new level", sorted(ver.values()))
        new_level = sorted(ver.keys(), reverse=True)[int(vl)]
        log.setLevel(new_level)
    elif key == "d":
        import pdb
        pdb.set_trace()
    signal.signal(signal.SIGINT, control_c)

Example #2

Show file

File: supermatrix.py Project: jhcepas/npr

def process_task(task, wkname, npr_conf, nodeid2info):
    cogconf, cogclass = npr_conf.cog_selector
    concatconf, concatclass = npr_conf.alg_concatenator
    treebuilderconf, treebuilderclass = npr_conf.tree_builder
    splitterconf, splitterclass = npr_conf.tree_splitter
    
    threadid, nodeid, seqtype, ttype = (task.threadid, task.nodeid,
                                        task.seqtype, task.ttype)
    cladeid, targets, outgroups = db.get_node_info(threadid, nodeid)

    if not treebuilderclass or task.size < 4:
        # Allows to dump algs in workflows with no tree tasks or if tree
        # inference does not make sense given the number of sequences. DummyTree
        # will produce a fake fully collapsed newick tree.
        treebuilderclass = DummyTree
    
    if outgroups and len(outgroups) > 1:
        constrain_id = nodeid
    else:
        constrain_id = None
        
    node_info = nodeid2info[nodeid]
    conf = GLOBALS[task.configid]
    new_tasks = []    
    if ttype == "cog_selector":
       
        # Generates a md5 id based on the genetree configuration workflow used
        # for the concat alg task. If something changes, concat alg will change
        # and the associated tree will be rebuilt
        config_blocks = set([wkname])
        for key, value in conf[wkname].iteritems():
            if isinstance(value, list) or  isinstance(value, tuple) \
                    or isinstance(value, set):
                for elem in value:
                    config_blocks.add(elem[1:]) if isinstance(elem, str) and elem.startswith("@") else None
            elif isinstance(value, str):
                config_blocks.add(value[1:]) if value.startswith("@") else None
        config_checksum =  md5(''.join(["[%s]\n%s" %(x, dict_string(conf[x]))
                                        for x in sorted(config_blocks)]))

        # THIS PART HAS BEEN MOVED TO COG_SELECTOR TASK
        # Check that current selection of cogs will cover all target and
        # outgroup species
        #cog_hard_limit = int(conf[concatconf]["_max_cogs"])
        #sp_repr = defaultdict(int)
        #for co in task.raw_cogs[:cog_hard_limit]:
        #    for sp, seq in co:
        #        sp_repr[sp] += 1
        #missing_sp = (targets | outgroups) - set(sp_repr.keys())
        #if missing_sp:
        #    raise TaskError("missing species under current cog selection: %s" %missing_sp)
        #else:
        #    log.log(28, "Analysis of current COG selection:")
        #    for sp, ncogs in sorted(sp_repr.items(), key=lambda x:x[1]):
        #        log.log(28, "   % 30s species present in % 6d COGs" %(sp, ncogs))
                
        # register concat alignment task. NodeId associated to concat_alg tasks
        # and all its children jobs should take into account cog information and
        # not only species and outgroups included.
        
        concat_job = concatclass(task.cogs, seqtype, conf, concatconf,
                                 config_checksum)
        db.add_node(threadid,
                    concat_job.nodeid, cladeid,
                    targets, outgroups)

        # Register Tree constrains
        constrain_tree = "(%s, (%s));" %(','.join(sorted(outgroups)), 
                                         ','.join(sorted(targets)))
        _outs = "\n".join(map(lambda name: ">%s\n0" %name, sorted(outgroups)))
        _tars = "\n".join(map(lambda name: ">%s\n1" %name, sorted(targets)))
        constrain_alg = '\n'.join([_outs, _tars])
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_tree, constrain_tree)
        db.add_task_data(concat_job.nodeid, DATATYPES.constrain_alg, constrain_alg)
        db.dataconn.commit() # since the creation of some Task objects
                             # may require this info, I need to commit
                             # right now.
        concat_job.size = task.size
        new_tasks.append(concat_job)
       
    elif ttype == "concat_alg":
        # register tree for concat alignment, using constraint tree if
        # necessary
        alg_id = db.get_dataid(task.taskid, DATATYPES.concat_alg_phylip)
        try:
            parts_id = db.get_dataid(task.taskid, DATATYPES.model_partitions)
        except ValueError:
            parts_id = None

        nodeid2info[nodeid]["size"] = task.size
        nodeid2info[nodeid]["target_seqs"] = targets
        nodeid2info[nodeid]["out_seqs"] = outgroups
        tree_task = treebuilderclass(nodeid, alg_id,
                                     constrain_id, None,
                                     task.seqtype, conf, treebuilderconf,
                                     parts_id=parts_id)
        tree_task.size = task.size
        new_tasks.append(tree_task)
        
    elif ttype == "tree":
        merger_task = splitterclass(nodeid, seqtype, task.tree_file, conf, splitterconf)
        merger_task.size = task.size
        new_tasks.append(merger_task)

    elif ttype == "treemerger":
        # Lets merge with main tree
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task)
        db.update_node(nid=task.nodeid, runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()

        if not isinstance(treebuilderclass, DummyTree) and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree

                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(targets), len(outgroups))

                target_cladeids = None
                if tobool(conf[splitterconf].get("_find_ncbi_targets", False)):
                    tcopy = mtree.copy()
                    ncbi.connect_database()
                    tax2name, tax2track = ncbi.annotate_tree_with_taxa(tcopy, None)
                    #tax2name, tax2track = ncbi.annotate_tree_with_taxa(tcopy, "fake") # for testing sptree example
                    n2content = tcopy.get_cached_content()
                    broken_branches, broken_clades, broken_clade_sizes, tax2name = ncbi.get_broken_branches(tcopy, n2content)
                    log.log(28, 'restricting NPR to broken clades: '+
                            colorify(', '.join(map(lambda x: "%s"%tax2name[x], broken_clades)), "wr"))
                    target_cladeids = set()
                    for branch in broken_branches:
                        print branch.get_ascii(attributes=['spname', 'taxid'], compact=True)
                        print map(lambda x: "%s"%tax2name[x], broken_branches[branch])
                        target_cladeids.add(branch.cladeid)

                for node, seqs, outs, wkname in get_next_npr_node(task.configid, ttree,
                                                          task.out_seqs, mtree, None,
                                                          npr_conf, target_cladeids): # None is to avoid alg checks
                    log.log(24, "Adding new node: %s seqs, %s outgroups",
                            len(seqs), len(outs))
                    new_task_node = cogclass(seqs, outs,
                                             source_seqtype, conf, cogconf)
                    new_task_node.target_wkname = wkname
                    new_tasks.append(new_task_node)
                    db.add_node(threadid,
                                new_task_node.nodeid, new_task_node.cladeid,
                                new_task_node.targets,
                                new_task_node.outgroups)
    return new_tasks

Example #3

Show file

File: genetree.py Project: jhcepas/npr

            #else:
            #    treemerge_task = TreeSplitter(nodeid, seqtype, task.tree_file, main_tree, conf)

        treemerge_task.size = task.size
        new_tasks.append(treemerge_task)

    elif ttype == "treemerger":
        if not task.task_tree:
            task.finish()

        log.log(24, "Saving task tree...")
        annotate_node(task.task_tree, task) 
        db.update_node(nid=task.nodeid, 
                       runid=task.threadid,
                       newick=db.encode(task.task_tree))
        db.commit()
        
        if not isinstance(treebuilderclass, DummyTree) and npr_conf.max_iters > 1:
            current_iter = get_iternumber(threadid)
            if npr_conf.max_iters and current_iter >= npr_conf.max_iters:
                log.warning("Maximum number of iterations reached!")
            else:
                # Add new nodes
                source_seqtype = "aa" if "aa" in GLOBALS["seqtypes"] else "nt"
                ttree, mtree = task.task_tree, task.main_tree
                log.log(26, "Processing tree: %s seqs, %s outgroups",
                        len(target_seqs), len(out_seqs))
                alg_path = node_info.get("clean_alg_path", node_info["alg_path"])
                for node, seqs, outs, wkname in get_next_npr_node(threadid, ttree,
                                                          task.out_seqs, mtree,
                                                          alg_path, npr_conf):

Example #4

Show file

File: scheduler.py Project: jhcepas/npr

def schedule(workflow_task_processor, pending_tasks, schedule_time, execution, debug, norender):    
    # Adjust debug mode
    if debug == "all":
        log.setLevel(10)
    pending_tasks = set(pending_tasks)
    
    ## ===================================
    ## INITIALIZE BASIC VARS 
    execution, run_detached = execution
    thread2tasks = defaultdict(list)
    for task in pending_tasks:
        thread2tasks[task.configid].append(task)
    expected_threads = set(thread2tasks.keys())
    past_threads = {}
    thread_errors = defaultdict(list)
    ## END OF VARS AND SHORTCUTS
    ## ===================================

    cores_total = GLOBALS["_max_cores"]
    if cores_total > 0:
        job_queue = Queue()
        
        back_launcher = Process(target=background_job_launcher,
                                args=(job_queue, run_detached,
                                      GLOBALS["launch_time"], cores_total))
        back_launcher.start()
    else:
        job_queue = None
        back_launcher = None

    GLOBALS["_background_scheduler"] = back_launcher
    GLOBALS["_job_queue"] = job_queue

        
    # Captures Ctrl-C for debuging DEBUG 
    #signal.signal(signal.SIGINT, control_c)
    

    
    last_report_time = None
    
    BUG = set()
    try:
        # Enters into task scheduling
        while pending_tasks:
            wtime = schedule_time

            # ask SGE for running jobs
            if execution == "sge":
                sgeid2jobs = db.get_sge_tasks()
                qstat_jobs = sge.qstat()
            else:
                qstat_jobs = None

            # Show summary of pending tasks per thread
            thread2tasks = defaultdict(list)
            for task in pending_tasks:
                thread2tasks[task.configid].append(task)
            set_logindent(0)
            log.log(28, "@@13: Updating tasks status:@@1: (%s)" % (ctime()))
            info_lines = []
            for tid, tlist in thread2tasks.iteritems():
                threadname = GLOBALS[tid]["_name"]
                sizelist = ["%s" %getattr(_ts, "size", "?") for _ts in tlist]
                info = "Thread @@13:%s@@1:: pending tasks: @@8:%s@@1: of sizes: %s" %(
                    threadname, len(tlist), ', '.join(sizelist))
                info_lines.append(info)

            for line in info_lines:
                log.log(28, line)

            if GLOBALS["email"]  and last_report_time is None:
                last_report_time = time()
                send_mail(GLOBALS["email"], "Your NPR process has started", '\n'.join(info_lines))

            ## ================================
            ## CHECK AND UPDATE CURRENT TASKS
            checked_tasks = set()
            check_start_time = time()
            to_add_tasks = set()

            GLOBALS["cached_status"] = {}
            for task in sorted(pending_tasks, sort_tasks):
                # Avoids endless periods without new job submissions
                elapsed_time = time() - check_start_time
                #if not back_launcher and pending_tasks and \
                #        elapsed_time > schedule_time * 2:
                #    log.log(26, "@@8:Interrupting task checks to schedule new jobs@@1:")
                #    db.commit()
                #    wtime = launch_jobs(sorted(pending_tasks, sort_tasks),
                #                        execution, run_detached)
                #    check_start_time = time()

                # Enter debuging mode if necessary
                if debug and log.level > 10 and task.taskid.startswith(debug):
                    log.setLevel(10) 
                    log.debug("ENTERING IN DEBUGGING MODE")
                thread2tasks[task.configid].append(task)

                # Update tasks and job statuses

                if task.taskid not in checked_tasks:
                    try:
                        show_task_info(task)
                        task.status = task.get_status(qstat_jobs)
                        db.dataconn.commit()
                        if back_launcher and task.status not in set("DE"):
                            for j, cmd in task.iter_waiting_jobs():
                                j.status = "Q"
                                GLOBALS["cached_status"][j.jobid] = "Q"
                                if j.jobid not in BUG:
                                    if not os.path.exists(j.jobdir):
                                        os.makedirs(j.jobdir)
                                    for ifile, outpath in j.input_files.iteritems():
                                        try:
                                            _tid, _did = ifile.split(".")
                                            _did = int(_did)
                                        except (IndexError, ValueError): 
                                            dataid = ifile
                                        else:
                                            dataid = db.get_dataid(_tid, _did)

                                        if not outpath:
                                            outfile = pjoin(GLOBALS["input_dir"], ifile)
                                        else:
                                            outfile = pjoin(outpath, ifile)

                                        if not os.path.exists(outfile): 
                                            open(outfile, "w").write(db.get_data(dataid))

                                    log.log(24, "  @@8:Queueing @@1: %s from %s" %(j, task))
                                    job_queue.put([j.jobid, j.cores, cmd, j.status_file])
                                BUG.add(j.jobid)

                        update_task_states_recursively(task)
                        db.commit()
                        checked_tasks.add(task.taskid)
                    except TaskError, e:
                        log.error("Errors found in %s" %task)
                        import traceback
                        traceback.print_exc()
                        if GLOBALS["email"]:
                            threadname = GLOBALS[task.configid]["_name"]
                            send_mail(GLOBALS["email"], "Errors found in %s!" %threadname,
                                      '\n'.join(map(str, [task, e.value, e.msg])))
                        pending_tasks.discard(task)
                        thread_errors[task.configid].append([task, e.value, e.msg])
                        continue
                else:
                    # Set temporary Queued state to avoids launching
                    # jobs from clones
                    task.status = "Q" 
                    if log.level < 24:
                        show_task_info(task)

                if task.status == "D":
                    #db.commit()
                    show_task_info(task)
                    logindent(3)


                    # Log commands of every task
                    if 'cmd_log_file' not in GLOBALS[task.configid]:
                         GLOBALS[task.configid]['cmd_log_file'] = pjoin(GLOBALS[task.configid]["_outpath"], "cmd.log")
                         O = open(GLOBALS[task.configid]['cmd_log_file'], "w")
                         O.close()

                    cmd_lines =  get_cmd_log(task)
                    CMD_LOG = open(GLOBALS[task.configid]['cmd_log_file'], "a")
                    print >>CMD_LOG, task
                    for c in cmd_lines:
                        print >>CMD_LOG, '   '+'\t'.join(map(str, c))
                    CMD_LOG.close()
                    # 

                    try:
                        #wkname = GLOBALS[task.configid]['_name']
                        create_tasks = workflow_task_processor(task, task.target_wkname)
                    except TaskError, e:
                        log.error("Errors found in %s" %task)
                        pending_tasks.discard(task)
                        thread_errors[task.configid].append([task, e.value, e.msg])
                        continue
                    else: 
                        logindent(-3)

                        to_add_tasks.update(create_tasks)
                        pending_tasks.discard(task)

                elif task.status == "E":
                    log.error("task contains errors: %s " %task)
                    log.error("Errors found in %s")
                    pending_tasks.discard(task)
                    thread_errors[task.configid].append([task, None, "Found (E) task status"])