Example #1
def processor(workflow_class,
              graph_builder_constructor, graph_builder_params,
              ranker_constructor, ranker_params,
              eval_parameters, 
              ranking_cutoff,
              mesh_tree_filename, distance_matrix_filename,
              distance_function,
              umls_converter_data_filename,
              extra_data_name,
              extra_data_contents,
              my_input_queue, my_output_queue,
              my_own_name=None):
    logging.info("Setting up worker.")
    if my_own_name is not None:
        proctitle.setproctitle(my_own_name)

    my_workflow = workflow_class(graph_builder_constructor,
                                 graph_builder_params,
                                 ranker_constructor,
                                 ranker_params,
                                 eval_parameters,
                                 ranking_cutoff,
                                 mesh_tree_filename,
                                 distance_matrix_filename,
                                 distance_function,
                                 umls_converter_data_filename)
    if extra_data_name is not None:
        setattr(my_workflow, extra_data_name, extra_data_contents)
    logging.info("Finished setting up worker process. Waiting for requests.")
    try:
        while True:
            request = my_input_queue.get()
            logging.log(ULTRADEBUG, "Processing request %r", request)
            if request == 'STOP':
                logging.log(ULTRADEBUG, "Received stop request.")
                break
            try:
                my_workflow.process_article(request)
                # Recover the article and push it onto the output queue
                my_output_queue.put(my_workflow.all_results)
                # Reset the workflow's result store for the next article
                my_workflow.all_results = {}
            except CouldNotRank:
                # On failure we could push the task back onto the input
                # queue instead: my_input_queue.put(request)
                logging.info("Skipping unrankable article.")
            except:
                logging.warning("EXCEPTION RAISED: \n%s",
                                traceback.format_exc())
                raise
    finally:
        # Results have already been returned via the output queue.
        logging.log(ULTRADEBUG, "Ending processor execution.")
    return
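
For context, here is a minimal sketch of how a single worker could be wired up and shut down through its queues, assuming `articles` is some iterable of rankable articles. The workflow, builder, and ranker names plus the filenames are hypothetical placeholders; only processor() itself, the queue arguments, and the 'STOP' sentinel come from the code above.

from multiprocessing import Process, Queue

in_queue = Queue()
out_queue = Queue()
# MyWorkflow, MyGraphBuilder, and MyRanker are hypothetical stand-ins
# for the real MEDRank classes; the filenames are placeholders too.
worker = Process(target=processor,
                 args=(MyWorkflow,
                       MyGraphBuilder, {},        # builder constructor + params
                       MyRanker, {},              # ranker constructor + params
                       None,                      # eval_parameters
                       10,                        # ranking_cutoff
                       'mesh_tree.db',            # mesh_tree_filename
                       'distances.db',            # distance_matrix_filename
                       None,                      # distance_function
                       'converter.db',            # umls_converter_data_filename
                       None, None,                # no extra data
                       in_queue, out_queue,
                       'MEDRank-Worker-0'))
worker.start()
for article in articles:
    in_queue.put(article)
in_queue.put('STOP')                              # sentinel recognized above
worker.join()                                     # wait for the STOP to land
results = {}
while not out_queue.empty():                      # approximate, fine for a sketch
    results.update(out_queue.get())               # one dict per ranked article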
Example #2
def multi_processor(reader,
                    workflow_class,
                    graph_builder_constructor, graph_builder_params,
                    ranker_constructor, ranker_params,
                    eval_parameters, 
                    ranking_cutoff,
                    mesh_tree_filename, distance_matrix_filename,
                    distance_function,
                    umls_converter_data_filename,
                    umls_concept_data_filename,
                    extra_data_name,
                    extra_data_contents,
                    output_file,
                    num_processes=None,
                    queue_size=None,
                    output_callback=output,
                    output_headers_callback=output_headers,
                    output_item_callback=output_one_item,
                    performance_tuning=True):
    """
    Perform the evaluation.
    Multiprocessing notes: It's the responsibility of the caller to make sure that
    extra_data_contents, if any, are multiprocessing-safe. For example, by using
    a SyncManager and Namespace and passing the proxy. See umls/concept for an example.
    """
    
    if num_processes is None:
        num_processes = cpu_count()
    # Guard against 2 * None when sizing the output queue below; for
    # multiprocessing.Queue, a maxsize of 0 means "unbounded".
    if queue_size is None:
        queue_size = 0

    if performance_tuning:
        # Reading the file creates and destroys an awful lot of short-lived
        # objects, so there is no sense in garbage-collecting the later
        # generations very often. Raise the thresholds to roughly 10x, 5x,
        # and 5x their usual values.
        original_threshold = gc.get_threshold()
        gc.set_threshold(10 * original_threshold[0],
                         5 * original_threshold[1],
                         5 * original_threshold[2])
        # Similarly, minimize overhead from thread switches by raising the
        # interpreter's check interval to 5x its usual value.
        original_check_interval = sys.getcheckinterval()
        sys.setcheckinterval(5 * original_check_interval)
    logging.debug("Initializing Concept storage from %s", 
                  umls_concept_data_filename)
                  
    if umls_concept_data_filename is None:
        Concept.init_storage()
    else:
        Concept.init_storage(StringDBDict(umls_concept_data_filename))
    Pmid.init_storage()

    proctitle.setproctitle("MEDRank-main")
    
    processes = []
    logging.info("Creating %d worker processes.", num_processes)
    task_queues = [Queue(queue_size) for x in xrange(num_processes)]
    this_output_queue = Queue(2 * queue_size)

    # Create an output processor
    output_processor = Process(target=output_callback,
                               args=(output_file,
                                     this_output_queue,
                                     output_headers_callback,
                                     output_item_callback))
    output_processor.start()
    
    for i in xrange(num_processes):
        this_process = Process(target=processor,
                               args=(workflow_class,
                                     graph_builder_constructor,
                                     graph_builder_params,
                                     ranker_constructor,
                                     ranker_params,
                                     eval_parameters,
                                     ranking_cutoff,
                                     mesh_tree_filename,
                                     distance_matrix_filename,
                                     distance_function,
                                     umls_converter_data_filename,
                                     extra_data_name,
                                     extra_data_contents,
                                     task_queues[i],
                                     this_output_queue,
                                     "MEDRank-Worker-%d" % i),
                               name="MEDRank-Worker-%d" % i)
        logging.log(ULTRADEBUG, "Created process: %r", this_process)
        this_process.start()
        processes.append((this_process, this_output_queue, task_queues[i]))
    
    count = 0

    # Round-robin dispatch over per-worker queues. (A single shared queue
    # used to starve workers that were stuck waiting on semlocks.)
    for each_article in reader:
        count += 1
        target_process = (count - 1) % num_processes
        logging.info("Dispatching article %d: %s to %s",
                     count,
                     each_article.set_id,
                     processes[target_process][0].name)
        task_queues[target_process].put(each_article)

    logging.log(ULTRADEBUG, "Waiting for processing to end.")

    alive_processes = [x for x in processes if x[0].is_alive()]
    remaining_processes = len(alive_processes)

    logging.info("There are %d processes (out of %d) still alive.", 
                 remaining_processes,
                 num_processes)
    for i in xrange(remaining_processes):
        alive_processes[i][2].put('STOP')
        alive_processes[i][2].close()
    logging.debug("Sent STOP requests. Notifying queue that no further "
                  "requests will come.")

    logging.info("All information sent to the processors.")

    # Back to normal
    if performance_tuning:
        gc.set_threshold(original_threshold[0],
                         original_threshold[1],
                         original_threshold[2])
        sys.setcheckinterval(original_check_interval)

    while len(processes) > 0:
        a_process = processes.pop()
        # Join each worker to wait for it to finish draining its queue
        a_process[0].join()
    logging.info("Finishing writing out results.")
    this_output_queue.put("STOP")
    output_processor.join()
    logging.info("Results written. Finishing multiprocessing.")
    return
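
As the docstring notes, extra_data_contents must be multiprocessing-safe, for example via a SyncManager and Namespace. Below is a minimal sketch of that pattern; the payload and attribute names are hypothetical (see umls/concept for the real example).

from multiprocessing.managers import SyncManager

manager = SyncManager()
manager.start()
# A Namespace proxy can be passed safely to worker processes; attribute
# reads and writes are routed through the manager process.
shared = manager.Namespace()
shared.synonyms = {'neoplasm': ['tumor', 'cancer']}   # hypothetical payload

# Workers would then see the proxy as an attribute on their workflow:
#   multi_processor(reader, ..., extra_data_name='extra_terms',
#                   extra_data_contents=shared, ...)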
Example #3
def output(output_file, result_queue, headers_callback=output_headers,
           item_callback=output_one_item, initial_result_set_size=100):
    """Actually dumps the result set to output. Override for easy output
    customization."""
    result_set = {}
    proctitle.setproctitle("MEDRank-output-processor")
    stop_requested = False
    # Gather a few values
    logging.log(ULTRADEBUG, "Gathering values for initial analysis.")
    for i in xrange(initial_result_set_size):
        logging.log(ULTRADEBUG, "Getting results %d.", i)
        try:
            request = result_queue.get()
            if request == 'STOP':
                stop_requested = True
                break
            result_set.update(request)
        except KeyboardInterrupt:
            return
        except Exception:
            logging.warning("EXCEPTION RAISED: \n%s", traceback.format_exc())

    logging.log(ULTRADEBUG, "Values gathered. Computing columns.")
            
    column_names = set()
    # Add the column names to the CSV
    if headers_callback is not None:
        for result in result_set.itervalues():
            column_names |= result.columns()
        # The callback creates the actual writer and emits the header row
        column_names = ['pmid'] + list(column_names)
        headers_callback(output_file, column_names)
    logging.log(ULTRADEBUG, "Looping to get more results and output them.")
    while True:
        if not stop_requested:
            try:
                request = result_queue.get()
                if request == 'STOP':
                    stop_requested = True
                else:
                    result_set.update(request)
            except KeyboardInterrupt:
                return
            except Exception:
                logging.warning("EXCEPTION RAISED: \n%s",
                                traceback.format_exc())
        if stop_requested and len(result_set) == 0:
            break
        if len(result_set) == 0:
            continue  # It can happen! We might get no results, or an empty set.
        pmid = result_set.keys()[0]
        logging.log(ULTRADEBUG, "Output: article %r.", pmid)
        result = result_set[pmid]
        item_callback(output_file, pmid, result, column_names)
        del result_set[pmid]
    try:
        output_file.flush()
    except Exception:
        logging.warning("The output file object does not support flushing.")
    try:
        os.fsync(output_file.fileno())
    except Exception:
        logging.warning("Could not fsync the output file. Traceback follows.\n%s",
                        traceback.format_exc())
    return
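
The default headers_callback and item_callback (output_headers and output_one_item) are defined elsewhere in the module. A plausible csv-based pair matching the call signatures used above might look like the sketch below; it assumes each result behaves like a mapping from column name to value, and is not the actual MEDRank implementation.

import csv

def output_headers(output_file, column_names):
    # Write one header row; the caller passes ['pmid'] plus the result columns.
    csv.writer(output_file).writerow(column_names)

def output_one_item(output_file, pmid, result, column_names):
    # Write one row per article, with cells aligned to the header columns.
    row = [pmid] + [result.get(name, '') for name in column_names[1:]]
    csv.writer(output_file).writerow(row)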