Example #1
def run_parallel_async(graph, nprocs=None, sleep=0.2, raise_errors=False):
    if nprocs == 1:
        return run_async(graph, sleep=sleep, raise_errors=raise_errors)

    nprocs = nprocs or mp.cpu_count() // 2

    with mp.Manager() as manager:
        graph = tgraph.create_parallel_compatible_graph(graph, manager)

        ioq = mp.Queue(len(graph.funcs.keys()))
        cpuq = mp.Queue(len(graph.funcs.keys()))

        procs = [mp.Process(target=run_scheduler,
                            args=(graph, sleep, ioq, cpuq, raise_errors))
                 for _ in range(nprocs)]
        for proc in procs:
            proc.start()

        while not tgraph.all_done(graph):
            for task in tgraph.get_ready_tasks(graph):
                graph = tgraph.mark_as_in_progress(graph, task)
                mlog(graph).info(
                    'pid {}: queueing task {}'.format(os.getpid(), task))
                if task in graph.io_bound:
                    ioq.put(task)
                else:
                    cpuq.put(task)

            time.sleep(sleep)

            if raise_errors and any(p.exitcode for p in procs):  # a nonzero exit code means a worker process died
                raise RuntimeError('An async task has failed. Please check your logs')

        return tgraph.recover_values_from_manager(graph)
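
The run_scheduler workers consumed by this function are not shown in the excerpt. Below is a minimal sketch of what a queue-draining worker could look like; the zero-argument call of graph.funcs[task] and the tgraph.mark_as_done helper are assumptions made purely for illustration, not part of the original API.

import queue as stdlib_queue  # standard-library queue, only for the Empty exception

def run_scheduler_sketch(graph, sleep, ioq, cpuq, raise_errors=False):
    # Keep pulling task names from either queue until the whole graph is done.
    while not tgraph.all_done(graph):
        for task_queue in (cpuq, ioq):
            try:
                task = task_queue.get_nowait()
            except stdlib_queue.Empty:
                continue
            try:
                result = graph.funcs[task]()              # assumption: tasks are zero-argument callables
                tgraph.mark_as_done(graph, task, result)  # hypothetical helper, not in the excerpt
            except Exception:
                if raise_errors:
                    raise
        time.sleep(sleep)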
Example #2
    def __init__(self,
                 num_of_process: int,
                 mapper: Callable,
                 reducer: Callable,
                 mapper_queue_size: int = 0,
                 reducer_queue_size: int = 0):
        self._mapper_queue = mp.Queue(maxsize=mapper_queue_size)
        self._reducer_queue = ChunkedQueue(maxsize=reducer_queue_size)
        self._result_queue = ChunkedQueue()
        self._mapper_cmd_queue = [mp.Queue() for _ in range(num_of_process)]
        self._reducer_cmd_queue = [mp.Queue() for _ in range(num_of_process)]
        self._manager_cmd_queue = mp.Queue()

        self._manager_process = mp.Process(target=self._run_manager)
        self._mapper_process = [
            mp.Process(target=self._run_mapper, args=(i, ))
            for i in range(num_of_process)
        ]
        self._reducer_process = [
            mp.Process(target=self._run_reducer, args=(i, ))
            for i in range(num_of_process)
        ]

        self._mapper = mapper
        self._reducer = reducer
        self._num_of_process = num_of_process
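
The worker bodies referenced above (_run_manager, _run_mapper, _run_reducer) are not part of this excerpt. A hedged sketch of what one mapper process might do follows; the 'STOP' sentinel and the 0.1-second poll interval are illustrative assumptions, not the original protocol.

    def _run_mapper(self, idx):
        import queue  # standard-library queue, only for the Empty exception
        while True:
            # Check this process's private command queue for a shutdown request.
            if not self._mapper_cmd_queue[idx].empty():
                if self._mapper_cmd_queue[idx].get() == 'STOP':
                    break
            try:
                item = self._mapper_queue.get(timeout=0.1)
            except queue.Empty:
                continue
            self._reducer_queue.put(self._mapper(item))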
Example #3
def run_parallel_async(graph, nprocs=None, sleep=0.2):
    if nprocs == 1:
        return run_async(graph)

    nprocs = nprocs or mp.cpu_count() // 2

    with mp.Manager() as manager:
        graph = tgraph.create_parallel_compatible_graph(graph, manager)

        ioq = mp.Queue(len(graph.funcs.keys()))
        cpuq = mp.Queue(len(graph.funcs.keys()))

        for _ in range(nprocs):
            proc = mp.Process(target=run_scheduler,
                              args=(graph, sleep, ioq, cpuq))
            proc.start()

        while not tgraph.all_done(graph):
            for task in tgraph.get_ready_tasks(graph):
                graph = tgraph.mark_as_in_progress(graph, task)
                mlog(graph).info('pid {}: queueing task {}'.format(
                    os.getpid(), task))
                if task in graph.io_bound:
                    ioq.put(task)
                else:
                    cpuq.put(task)

            time.sleep(sleep)

        return tgraph.recover_values_from_manager(graph)
Example #4
def backtrack(next_choice_func, *, partial_checker=None, candidate_matcher=None,
              intermediate_queue=None, solutions_queue=None, mailbox=None, discard=None):
    """next_choice_func should be a function that takes a sequence and
    returns a list of all possible next items in that sequence.
    candidate_matcher should be a function that returns whether a sequence
    is a complete solution.
    Algorithm:
    Instantiate a queue.
    While it is not empty, pop all of its contents and put all the results
    of the next_choice_func back into the queue.
    Any results of the next_choice_func that match with the candidate_matcher 
    are put in a results queue.
    The algorithm has finished when the queue is empty.
    After that, the results queue is fed out.
    """
    # signal.signal(signal.SIGINT, signal.SIG_IGN)
    paused = False
    if intermediate_queue is None:
        q = multiprocessing.Queue()
    else:
        q = intermediate_queue

    if solutions_queue is None:
        solutions = multiprocessing.Queue()
    else:
        solutions = solutions_queue
    assert candidate_matcher is not None, "A function to match final solutions must be provided."

    while True:
        # quit()
        while not mailbox.empty():
            v = mailbox.get()  # control messages arrive on the mailbox, not the work queue
            print("Received in inbox:", v)
            if v == 1:
                # return
                quit()
            elif v == 2:
                paused = True
            elif v == 3:
                paused = False
        if not paused:
            try:
                partial = q.get(timeout=0.1)  # time out so the mailbox is still checked when the queue is idle
                # print(partial)
                # print("partial",partial)
                if candidate_matcher(partial):
                    # print(partial)
                    solutions.put(partial)
                for guess in next_choice_func(partial):
                    if partial_checker(guess):
                        q.put(guess)

                    else:
                        # print("BAD:",partial)
                        if discard:
                            discard.put(guess)
                        pass
                    # print(head)
            except queue.Empty:
                pass
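
A hedged usage sketch: enumerate every bit string of length 3 with the backtrack function above. The helper functions and queue names are illustrative; note that the worker blocks on an empty work queue, so this sketch stops it with terminate() rather than through the mailbox.

import time

def next_bits(partial):
    # All one-character extensions of a partial bit string.
    return [partial + '0', partial + '1']

def short_enough(partial):
    return len(partial) <= 3

def is_complete(partial):
    return len(partial) == 3

if __name__ == '__main__':
    work_q = multiprocessing.Queue()       # intermediate partials
    results_q = multiprocessing.Queue()    # completed solutions
    control_q = multiprocessing.Queue()    # "mailbox" for quit/pause/resume messages
    work_q.put('')                         # seed the search with the empty string

    worker = multiprocessing.Process(
        target=backtrack,
        args=(next_bits,),
        kwargs=dict(partial_checker=short_enough,
                    candidate_matcher=is_complete,
                    intermediate_queue=work_q,
                    solutions_queue=results_q,
                    mailbox=control_q))
    worker.start()
    time.sleep(1)                          # give the search time to finish

    found = []
    while not results_q.empty():
        found.append(results_q.get())
    worker.terminate()                     # the worker blocks on the empty work queue
    worker.join()
    print(sorted(found))                   # expect the 8 bit strings of length 3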
Example #5
def run_parallel(graph, nprocs=None, sleep=0.2, raise_errors=False):
    nprocs = nprocs or mp.cpu_count() - 1
    with mp.Manager() as manager:
        graph = tgraph.create_parallel_compatible_graph(graph, manager)
        with mp.Pool(nprocs) as pool:

            exception_q = mp.Queue(10)

            def error_callback(exception):
                exception_q.put_nowait(exception)
                pool.terminate()

            while not tgraph.all_done(graph):
                for task in tgraph.get_ready_tasks(graph, reverse=False):
                    graph = tgraph.mark_as_in_progress(graph, task)
                    mlog(graph).info('pid {}: assigning task {}'.format(
                        os.getpid(), task))
                    pool.apply_async(run_task,
                                     args=(graph, task, raise_errors),
                                     error_callback=error_callback)
                time.sleep(sleep)

                if not exception_q.empty():
                    raise exception_q.get()

        return tgraph.recover_values_from_manager(graph)
Example #6
    def __init__(self, ds, nr_prefetch, nr_proc):
        """
        Args:
            ds (DataFlow): input DataFlow.
            nr_prefetch (int): size of the queue to hold prefetched datapoints.
            nr_proc (int): number of processes to use.
        """
        if os.name == 'nt':
            logger.warn("MultiProcessPrefetchData does support windows. \
However, windows requires more strict picklability on processes, which may \
lead of failure on some of the code.")
        super(MultiProcessPrefetchData, self).__init__(ds)
        try:
            self._size = ds.size()
        except NotImplementedError:
            self._size = -1
        self.nr_proc = nr_proc
        self.nr_prefetch = nr_prefetch

        if nr_proc > 1:
            logger.info("[MultiProcessPrefetchData] Will fork a dataflow more than one times. "
                        "This assumes the datapoints are i.i.d.")

        self.queue = mp.Queue(self.nr_prefetch)
        self.procs = [MultiProcessPrefetchData._Worker(self.ds, self.queue)
                      for _ in range(self.nr_proc)]
        ensure_proc_terminate(self.procs)
        start_proc_mask_signal(self.procs)
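
The _Worker class referenced above is not included in this excerpt. A plausible sketch of the prefetch worker, assuming the tensorpack DataFlow interface (reset_state() plus iteration over datapoints); the real implementation may differ.

    class _Worker(mp.Process):
        def __init__(self, ds, queue):
            super(MultiProcessPrefetchData._Worker, self).__init__()
            self.ds = ds
            self.queue = queue

        def run(self):
            # Each forked worker iterates over its own copy of the dataflow
            # forever and pushes datapoints into the shared bounded queue.
            self.ds.reset_state()
            while True:
                for dp in self.ds:
                    self.queue.put(dp)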
Example #7
 def __init__(self, port=None):
     super(mp.Process, self).__init__()  # note: this skips mp.Process.__init__ in the MRO; super(<this class>, self) is likely intended
     self.queue = mp.Queue()
     self.pause_state = mp.Event()
     self.halt = mp.Event()
     self.idle = True
     self.port = port
     self.kill_process = mp.Event()
Example #8
def main():

    enable = mp.Value('i', 0)
    imgQueue = mp.Queue(0)
    imgQueueBin = mp.Queue(0)

    while True:
        sign = receive()
        if sign == 1:
            print('connect')
            enable.value = 1
            mp.Process(target=motionControl,
                       args=(enable, imgQueue, imgQueueBin)).start()
            mp.Process(target=displayImage,
                       args=(enable, imgQueue, imgQueueBin)).start()
        elif sign == 2:
            print('disconnect')
            enable.value = 0
Example #9
def GetLoggings(logfile):
    terminating = multiprocess.Event()
    logger = logging.getLogger('')
    logger.setLevel(logging.DEBUG)
    logQueue = multiprocess.Queue(16)
    filehandler = MultiProcessingLogHandler(logging.FileHandler(logfile),
                                            logQueue)
    logger.addHandler(filehandler)
    filehandler.setLevel(logging.DEBUG)
    return (terminating, logger, logQueue)
Example #10
 def start(self):
     ''' Create tasks and results queues, and start consumers. '''
     mp.freeze_support()
     self.tasks = mp.JoinableQueue()
     self.results = mp.Queue()
     self.consumers = [
         Consumer(self.tasks, self.results)
         for i in range(self.getNConsumers())
     ]
     for c in self.consumers:
         c.start()
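
The Consumer class and getNConsumers() are not shown in the excerpt. A minimal sketch of a consumer that matches this start() method, using a None "poison pill" as the shutdown sentinel (the sentinel choice is an assumption):

class Consumer(mp.Process):
    def __init__(self, tasks, results):
        super(Consumer, self).__init__()
        self.tasks = tasks
        self.results = results

    def run(self):
        while True:
            task = self.tasks.get()
            if task is None:                # poison pill: stop this consumer
                self.tasks.task_done()
                break
            self.results.put(task())        # tasks are assumed to be callables
            self.tasks.task_done()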
Example #11
 def start_break_timer(self, length):
     # Basically the same implementation as the timer for the pomodoro, but this one cannot be paused
     print('\nBreak started\n')
     current_length = multiprocess.Queue()
     break_process = multiprocess.Process(target=countdown, args=(length,
                                                                  current_length,
                                                                  self.sound_file))
     break_process.start()
     break_process.join()
     break_process.terminate()
     input('Press ENTER to start another pomodoro\r')
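
Both this method and the pomodoro timer in a later example delegate to a countdown helper that is not part of the excerpt. A minimal sketch, assuming it ticks once per second, reports the remaining seconds through the queue, and plays sound_file when it hits zero (the playsound call is purely illustrative):

def countdown(length, current_length, sound_file):
    remaining = int(length)
    while remaining > 0:
        # Keep only the latest remaining time in the queue so the parent
        # can read an up-to-date value after terminating this process.
        while not current_length.empty():
            current_length.get()
        current_length.put(remaining)
        time.sleep(1)
        remaining -= 1
    current_length.put(0)
    # playsound(sound_file)  # hypothetical alarm when the timer finishes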
Example #12
def multithread():
    q = mp.Queue()  # threads can put results into the same kind of queue that processes use
    t1 = td.Thread(target=job, args=(q, ))
    t2 = td.Thread(target=job, args=(q, ))
    t1.start()
    t2.start()
    t1.join()
    t2.join()
    res1 = q.get()
    res2 = q.get()
    print('multithread:', res1 + res2)
Example #13
def multicore():
    q = mp.Queue()
    p1 = mp.Process(target=job, args=(q, ))
    p2 = mp.Process(target=job, args=(q, ))
    p1.start()
    p2.start()
    p1.join()
    p2.join()
    res1 = q.get()
    res2 = q.get()
    print('multicore:', res1 + res2)
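
The job function shared by the two snippets above is not shown. A minimal sketch, assuming each worker computes a partial result and puts it on the shared queue (the arithmetic is illustrative):

def job(q):
    res = 0
    for i in range(1000):
        res += i + i**2 + i**3
    q.put(res)  # the same queue API works for both the thread and process versions here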
Example #14
def run_apple_script(cmd, timeout=300):
    """
    run apple script and return result. the script will run in a different process so if python crashes we will not
    fail. if the apple script doesn't return answer within the timeout, it will be terminated
    :param cmd: apple script
    :param timeout: timeout to end the apple script process
    :return: apple script result if exist
    """
    def _run_apple_script_in_another_process(cmd, stdout_queue, stderr_queue):
        apple_script_process = subprocess.Popen(['osascript'],
                                                shell=True,
                                                stdin=subprocess.PIPE,
                                                stdout=subprocess.PIPE,
                                                stderr=subprocess.PIPE,
                                                text=True)
        p_stdout, p_stderr = apple_script_process.communicate(cmd)
        if p_stdout:
            stdout_queue.put(p_stdout)
        if p_stderr:
            stderr_queue.put(p_stderr)

    # logger.debug('Going to run the apple script: {}'.format(cmd))
    stdout_queue_obj = multiprocess.Queue()
    stderr_queue_obj = multiprocess.Queue()
    p = multiprocess.Process(target=_run_apple_script_in_another_process,
                             args=(cmd, stdout_queue_obj, stderr_queue_obj))
    p.start()
    p.join(timeout=timeout)
    if p.is_alive():
        logger.error(
            'The process that runs the apple script was terminated after reaching the timeout'
        )
        p.terminate()
    if not stderr_queue_obj.empty(
    ):  # if stderr, log the error and return None
        logger.error(stderr_queue_obj.get())
        return
    if not stdout_queue_obj.empty():
        stdout = stdout_queue_obj.get()
        # logger.debug('Apple script result is: {}'.format(stdout))
        return stdout
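
A hedged usage sketch (macOS only; the script string is illustrative):

if __name__ == '__main__':
    script = 'return (current date) as string'
    result = run_apple_script(script, timeout=10)
    print(result)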
Example #15
def test():
    manager = processing.Manager()
    
    gc.disable()
    
    print('\n\t######## testing Queue.Queue\n')
    test_queuespeed(threading.Thread, Queue.Queue(),
                    threading.Condition())
    print('\n\t######## testing processing.Queue\n')
    test_queuespeed(processing.Process, processing.Queue(),
                    processing.Condition())
    print('\n\t######## testing Queue managed by server process\n')
    test_queuespeed(processing.Process, manager.Queue(),
                    manager.Condition())
    print('\n\t######## testing processing.Pipe\n')
    test_pipespeed()
    
    print()
    
    print('\n\t######## testing list\n')
    test_seqspeed(range(10))
    print('\n\t######## testing list managed by server process\n')
    test_seqspeed(manager.list(range(10)))
    print('\n\t######## testing Array("i", ..., lock=False)\n')
    test_seqspeed(processing.Array('i', range(10), lock=False))
    print('\n\t######## testing Array("i", ..., lock=True)\n')
    test_seqspeed(processing.Array('i', range(10), lock=True))

    print()

    print('\n\t######## testing threading.Lock\n')
    test_lockspeed(threading.Lock())
    print('\n\t######## testing threading.RLock\n')
    test_lockspeed(threading.RLock())
    print('\n\t######## testing processing.Lock\n')
    test_lockspeed(processing.Lock())
    print('\n\t######## testing processing.RLock\n')
    test_lockspeed(processing.RLock())
    print('\n\t######## testing lock managed by server process\n')
    test_lockspeed(manager.Lock())
    print('\n\t######## testing rlock managed by server process\n')
    test_lockspeed(manager.RLock())

    print()

    print('\n\t######## testing threading.Condition\n')
    test_conditionspeed(threading.Thread, threading.Condition())
    print('\n\t######## testing processing.Condition\n')
    test_conditionspeed(processing.Process, processing.Condition())
    print('\n\t######## testing condition managed by a server process\n')
    test_conditionspeed(processing.Process, manager.Condition())

    gc.enable()
Example #16
 def __init__(self, measureFlowchart, numberProc=1):
     """
     Object for parallel processing and preprocessing of image frames
     """
     # Flowchart object, queues and processes
     self.measureFlowchart = measureFlowchart
     self.input_queue = multiprocessing.JoinableQueue(1)
     self.output_queue = multiprocessing.Queue()
     self.numberProc = numberProc
     self.processes = [
         ProcessQueue(self.input_queue, self.output_queue)
         for _ in range(self.numberProc)
     ]
Example #17
 def __init__(self, data_queue, nr_producer, start=0):
     """
     Args:
         data_queue(mp.Queue): a queue which contains datapoints.
         nr_producer(int): number of producer processes. This process will
             terminate after receiving this many of :class:`DIE` sentinel.
         start(int): the rank of the first object
     """
     super(OrderedResultGatherProc, self).__init__()
     self.data_queue = data_queue
     self.ordered_container = OrderedContainer(start=start)
     self.result_queue = mp.Queue()
     self.nr_producer = nr_producer
Example #18
    def __init__(self, logger=None, fmt=None, level=None):
        """ Current logging instance or None - alternative format and level to be used within the bounded context.

            Redirect log requests to an multi-proc queue and a listener that
            redirects the request to handers bound to the input logger instance.

        """
        self.__handlerInitialList = []
        self.__handlerWrappedList = []
        #
        self.logger = logger if logger else logging.getLogger()
        #
        self.__loggingQueue = multiprocessing.Queue(-1)
        self.__ql = None
        self.__altFmt = logging.Formatter(fmt) if fmt else None
        self.__altLevel = level if level else None
Example #19
def test_queue():
    q = processing.Queue()

    p = processing.Process(target=queue_func, args=(q, ))
    p.start()

    o = None
    while o != 'STOP':
        try:
            o = q.get(timeout=0.3)
            print(o, end=' ')
            sys.stdout.flush()
        except Empty:
            print('TIMEOUT')

    print()
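
queue_func is not included in this excerpt. A minimal sketch consistent with the consumer loop above: put a few items on the queue with pauses long enough to trigger the TIMEOUT branch, then send the 'STOP' sentinel.

import time

def queue_func(q):
    for i in range(5):
        time.sleep(0.5)      # longer than the 0.3 s get() timeout above
        q.put(i * 10)
    q.put('STOP')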
Example #20
    def __init__(self, env_id, make_env, reward_predictor, num_workers, max_timesteps_per_episode, seed):
        self.num_workers = num_workers
        self.predictor = reward_predictor

        self.tasks_q = multiprocess.JoinableQueue()
        self.results_q = multiprocess.Queue()

        self.actors = []
        for i in range(self.num_workers):
            new_seed = seed * 1000 + i  # Give each actor a uniquely seeded env
            self.actors.append(Actor(self.tasks_q, self.results_q, env_id, make_env, new_seed, max_timesteps_per_episode))

        for a in self.actors:
            a.start()

        # we will start by running 20,000 / 1000 = 20 episodes for the first iteration  TODO OLD
        self.average_timesteps_in_episode = 1000
Example #21
    def generate(self: object) -> None:
        print("Beginning data generation...")
        start_seconds = time.time()
        queue = multiprocess.Queue()
        w = multiprocess.Process(target=self.write, args=(queue, "STOP"))
        jobs = []

        for i in range(0, 5):
            p = multiprocess.Process(target=self.gen, args=(queue, ))
            jobs.append(p)
            p.start()

        w.start()
        for i, item in enumerate(jobs):
            item.join()
        queue.put("STOP")
        w.join()
        elapsed_time = (time.time() - start_seconds) / 60
        print("Generation completed. Elapsed time: ",
              "{0:.2f}".format(elapsed_time), " minutes")
Example #22
        def process_proxy(f_, *args0):
            import multiprocess as mp

            def f1(f, q, *args):
                try:
                    r = f(*args)
                    q.put(r)
                    q.close()
                except Exception as e:
                    import traceback
                    q.put((e, traceback.format_exc()))

            q = mp.Queue()
            p = mp.Process(target=f1, args=(f_, q) + args0)
            p.start()
            rv = q.get()
            q.close()
            p.terminate()
            p.join()
            q.join_thread()
            return rv
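
A hedged usage sketch, assuming process_proxy is reachable at module scope (in the original it is a nested helper): run a pure function in a throwaway process and read its return value back from the queue. On failure the queue holds an (exception, traceback) tuple instead of a result.

def _square(x):
    return x * x

if __name__ == '__main__':
    print(process_proxy(_square, 7))   # -> 49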
Example #23
    def start_pomodoro_timer(self):
        # Save the remaining length of the pomodoro inside a queue that can be shared across processes
        current_length = multiprocess.Queue()

        # Define an asynchronous child process for the timer
        pomodoro_process = multiprocess.Process(target=countdown, args=(self.remaining_length,
                                                                        current_length,
                                                                        self.sound_file))
        pomodoro_process.daemon = True
        pomodoro_process.start()

        while pomodoro_process.is_alive():
            # While the timer is running in the background, listen to user input
            input('Press ENTER to pause\n')

            # If the user has pressed ENTER, get the remaining length and kill the current timer process
            self.remaining_length = current_length.get()
            pomodoro_process.terminate()

            # Check if the timer has finished (reached 0 seconds)
            if self.remaining_length < 1:
                break

            # Monitor for how long the user has paused the timer
            pause_start_time = time.time()

            # Wait for the user input
            input('Press ENTER to resume\n')

            # Print for how long the timer was paused
            pause_end_time = time.time() - pause_start_time
            pause_time_format = '{:02d}:{:02d}'.format(int(pause_end_time / 60), int(pause_end_time % 60))
            print('Total pause time: ' + pause_time_format + '\n')

            """
            Start the pomodoro timer again, but counting down from 
            the remaining time we got before killing the multiprocess
            """
            self.start_pomodoro_timer()
Example #24
def main():
    # Some tests
    import logging
    import logging.handlers
    import multiprocess as mp

    # Set up loggingserver
    log_file = '~/mplogger.log'
    status_queue = mp.Queue()
    lserver_process = mp.Process(target=loggingserver,
                                 args=(log_file, status_queue))
    lserver_process.daemon = True
    lserver_process.start()
    server_address = status_queue.get()

    # Connect main process to logging server
    rootLogger = logging.getLogger('')
    rootLogger.setLevel(logging.DEBUG)
    socketHandler = ClientSocketHandler(*server_address)
    rootLogger.addHandler(socketHandler)

    # Send some sample logs
    logging.info('Test1')
    logging.error('Test2')
    logging.critical('Test3')
    logging.debug('Test4')
    logging.warning('Test5')

    logger1 = logging.getLogger('test1')
    logger2 = logging.getLogger('test2')
    logger1.info('asdfasdfsa')
    logger2.info('1234567890')

    # Close the logging server
    status_queue.put('DIE')
    lserver_process.join()
    print("Server closed, exiting...")
Example #25
def run_safely(f, x):
    """Runs f(x) in a separate process."""

    # f_global = f # globalize(f)
    # mp.freeze_support()
    mp.set_start_method("spawn")
    q = mp.Queue()
    p = Process(target=with_queue, args=(f, q, x))
    p.start()
    p.join()

    if p.exception:
        error, traceback = p.exception
        print(traceback)
        raise error

    try:
        out = q.get(False, 2.0)  # non-blocking get; the 2.0 timeout is ignored when block is False
    except queue.Empty:
        print("Empty queue!")
        print("Exit code: ", p.exitcode)
        raise MemoryError()

    return out
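
The with_queue helper is not shown. A minimal sketch consistent with run_safely: call f on x inside the child process and put the result on the queue for the parent to collect.

def with_queue(f, q, x):
    q.put(f(x))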
Example #26
    def __init__(self, thread=None, *args, **kwargs):
        super(IProcess, self).__init__(*args, **kwargs)
        self._thread = thread or 1
        self._pool = multiprocessing.Queue(self._thread)
        self._threads = []
        self._requests = []
        self._states = {}
        for i in range(self._thread):
            request, response = multiprocessing.Queue(), multiprocessing.Queue()
            t = threading.Thread(target=self.loop,
                                 args=(request, response),
                                 name='request-%s' % i)
            self._threads.append(t)
            self._requests.append((request, response))
            self._pool.put(i)
            self._states[t.name] = None

        self._reader_mutex = multiprocessing.Lock()
        self._reader = multiprocessing.Queue(), multiprocessing.Queue()
        self._threads.append(
            threading.Thread(target=self.loop,
                             args=self._reader,
                             name='reader'))
        self._states['reader'] = None

        self._collection = multiprocessing.Queue()
        self._received = multiprocessing.Event()
        self._properties = {}
        for key in dir(self.__class__):
            value = getattr(self.__class__, key, None)
            value = getattr(value, '__doc__', None)
            if str(value).startswith('child_property.'):
                self._properties[key] = None
            if str(value).startswith('child_timer.'):
                delta = int(value.split('.')[-1])
                self._threads.append(
                    threading.Thread(target=self.timentry,
                                     args=(key, delta),
                                     name='timer.%s' % key))
        self._wrap_run()
Example #27
def fetchseq(ids,
             species,
             write=False,
             output_name='',
             delim='\t',
             id_type='brute',
             server=None,
             source="SQL",
             database="bioseqdb",
             database_path=None,
             host='localhost',
             driver='psycopg2',
             version='1.0',
             user='******',
             passwd='',
             email='',
             batch_size=50,
             output_type="fasta",
             verbose=1,
             n_threads=1,
             n_subthreads=1,
             add_length=(0, 0),
             indent=0):
    if isgenerator(ids):
        if verbose > 1:
            print('Received generator!', indent=indent)
    elif isinstance(ids, list):
        if verbose > 1:
            print('Received list!', indent=indent)
    else:
        if verbose > 1:
            print('Reading ID File... ', indent=indent)
        with ids.open('r') as in_handle:  # open the ID file for reading ('w' would truncate it)
            id_prelist = [line.strip() for line in in_handle
                          ]  # list of each line in the file
            print('Done!', indent=indent)
        ids = [id_item for id_item in filter(None, id_prelist) if id_item]
        if not id_prelist or id_prelist is None:
            if verbose:
                print('id_prelist is empty!', indent=indent)
            return 'None'
    for id_item in ids:
        assert len(id_item) == 12, (
            "Item {0} in id_list has {1} items, not 5!\n"
            "Format should be: "
            "chr, (start,end), id, score, strand, thickStart, thickEnd, rgb, blockcount,"
            " blockspans, blockstarts, query_span"
            "!").format(
                " ".join((" ".join(item) if not isinstance(item, str) else item
                          for item in id_item)), len(id_item))
    if verbose > 1:
        print('Readied ids!', indent=indent)

    id_list = multiprocessing.JoinableQueue()
    results = multiprocessing.Queue()

    if 'sql' in source.lower():
        if server is None:
            try:
                if verbose > 1:
                    print('No server received, opening server...',
                          indent=indent)
                server = BioSeqDatabase.open_database(driver=driver,
                                                      user=user,
                                                      passwd=passwd,
                                                      host=host,
                                                      database=database)
                if verbose > 1:
                    print('Done!', indent=indent)
            except Exception as err:
                if verbose > 1:
                    print('Failed to open server!', indent=indent)
                    print(str(type(err)), err, sep=' ', indent=indent)
                raise
        else:
            if verbose > 1:
                print('Received server handle:', indent=indent)
                print(server, indent=indent)
            if verbose > 2:
                print('Please note the sub_databases of server:\n\t',
                      [str(i) for i in server.keys()],
                      indent=indent)
    elif source.lower() in ['fasta', '2bit', 'twobit']:
        print('Search type: ', source, indent=indent)
    else:
        raise SearchEngineNotImplementedError(
            'Search using source {} has not yet been implemented!'.format(
                source))
    if verbose > 1:
        print('Creating FetchSeq Processes...', indent=indent)
    fs_instances = [
        FetchSeqMP(id_queue=id_list,
                   seq_out_queue=results,
                   delim=delim,
                   id_type=id_type,
                   server=server,
                   species=species,
                   source=source,
                   database=database,
                   database_path=database_path,
                   host=host,
                   driver=driver,
                   version=version,
                   user=user,
                   passwd=passwd,
                   email=email,
                   output_type=output_type,
                   batch_size=batch_size,
                   verbose=verbose,
                   n_subthreads=n_subthreads,
                   add_length=add_length,
                   indent=indent + 1) for _ in range(n_threads)
    ]
    if verbose > 1:
        print('Done! Starting processes...', indent=indent)
    for fs in fs_instances:
        fs.start()
    if verbose > 1:
        print('Done!', indent=indent)
        print('Assigning FetchSeq records to queue... ', indent=indent)
    id_order = []
    for i, id_rec in enumerate(ids):
        try:
            id_order.append("{0}:{1}-{2}".format(id_rec[0], id_rec[1][0],
                                                 id_rec[1][1]))
        except IndexError:
            id_order.append("{0}".format(id_rec[0]))
        try:
            id_list.put(FetchSeq(id_rec=id_rec))
        except AssertionError as err:
            print(i, type(err), err, sep=' ')
            break
    for _ in fs_instances:
        id_list.put(None)
    if verbose > 1:
        print('Done!', indent=indent)
    output_dict = dict()
    missing_items_list = list()
    if verbose > 1:
        print('Getting sequences from processes... ', indent=indent)
    n_jobs = len(ids)
    while n_jobs:
        seq, missing = results.get()
        output_dict[seq[0]] = seq[1]
        missing_items_list.append(missing)
        n_jobs -= 1
    if verbose > 1:
        print('Done! Finished fetching sequences!', indent=indent)
        print('Closing processes!', indent=indent)
    for fs in fs_instances:
        if fs.is_alive():
            fs.join()
    output_list = [output_dict[i] for i in id_order if i in output_dict]
    if write:
        SeqIO.write(output_list, output_name, output_type)
        return
    else:
        if missing_items_list == [None]:
            missing_items_list = None
        return output_list, missing_items_list
Example #28
    def __init__(self,
                 num_of_processor: int,
                 mapper: Callable,
                 max_size_per_mapper_queue: int = 0,
                 collector: Callable = None,
                 max_size_per_collector_queue: int = 0,
                 enable_process_id: bool = False,
                 batch_size: int = 1,
                 progress=None,
                 progress_total=None,
                 use_shm=False,
                 enable_collector_queues=True,
                 single_mapper_queue: bool = False):
        self.num_of_processor = num_of_processor
        self.single_mapper_queue = single_mapper_queue
        if sys.version_info >= (3, 8):
            self.collector_queues: typing.Optional[typing.Union[ShmQueue,
                                                                mp.Queue]]
        else:
            self.collector_queues: typing.Optional[mp.Queue]
        if use_shm:
            if sys.version_info >= (3, 8):
                if single_mapper_queue:
                    self.mapper_queues = [
                        ShmQueue(maxsize=max_size_per_mapper_queue *
                                 num_of_processor)
                    ]
                else:
                    self.mapper_queues = [
                        ShmQueue(maxsize=max_size_per_mapper_queue)
                        for _ in range(num_of_processor)
                    ]
                if enable_collector_queues:
                    self.collector_queues = [
                        ShmQueue(maxsize=max_size_per_collector_queue)
                        for _ in range(num_of_processor)
                    ]
                else:
                    self.collector_queues = None
            else:
                raise ValueError(
                    "shm not available in this version of Python.")
        else:
            if single_mapper_queue:
                self.mapper_queues = [
                    mp.Queue(maxsize=max_size_per_mapper_queue *
                             num_of_processor)
                ]
            else:
                self.mapper_queues = [
                    mp.Queue(maxsize=max_size_per_mapper_queue)
                    for _ in range(num_of_processor)
                ]
            if enable_collector_queues:
                self.collector_queues = [
                    mp.Queue(maxsize=max_size_per_collector_queue)
                    for _ in range(num_of_processor)
                ]
                self.collector_qstats = [
                    self.QSTATS_ON for _ in range(num_of_processor)
                ]
            else:
                self.collector_queues = None

        if self.collector_queues is not None:
            if single_mapper_queue:
                self.processes = [
                    mp.Process(target=self._run,
                               args=(i, self.mapper_queues[0],
                                     self.collector_queues[i]))
                    for i in range(num_of_processor)
                ]
            else:
                self.processes = [
                    mp.Process(target=self._run,
                               args=(i, self.mapper_queues[i],
                                     self.collector_queues[i]))
                    for i in range(num_of_processor)
                ]
        else:
            if single_mapper_queue:
                self.processes = [
                    mp.Process(target=self._run,
                               args=(i, self.mapper_queues[0], None))
                    for i in range(num_of_processor)
                ]
            else:
                self.processes = [
                    mp.Process(target=self._run,
                               args=(i, self.mapper_queues[i], None))
                    for i in range(num_of_processor)
                ]
        if progress is not None:
            if sys.version_info >= (3, 8):
                self.progress_queues: typing.Optional[typing.Union[ShmQueue,
                                                                   mp.Queue]]
            else:
                self.progress_queues: typing.Optional[mp.Queue]
            if use_shm:
                if sys.version_info >= (3, 8):
                    self.progress_queues = [
                        ShmQueue(maxsize=1) for _ in range(num_of_processor)
                    ]
                else:
                    raise ValueError(
                        "shm not available in this version of Python.")
            else:
                self.progress_queues = [
                    mp.Queue(maxsize=1) for _ in range(num_of_processor)
                ]
            self.progress_qstats = [
                self.QSTATS_ON for _ in range(num_of_processor)
            ]
        else:
            self.progress_queues = None
        self.progress = progress

        ctx = self
        if not inspect.isclass(mapper) or not issubclass(mapper, Mapper):

            class DefaultMapper(Mapper):
                def process(self, *args, **kwargs):
                    if ctx.enable_process_id:
                        kwargs['_idx'] = self._idx
                    return mapper(*args, **kwargs)

            self.mapper = DefaultMapper
        else:
            self.mapper = mapper

        self.collector = collector
        self.mapper_queue_index = 0
        self.enable_process_id = enable_process_id
        self.batch_size = batch_size
        self.batch_data = []

        # collector can be handled in each process or in main process after merging (collector needs to be set)
        # if collector is set, it needs to be handled in main process;
        # otherwise, it assumes there's no collector.
        if collector:
            self.collector_thread = CollectorThread(self, collector)

        if progress:
            self.progress_thread = ProgressThread(self, progress,
                                                  progress_total,
                                                  num_of_processor)
Example #29
    def filter(self, filter_obj, squash=True, num_procs=mp.cpu_count()):
        """Filter the dataframe using a user-supplied function.

        Note: Operates in parallel on user-supplied lambda functions.

        Arguments:
            filter_obj (callable, list, or QueryMatcher): the filter to apply to the GraphFrame.
            squash (boolean, optional): if True, automatically call squash for the user.
            num_procs (int, optional): number of processes to use when filter_obj is callable; defaults to all logical CPUs.
        """
        dataframe_copy = self.dataframe.copy()

        index_names = self.dataframe.index.names
        dataframe_copy.reset_index(inplace=True)

        filtered_df = None

        if callable(filter_obj):
            # applying pandas filter using the callable function
            if num_procs > 1:
                # perform filter in parallel (default)
                queue = mp.Queue()
                processes = []
                returned_frames = []
                subframes = np.array_split(dataframe_copy, num_procs)

                # Manually create a number of processes equal to the number of
                # logical cpus available
                for pid in range(num_procs):
                    process = mp.Process(
                        target=parallel_apply,
                        args=(filter_obj, subframes[pid], queue),
                    )
                    process.start()
                    processes.append(process)

                # Stores filtered subframes in a list: 'returned_frames', for
                # pandas concatenation. This intermediary list is used because
                # pandas concat is faster when called only once on a list of
                # dataframes, than when called multiple times appending onto a
                # frame of increasing size.
                for pid in range(num_procs):
                    returned_frames.append(queue.get())

                for proc in processes:
                    proc.join()

                filtered_df = pd.concat(returned_frames)

            else:
                # perform the filter sequentially if num_procs == 1
                filtered_rows = dataframe_copy.apply(filter_obj, axis=1)
                filtered_df = dataframe_copy[filtered_rows]

        elif isinstance(filter_obj, list) or isinstance(
                filter_obj, QueryMatcher):
            # use a callpath query to apply the filter
            query = filter_obj
            if isinstance(filter_obj, list):
                query = QueryMatcher(filter_obj)
            query_matches = query.apply(self)
            match_set = list(set().union(*query_matches))
            filtered_df = dataframe_copy.loc[dataframe_copy["node"].isin(
                match_set)]
        else:
            raise InvalidFilter(
                "The argument passed to filter must be a callable, a query path list, or a QueryMatcher object."
            )

        if filtered_df.shape[0] == 0:
            raise EmptyFilter(
                "The provided filter would have produced an empty GraphFrame.")

        filtered_df.set_index(index_names, inplace=True)

        filtered_gf = GraphFrame(self.graph, filtered_df)
        filtered_gf.exc_metrics = self.exc_metrics
        filtered_gf.inc_metrics = self.inc_metrics

        if squash:
            return filtered_gf.squash()
        return filtered_gf
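
parallel_apply is referenced above but not defined in this excerpt. A hedged sketch of what it could do: apply the row-wise filter to one sub-frame and send the surviving rows back through the queue for concatenation in the parent.

def parallel_apply(filter_func, subframe, queue):
    mask = subframe.apply(filter_func, axis=1)
    queue.put(subframe[mask])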
Example #30
def _drain_queue(sock_queue):
    """ Ensures queue is empty before closing """
    time.sleep(3)  # TODO: the socket needs a better way of closing
    while not sock_queue.empty():
        obj = sock_queue.get()
        _handleLogRecord(obj)


if __name__ == '__main__':
    # Some tests
    import logging, logging.handlers
    import multiprocess as mp

    # Set up loggingserver
    log_file = '~/mplogger.log'
    status_queue = mp.Queue()
    loggingserver = mp.Process(target=loggingserver,
                               args=(log_file, status_queue))
    loggingserver.daemon = True
    loggingserver.start()
    server_address = status_queue.get()

    # Connect main process to logging server
    rootLogger = logging.getLogger('')
    rootLogger.setLevel(logging.DEBUG)
    socketHandler = ClientSocketHandler(*server_address)
    rootLogger.addHandler(socketHandler)

    # Send some sample logs
    logging.info('Test1')
    logging.error('Test2')