Example #1
def run_step(step, context):
    """Run a given step.

    1. Starts a process to run the next step.
    2. Creates a queue to communicate with the process.
    3. Changes the state of the Step to Step.RUN.
    """
    log_step(logging.debug, step, 'Preparing objects to run.')

    step.prompt_queue = ProcessQueue()
    step.input_queue = ProcessQueue()
    step.output_queue = ProcessQueue()
    step.result_queue = ProcessQueue()

    # Reset some attributes in case the Step is being re-run.
    # Older values present can be confusing to the user, so remove them.
    step.prompt_messages = []
    step.input_messages = []
    step.return_value = None

    trail_environment = TrailEnvironment(step.prompt_queue, step.input_queue,
                                         step.output_queue)
    step.process = Process(target=step_manager,
                           args=(step, trail_environment, context))
    log_step(logging.debug, step, 'Starting subprocess to run step.')
    step.process.start()
    step.state = step.RUN
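
The step_manager target and the TrailEnvironment class are not shown in this example; the following minimal, self-contained sketch only mirrors the queue handshake that run_step() sets up, with a hypothetical stand-in for step_manager.

# Sketch: the parent owns the queues, the child process reports back through them.
from multiprocessing import Process, Queue as ProcessQueue


def step_manager(output_queue, result_queue):
    # Stand-in for the real step_manager target used above.
    output_queue.put('step started')
    result_queue.put(42)


if __name__ == '__main__':
    output_queue = ProcessQueue()
    result_queue = ProcessQueue()
    proc = Process(target=step_manager, args=(output_queue, result_queue))
    proc.start()
    print(output_queue.get())    # 'step started'
    print(result_queue.get())    # 42
    proc.join()
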
Example #2
    def __init__(self, lane_departure_callback):
        self.lane_departure_callback = lane_departure_callback
        self.running = False
        self.out_queue = ProcessQueue()
        self.in_queue = ProcessQueue()
        self.terminate_event = ProcessEvent()
        self.lane_process = Process(target=self.LaneProcess)
        self.update_thread = threading.Thread(target=self.update_thread)
        self.running = True

        self.lane_process.start()
        self.update_thread.start()
Example #3
    def __init__(self, total, index):
        """

        :param total: total number of workers
        :param index: index of the current worker
        """
        self.queue = ProcessQueue(self.QUEUE_LEN)
        self.process = Process(target=self.run_forever, args=())
        self.total, self.index = total, index
Example #4
def parallel_mode(settings):
    num_workers = settings["num_workers"]
    print("Initializing %d workers" % (num_workers, ))

    to_ps = ProcessQueue()
    from_ps = [ProcessQueue() for _ in range(num_workers)]

    ps = Process(target=parameter_server_process,
                 args=(savedir, settings, to_ps, from_ps))

    workers = []
    for i in range(num_workers):
        workers.append(
            Process(target=worker_process, args=(settings, to_ps, from_ps[i])))

    ps.start()
    for worker in workers:
        worker.start()
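
parameter_server_process and worker_process are not shown here (and savedir presumably comes from the enclosing scope); below is a minimal stand-in sketch of the same wiring, one shared queue feeding the parameter server and one reply queue per worker.

from multiprocessing import Process, Queue as ProcessQueue


def parameter_server(to_ps, from_ps):
    # Fan-in: every worker writes to the single to_ps queue.
    for _ in range(len(from_ps)):
        worker_id, update = to_ps.get()
        # Fan-out: reply on that worker's private queue.
        from_ps[worker_id].put(update * 2)


def worker(worker_id, to_ps, my_queue):
    to_ps.put((worker_id, worker_id + 1))
    print('worker %d received %d' % (worker_id, my_queue.get()))


if __name__ == '__main__':
    num_workers = 3
    to_ps = ProcessQueue()
    from_ps = [ProcessQueue() for _ in range(num_workers)]

    ps = Process(target=parameter_server, args=(to_ps, from_ps))
    workers = [Process(target=worker, args=(i, to_ps, from_ps[i]))
               for i in range(num_workers)]

    ps.start()
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    ps.join()
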
Example #5
 def _gen_processes(self, simulator_class, configuration, generation_count):
     queues = [ProcessQueue() for _ in range(len(self._simulation_states))]
     simulators = [
         Process(target=_run_simulation,
                 args=(queue, simulator_class, configuration, state,
                       generation_count, i))
         for i, (queue,
                 state) in enumerate(zip(queues, self._simulation_states))
     ]
     return queues, simulators
Example #6
def create_stream_listener(stream):
    """Runs listeners to tail STDOUT and STDERR.

    When the shell command is run with Popen, we need a way to read STDOUT
    and STDERR asynchronously and without blocking.

    This is achieved by running the stream_reader function in a subprocess.
    Each such instance is called a listener. This function creates and
    starts one such listener.

    Arguments:
    stream -- A stream to read from. Like subprocess.PIPE. Must support readline() method.

    Returns:
    A tuple of the form: (listener_process, queue)
    Where:
    listener_process -- A multiprocessing.Process object referring to the listener subprocess. This is needed to
                        terminate the listener since the listener contains no termination logic.
    queue            -- A multiprocessing.Queue object into which the listener writes messages read from the stream.
                        This conversion from a stream-like object to a queue-like object allows one to read in a
                        non-blocking manner.
    """
    queue = ProcessQueue()
    listener_process = Process(target=stream_reader, args=(stream, queue))
    listener_process.start()
    return (listener_process, queue)
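
A minimal usage sketch of the (listener_process, queue) pair; stream_reader below is a stand-in that forwards lines into the queue, and the sketch assumes the POSIX 'fork' start method so the listener inherits the open pipe.

import subprocess
import sys
from multiprocessing import Process, Queue as ProcessQueue
from queue import Empty


def stream_reader(stream, queue):
    # Stand-in: push every line of the stream onto the queue.
    for line in iter(stream.readline, b''):
        queue.put(line)


if __name__ == '__main__':
    proc = subprocess.Popen([sys.executable, '-c', "print('hello')"],
                            stdout=subprocess.PIPE)
    queue = ProcessQueue()
    listener = Process(target=stream_reader, args=(proc.stdout, queue))
    listener.start()
    proc.wait()
    try:
        while True:
            print(queue.get(timeout=1))   # read STDOUT lines without blocking on the pipe
    except Empty:
        pass
    listener.terminate()   # the listener has no termination logic of its own
    listener.join()
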
Example #7
def read_cache(cache,
               channel,
               start=None,
               end=None,
               resample=None,
               nproc=1,
               **kwargs):
    """Read a `TimeSeries` from a cache of data files using
    multiprocessing.

    The inner workings are agnostic of data type, but can only handle a
    single data type at a time.

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`, `str`
        cache of GWF frame files, or path to a LAL-format cache file
        on disk
    channel : :class:`~gwpy.detector.channel.Channel`, `str`
        data channel to read from frames
    start : `Time`, :lalsuite:`LIGOTimeGPS`, optional
        start GPS time of desired data
    end : `Time`, :lalsuite:`LIGOTimeGPS`, optional
        end GPS time of desired data
    resample : `float`, optional
        rate (samples per second) to resample
    format : `str`, optional
        name of data file format, e.g. ``gwf`` or ``hdf``.
    nproc : `int`, default: ``1``
        maximum number of independent frame reading processes, default
        is set to single-process file reading.

    Notes
    -----
    The number of independent processes spawned by this function can be
    calculated as ``min(maxprocesses, len(cache)//minprocesssize)``.

    Returns
    -------
    data : :class:`~gwpy.timeseries.core.TimeSeries`
        a new `TimeSeries` containing the data read from disk
    """
    cls = kwargs.pop('target', TimeSeries)
    # open cache from file if given
    if isinstance(cache, (unicode, str, file)):
        cache = open_cache(cache)

    # fudge empty cache
    if len(cache) == 0:
        return cls([], channel=channel, epoch=start)

    # use cache to get start end times
    cache.sort(key=lambda ce: ce.segment[0])
    if start is None:
        start = cache[0].segment[0]
    if end is None:
        end = cache[-1].segment[1]

    # get span
    span = Segment(start, end)
    if cls not in (StateVector, StateVectorDict) and resample:
        cache = cache.sieve(segment=span.protract(8))
    else:
        cache = cache.sieve(segment=span)
    cspan = Segment(cache[0].segment[0], cache[-1].segment[1])

    # if reading one channel, try to use lalframe, it's faster
    if (isinstance(channel, str)
            or (isinstance(channel, (list, tuple)) and len(channel) == 1)):
        try:
            from lalframe import frread
        except ImportError:
            format_ = 'gwf'
        else:
            kwargs.pop('type', None)
            format_ = 'lalframe'
    # otherwise use the file extension as the format
    else:
        format_ = os.path.splitext(cache[0].path)[1][1:]

    # force one frame per process minimum
    nproc = min(nproc, len(cache))

    # single-process
    if nproc <= 1:
        return cls.read(cache,
                        channel,
                        format=format_,
                        start=start,
                        end=end,
                        resample=resample,
                        **kwargs)

    # define how to read each frame
    def _read(q, pstart, pend):
        # don't go beyond the requested limits
        pstart = float(max(start, pstart))
        pend = float(min(end, pend))
        # if resampling TimeSeries, pad by 8 seconds inside cache limits
        if cls not in (StateVector, StateVectorDict) and resample:
            cstart = float(max(cspan[0], pstart - 8))
            subcache = cache.sieve(segment=Segment(cstart, pend))
            out = cls.read(subcache,
                           channel,
                           format=format_,
                           start=cstart,
                           end=pend,
                           resample=None,
                           **kwargs)
            out = out.resample(resample)
            q.put(out.crop(pstart, pend))
        else:
            subcache = cache.sieve(segment=Segment(pstart, pend))
            q.put(
                cls.read(subcache,
                         channel,
                         format=format_,
                         start=pstart,
                         end=pend,
                         resample=resample,
                         **kwargs))

    # separate cache into parts
    fperproc = int(ceil(len(cache) / nproc))
    subcaches = [
        Cache(cache[i:i + fperproc]) for i in range(0, len(cache), fperproc)
    ]
    subsegments = SegmentList(
        [Segment(c[0].segment[0], c[-1].segment[1]) for c in subcaches])

    # start all processes
    queue = ProcessQueue(nproc)
    proclist = []
    for subseg in subsegments:
        process = Process(target=_read, args=(queue, subseg[0], subseg[1]))
        process.daemon = True
        proclist.append(process)
        process.start()

    # get data and block
    data = [queue.get() for p in proclist]
    for process in proclist:
        process.join()

    # format and return
    if issubclass(cls, dict):
        try:
            data.sort(key=lambda tsd: tsd.values()[0].epoch.gps)
        except IndexError:
            pass
        out = cls()
        while len(data):
            tsd = data.pop(0)
            out.append(tsd)
            del tsd
        return out
    else:
        out = TimeSeriesList(*data)
        out.sort(key=lambda ts: ts.epoch.gps)
        ts = out.join()
        return ts
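
Note the ordering above: every queue.get() happens before any process.join(), because a child whose output is still buffered in the queue's pipe will not exit, so joining first can deadlock. A stripped-down sketch of the same fan-out/collect pattern, with a stand-in _read:

from multiprocessing import Process, Queue as ProcessQueue


def _read(queue, start, end):
    # Stand-in for the real _read(): just report the span it was given.
    queue.put((start, end))


if __name__ == '__main__':
    segments = [(0, 10), (10, 20), (20, 30)]
    queue = ProcessQueue(len(segments))
    procs = []
    for seg in segments:
        proc = Process(target=_read, args=(queue, seg[0], seg[1]))
        proc.daemon = True
        procs.append(proc)
        proc.start()

    # Drain the queue first, then join, mirroring the order used above.
    data = [queue.get() for _ in procs]
    for proc in procs:
        proc.join()

    print(sorted(data))
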
Example #8
 def __init__(self):
     super(DistributorProcess, self).__init__(ProcessQueue(),
                                              ProcessEvent())
Example #9
def from_timeseries(ts1, ts2, stride, fftlength=None, fftstride=None,
                    window=None, nproc=1, **kwargs):
    """Calculate the coherence `Spectrogram` between two `TimeSeries`.

    Parameters
    ----------
    ts1, ts2 : :class:`~gwpy.timeseries.core.TimeSeries`
        the two input time-series from which to compute the coherence.
    stride : `float`
        number of seconds in single PSD (column of spectrogram).
    fftlength : `float`
        number of seconds in single FFT.
    fftstride : `int`, optional, default: fftlength
        number of seconds between FFTs.
    window : `timeseries.window.Window`, optional, default: `None`
        window function to apply to timeseries prior to FFT.
    nproc : `int`, default: ``1``
        maximum number of independent processes to use in the calculation,
        default is single-process operation.

    Returns
    -------
    spectrogram : :class:`~gwpy.spectrogram.core.Spectrogram`
        time-frequency power spectrogram as generated from the
        input time-series.
    """
    # format FFT parameters
    if fftlength is None:
        fftlength = stride
    if fftstride is None:
        fftstride = fftlength

    sampling = min(ts1.sample_rate.value, ts2.sample_rate.value)

    # get size of spectrogram
    nFFT = int(fftlength * sampling)
    nsteps = int(ts1.size // (stride * ts1.sample_rate.value))
    nproc = min(nsteps, nproc)

    # single-process return
    if nsteps == 0 or nproc == 1:
        return _from_timeseries(ts1, ts2, stride, fftlength=fftlength,
                                fftstride=fftstride, window=window, **kwargs)

    # wrap spectrogram generator
    def _specgram(q, ts):
        try:
            q.put(_from_timeseries(ts, ts2, stride, fftlength=fftlength,
                                   fftstride=fftstride, window=window,
                                   **kwargs))
        except Exception as e:
            q.put(e)

    # otherwise build process list
    stepperproc = int(ceil(nsteps / nproc))
    nsamp = [stepperproc * ts.sample_rate.value * stride for ts in (ts1, ts2)]

    queue = ProcessQueue(nproc)
    processlist = []
    for i in range(nproc):
        process = Process(target=_specgram,
                          args=(queue, ts1[i * nsamp[0]:(i + 1) * nsamp[0]],
                                ts2[i * nsamp[1]:(i + 1) * nsamp[1]]))
        process.daemon = True
        processlist.append(process)
        process.start()
        if ((i + 1) * nsamp[0]) >= ts1.size:
            break

    # get data
    data = []
    for process in processlist:
        result = queue.get()
        if isinstance(result, Exception):
            raise result
        else:
            data.append(result)

    # and block
    for process in processlist:
        process.join()

    # format and return
    out = SpectrogramList(*data)
    out.sort(key=lambda spec: spec.epoch.gps)
    return out.join()
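
The _specgram wrapper forwards any exception through the queue instead of letting the worker die silently, and the parent re-raises it. A minimal sketch of that idiom with a stand-in worker:

from multiprocessing import Process, Queue as ProcessQueue


def worker(queue, value):
    try:
        if value < 0:
            raise ValueError('negative input: %d' % value)
        queue.put(value ** 2)
    except Exception as exc:
        queue.put(exc)   # ship the exception itself back to the parent


if __name__ == '__main__':
    queue = ProcessQueue()
    procs = [Process(target=worker, args=(queue, v)) for v in (3, 4)]
    for proc in procs:
        proc.start()
    results = []
    for _ in procs:
        result = queue.get()
        if isinstance(result, Exception):
            raise result
        results.append(result)
    for proc in procs:
        proc.join()
    print(sorted(results))   # [9, 16]
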
Example #10
def search_parallel(username, password, client_matter, q, num_workers=15):
    '''
	Run a search and download all result pages in parallel by launching many processes.

	q:			The search query to run
	num_workers:		How many parallel processes to start
	'''
    login_token = call(call="login",
                       method="POST",
                       username=username,
                       password=password)['login_token']
    first_page = call(call="search",
                      method="GET",
                      q=q,
                      login_token=login_token,
                      client_matter=client_matter)

    num_first_page = len(first_page['search_results'])

    num_results = first_page['count']
    # The main thread removes them from searchqueue and puts them into a list.
    results = [None] * num_results
    results[:num_first_page] = first_page['search_results']
    logging.info("Downloading %s Results, already got first %d" %
                 (num_results, num_first_page))

    # Put all of the search ranges into the result queue
    dlqueue = ProcessQueue()
    NUM_AT_ONCE = 20
    for i in xrange(num_first_page, num_results, NUM_AT_ONCE):
        limit = min(num_results, i + NUM_AT_ONCE) - i
        logging.info("Added: %s --> %s" % (i, i + limit))
        dlqueue.put((i, limit))

    # The processes will put their results into the searchqueue
    searchqueue = ProcessQueue()
    # Start up the parallel processes
    pool = MultiProcessPool(
        processes=num_workers,
        initializer=_search_worker,
        initargs=[username, password, client_matter, q, dlqueue, searchqueue])
    try:
        # Continue until the processing queue is empty.
        got = 0
        while True:
            # Poll the result queue periodically while the workers search.
            time.sleep(2.0 / num_workers)
            try:
                item = searchqueue.get_nowait()
                start, end = item['offset'], item['offset'] + item['limit']
                results[start:end] = item['result']['search_results']
                logging.info("Downloaded: %s --> %s (of %d total)" %
                             (start, end, num_results))
                got += 1
            except Empty:
                left = len(results) - len(filter(None, results))
                if left <= 0:
                    break
                logging.info("Got %d, %d results. Waiting for %d more." %
                             (got, len(results), left))
                continue
            except Exception as e:
                logging.info("Main thread loop exception: %s" % e)
                break

    except KeyboardInterrupt as e:
        logging.info("Main thread exception: %s" % e)
        dlqueue.close()
        searchqueue.close()
        pool.close()
        pool.terminate()
        # Return what we have even if there was an exception.
        return results

    for i, r in enumerate(results):
        if not r:
            print("Missing Result %s" % (i + 1))

    return {
        'search_results': results,
        'count': num_results,
    }
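
_search_worker is not shown in this example; the sketch below shows the same shape with a stand-in worker: a Pool whose initializer drains a shared job queue and pushes results onto a second queue, with None sentinels added here so the stand-in workers know when to stop.

import time
from multiprocessing import Pool as MultiProcessPool, Queue as ProcessQueue
from queue import Empty


def _worker(inqueue, outqueue):
    # Stand-in worker: consume jobs until the None sentinel appears.
    while True:
        job = inqueue.get()
        if job is None:
            return
        offset, limit = job
        outqueue.put({'offset': offset, 'limit': limit})


if __name__ == '__main__':
    num_workers = 4
    dlqueue, resultqueue = ProcessQueue(), ProcessQueue()
    jobs = [(offset, 20) for offset in range(0, 100, 20)]
    for job in jobs:
        dlqueue.put(job)
    for _ in range(num_workers):
        dlqueue.put(None)   # one sentinel per worker

    pool = MultiProcessPool(processes=num_workers, initializer=_worker,
                            initargs=[dlqueue, resultqueue])
    got = []
    while len(got) < len(jobs):
        try:
            got.append(resultqueue.get_nowait())
        except Empty:
            time.sleep(0.1)
    pool.close()
    pool.terminate()
    print(sorted(item['offset'] for item in got))   # [0, 20, 40, 60, 80]
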
Example #11
def read_cache(cache,
               channel,
               start=None,
               end=None,
               resample=None,
               gap=None,
               pad=None,
               nproc=1,
               format=None,
               **kwargs):
    """Read a `TimeSeries` from a cache of data files using
    multiprocessing.

    The inner workings are agnostic of data type, but can only handle a
    single data type at a time.

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`, `str`
        cache of GWF frame files, or path to a LAL-format cache file
        on disk
    channel : :class:`~gwpy.detector.channel.Channel`, `str`
        data channel to read from frames
    start : `Time`, `~gwpy.time.LIGOTimeGPS`, optional
        start GPS time of desired data
    end : `Time`, `~gwpy.time.LIGOTimeGPS`, optional
        end GPS time of desired data
    resample : `float`, optional
        rate (samples per second) to resample
    format : `str`, optional
        name of data file format, e.g. ``gwf`` or ``hdf``.
    nproc : `int`, default: ``1``
        maximum number of independent frame reading processes, default
        is set to single-process file reading.
    gap : `str`, optional
        how to handle gaps in the cache, one of

        - 'ignore': do nothing, let the underlying reader method handle it
        - 'warn': do nothing except print a warning to the screen
        - 'raise': raise an exception upon finding a gap (default)
        - 'pad': insert a value to fill the gaps

    pad : `float`, optional
        value with which to fill gaps in the source data, only used if
        gap is not given, or `gap='pad'` is given

    Notes
    -----
    The number of independent processes spawned by this function can be
    calculated as ``min(maxprocesses, len(cache)//minprocesssize)``.

    Returns
    -------
    data : :class:`~gwpy.timeseries.TimeSeries`
        a new `TimeSeries` containing the data read from disk
    """
    from gwpy.segments import (Segment, SegmentList)

    cls = kwargs.pop('target', TimeSeries)
    # open cache from file if given
    if isinstance(cache, (unicode, str, file)):
        cache = open_cache(cache)

    # fudge empty cache
    if len(cache) == 0:
        return cls([], channel=channel, epoch=start)

    # use cache to get start end times
    cache.sort(key=lambda ce: ce.segment[0])
    if start is None:
        start = cache[0].segment[0]
    if end is None:
        end = cache[-1].segment[1]

    # get span
    span = Segment(start, end)
    if cls not in (StateVector, StateVectorDict) and resample:
        cache = cache.sieve(segment=span.protract(8))
    else:
        cache = cache.sieve(segment=span)
    cspan = Segment(cache[0].segment[0], cache[-1].segment[1])

    # check for gaps
    if gap is None and pad is not None:
        gap = 'pad'
    elif gap is None:
        gap = 'raise'
    segs = cache_segments(cache, on_missing='ignore') & SegmentList([span])
    if len(segs) != 1 and gap.lower() == 'ignore' or gap.lower() == 'pad':
        pass
    elif len(segs) != 1:
        gaps = SegmentList([cspan]) - segs
        msg = ("The cache given to %s.read has gaps in it in the "
               "following segments:\n    %s" %
               (cls.__name__, '\n    '.join(map(str, gaps))))
        if gap.lower() == 'warn':
            warnings.warn(msg)
        else:
            raise ValueError(msg)
        segs = type(segs)([span])

    # if reading a small number of channels, try to use lalframe, it's faster
    if format is None and (isinstance(channel, str) or
                           (isinstance(channel, (list, tuple))
                            and len(channel) <= MAX_LALFRAME_CHANNELS)):
        try:
            from lalframe import frread
        except ImportError:
            format = 'gwf'
        else:
            kwargs.pop('type', None)
            format = 'lalframe'
    # otherwise use the file extension as the format
    elif format is None:
        format = os.path.splitext(cache[0].path)[1][1:]

    # -- process multiple cache segments --------
    # this entry point loops this method for each segment

    if len(segs) > 1:
        out = None
        for seg in segs:
            new = read_cache(cache,
                             channel,
                             start=seg[0],
                             end=seg[1],
                             resample=resample,
                             nproc=nproc,
                             format=format,
                             target=cls,
                             **kwargs)
            if out is None:
                out = new
            else:
                out.append(new, gap='pad', pad=pad)
        return out

    # -- process single cache segment

    # force one frame per process minimum
    nproc = min(nproc, len(cache))

    # single-process
    if nproc <= 1:
        return cls.read(cache,
                        channel,
                        format=format,
                        start=start,
                        end=end,
                        resample=resample,
                        **kwargs)

    # define how to read each frame
    def _read(q, pstart, pend):
        try:
            # don't go beyond the requested limits
            pstart = float(max(start, pstart))
            pend = float(min(end, pend))
            # if resampling TimeSeries, pad by 8 seconds inside cache limits
            if cls not in (StateVector, StateVectorDict) and resample:
                cstart = float(max(cspan[0], pstart - 8))
                subcache = cache.sieve(segment=Segment(cstart, pend))
                out = cls.read(subcache,
                               channel,
                               format=format,
                               start=cstart,
                               end=pend,
                               resample=None,
                               **kwargs)
                out = out.resample(resample)
                q.put(out.crop(pstart, pend))
            else:
                subcache = cache.sieve(segment=Segment(pstart, pend))
                q.put(
                    cls.read(subcache,
                             channel,
                             format=format,
                             start=pstart,
                             end=pend,
                             resample=resample,
                             **kwargs))
        except Exception as e:
            q.put(e)

    # separate cache into parts
    fperproc = int(ceil(len(cache) / nproc))
    subcaches = [
        Cache(cache[i:i + fperproc]) for i in range(0, len(cache), fperproc)
    ]
    subsegments = SegmentList(
        [Segment(c[0].segment[0], c[-1].segment[1]) for c in subcaches])

    # start all processes
    queue = ProcessQueue(nproc)
    proclist = []
    for subseg in subsegments:
        process = Process(target=_read, args=(queue, subseg[0], subseg[1]))
        process.daemon = True
        proclist.append(process)
        process.start()

    # get data and block
    data = [queue.get() for p in proclist]
    for process in proclist:
        process.join()
    for result in data:
        if isinstance(result, Exception):
            raise result

    # format and return
    if issubclass(cls, dict):
        try:
            data.sort(key=lambda tsd: tsd.values()[0].epoch.gps)
        except IndexError:
            pass
        out = cls()
        while len(data):
            tsd = data.pop(0)
            out.append(tsd)
            del tsd
        return out
    else:
        if cls in (TimeSeries, TimeSeriesDict):
            out = TimeSeriesList(*data)
        else:
            out = StateVectorList(*data)
        out.sort(key=lambda ts: ts.epoch.gps)
        ts = out.join(gap=gap)
        return ts
Example #12
    def _read(cls, source, *args, **kwargs):
        # parse input as a list of files
        if isinstance(source, list):
            files = source
        else:
            try:  # try and map to a list of file-like objects
                files = file_list(source)
            except ValueError:  # otherwise treat as single
                files = [source]

        # determine input format
        if kwargs.get('format', None) is None:
            kwargs['format'] = get_format('read', cls, files[0], source, args,
                                          kwargs)

        # calculate maximum number of processes
        nproc = kwargs.pop('nproc', 1)
        num = len(files)
        nproc = min(nproc, num)

        # read single file or single process
        if num == 1:
            return reader(cls, files[0], *args, **kwargs)
        if nproc == 1:
            return reader(cls, source, *args, **kwargs)

        # define multiprocessing method
        def _read_chunk(q, chunk, index):
            if len(chunk) == 1:
                chunk = chunk[0]
            try:
                if cls:
                    q.put((index, reader(cls, chunk, *args, **kwargs)))
                else:
                    q.put((index, reader(chunk, *args, **kwargs)))
            except Exception as e:
                q.put(e)

        # split source into parts
        numperproc = int(ceil(num / nproc))
        chunks = [
            type(files)(files[i:i + numperproc])
            for i in range(0, num, numperproc)
        ]

        # process
        queue = ProcessQueue(nproc)
        processes = []
        for i, chunk in enumerate(chunks):
            if len(chunk) == 0:
                continue
            process = Process(target=_read_chunk, args=(queue, chunk, i))
            process.daemon = True
            process.start()
            processes.append(process)

        # get data and block
        output = []
        for i in range(len(processes)):
            result = queue.get()
            if isinstance(result, Exception):
                raise result
            output.append(result)
        for process in processes:
            process.join()

        # return chunks sorted into input order
        return flatten(zip(*sorted(output, key=lambda out: out[0]))[1])
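
Because results arrive on the queue in arbitrary order, each one is tagged with its chunk index and the parent sorts on that index afterwards. A minimal sketch of the idiom with a stand-in _read_chunk:

from multiprocessing import Process, Queue as ProcessQueue


def _read_chunk(queue, chunk, index):
    # Stand-in reader: tag the processed chunk with its position.
    queue.put((index, [item.upper() for item in chunk]))


if __name__ == '__main__':
    chunks = [['a', 'b'], ['c', 'd'], ['e']]
    queue = ProcessQueue(len(chunks))
    procs = []
    for i, chunk in enumerate(chunks):
        proc = Process(target=_read_chunk, args=(queue, chunk, i))
        proc.daemon = True
        proc.start()
        procs.append(proc)

    output = [queue.get() for _ in procs]
    for proc in procs:
        proc.join()

    # Sorting by the index restores the original input order.
    print([item for _, chunk in sorted(output) for item in chunk])   # ['A', 'B', 'C', 'D', 'E']
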
Example #13
def getdocket_parallel(username,
                       password,
                       client_matter,
                       docket_list,
                       cached=False,
                       num_workers=15,
                       save_progress=None):
    '''
	Download a list of dockets in parallel by launching many processes.
	
	docket_list:		A list of (court, docket) tuples
	num_workers:		How many parallel processes to start
	cached:				Get cached dockets instead of fresh ones from the court
	save_progress		Use a temporary file to save work in case we crash.
	'''
    if save_progress != None:
        save_progress = shelve.open(save_progress, 'c')

    def get_key(court, docket):
        return ("(%s),(%s)" % (court, docket)).encode('ascii', 'ignore')

    dockets = []

    # Put all of the tuples into a processing queue
    dlqueue = ProcessQueue()
    for court, docket in docket_list:
        k = get_key(court, docket)
        if save_progress != None and save_progress.get(k) and \
          save_progress[k]['result']['success']:
            # Add to the results
            dockets.append(save_progress[k])
        else:
            # Add it to the download queue
            dlqueue.put((court, docket))

    # The processes will put their results into the docketqueue
    docketqueue = ProcessQueue()
    # The main thread removes them from docketqueue and puts them into a list.

    # Start up the parallel processes
    pool = MultiProcessPool(processes=num_workers,
                            initializer=_dl_worker,
                            initargs=[
                                username, password, client_matter, cached,
                                dlqueue, docketqueue
                            ])

    try:
        # Continue until the processing queue is empty
        got = 0
        while True:
            # It takes about 15 seconds to download a docket, so wait that long.
            time.sleep(1.0)
            try:
                # get_nowait will raise Empty and break the loop
                while True:
                    new_docket = docketqueue.get_nowait()
                    dockets.append(new_docket)
                    # Only save if successful
                    if save_progress != None and new_docket['result'][
                            'success']:
                        # Save our progress
                        k = get_key(new_docket['court'], new_docket['docket'])
                        save_progress[k] = new_docket
                    got += 1
            except Empty:
                if save_progress != None:
                    print("Syncing dbase (len=%d), dockets=%d " %
                          (len(save_progress), len(dockets)))
                    save_progress.sync()
                left = len(docket_list) - len(dockets)
                if left <= 0:
                    break
                logging.info("Got %d, %d total dockets. Waiting again." %
                             (got, len(dockets)))
                continue
            except Exception as e:
                logging.info("Main thread loop exception: %s" % e)
                break

    except KeyboardInterrupt as e:
        logging.info("Main thread exception: %s" % e)
        dlqueue.close()
        docketqueue.close()
        pool.close()
        pool.terminate()
        # Return what we have even if there was an exception.

    if save_progress != None:
        save_progress.sync()
        save_progress.close()
    return dockets
Example #14
    def fit(self, data):
        """Main thread: adds task while semaphore free, else blocks. 
        Other thread is used to free up finished tasks. Quite simple to just 
        
        Args:
            data (MicroArrayData): data.
        """
        if self.verbose:
            print '[Parallel] fitting {} tasks with {} process{}...'.format(
                len(self.tasks), self.processes,
                'es' if self.processes > 1 else '')
        assert issubclass(type(data), MicroArrayData)

        start_time = time.time()

        # need to use two different kinds of queues, one thread-safe and one process-safe
        task_queue = ThreadQueue()  # Pipe tasks between threads
        result_queue = ProcessQueue()  # Pipe results back to self.tasks list

        # keep track of start time per task
        def wrap_fit(task, data, index):
            """Wrapper of fit method, keep track of index of in self.task
            list where the results will be put back to
            """
            result_queue.put((task.fit(data), index))

        # Thread - start processes and acquire semaphore
        def add_processes(task_queue):
            indices = range(len(self.tasks))
            if self.randomize: random.shuffle(indices)
            for index in indices:
                task = self.tasks[index]
                for _ in xrange(task.processes):
                    self._semaphore.acquire()
                if self.verbose >= 3:
                    time.sleep(0.1)
                    print '[thread-start] acquired', task.processes, 'process{} for'.format(
                        'es' if task.processes > 1 else ''), task.name
                p = Process(target=wrap_fit, args=(task, data, index))
                # Use non-daemonic processes so each task can itself use multiprocessing.
                p.daemon = False
                p.start()
                # Put tuple of process and associated task in queue.
                task_queue.put((p, task))
            task_queue.put(None)  # send sentinel

        thread_add_processes = Thread(target=add_processes,
                                      args=(task_queue, ))
        thread_add_processes.start()

        # Thread - maintain processes and release semaphore
        def handle_processes(task_queue):
            running_tasks = []
            finished = False
            print_count = 1
            while not finished or len(running_tasks) > 0:
                # check task_queue at intervals
                if not task_queue.empty():
                    next_task = task_queue.get(timeout=0.1)
                    # receive STOP sentinel, finish
                    if next_task is None:
                        finished = True
                    else:
                        running_tasks.append(next_task)
                # maintain process list;
                for proc, task in running_tasks[:]:
                    if not proc.is_alive():
                        if self.verbose >= 3:
                            print '[thread-maintain] releasing', task.processes, 'process{} for'.format(
                                'es' if task.processes > 1 else ''), task.name
                        for _ in xrange(task.processes):
                            self._semaphore.release()
                        proc.terminate()
                        running_tasks.remove((proc, task))
                        break  # restart the scan after removing a finished process
                time.sleep(.5)
                # print currently running processes every once in a while.
                if int((time.time() - start_time) / self.print_fitting_time
                       ) > print_count and self.verbose >= 1:
                    print '[Parallel][{:02d}h{:02d}m] running:'.format(
                        *divmod(print_count * 10, 60)),
                    for _, task in running_tasks:
                        if task == running_tasks[-1][1]:  # last task
                            print '{}'.format(task.name)
                        else:
                            print '{},'.format(task.name),
                        # print '[Parallel] {} ({:d}:{:2d})'.format(task.name, *divmod(int(start_time_task[task.name] - time.time()/60), 60))
                    print_count += 1

        thread_handle_processes = Thread(target=handle_processes,
                                         args=(task_queue, ))
        thread_handle_processes.start()

        # Thread - catch results from result_queue and put back in self.task list
        def handle_results():
            processed_results = 0
            while processed_results < len(self.tasks):
                task, index = result_queue.get()
                if self.verbose >= 3:
                    print '[thread-result] saving result for', task.name, 'to task list'
                self.tasks[index] = task
                processed_results += 1
                time.sleep(.1)

        thread_handle_results = Thread(target=handle_results, args=())
        thread_handle_results.start()

        # block main thread
        thread_add_processes.join()
        thread_handle_processes.join()
        thread_handle_results.join()

        assert all((i.done for i in self.tasks))
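
A minimal sketch of the semaphore-bounded scheduling used here: one thread acquires a slot before starting each Process, another releases the slot once that process has exited, and results come back over a ProcessQueue. The work function is a stand-in for task.fit(data).

import threading
import time
from multiprocessing import Process, Queue as ProcessQueue


def work(result_queue, index):
    time.sleep(0.2)               # stand-in for task.fit(data)
    result_queue.put(index)


if __name__ == '__main__':
    num_tasks, max_parallel = 6, 2
    semaphore = threading.Semaphore(max_parallel)
    result_queue = ProcessQueue()
    running = []

    def start_all():
        for index in range(num_tasks):
            semaphore.acquire()   # blocks while max_parallel tasks are running
            proc = Process(target=work, args=(result_queue, index))
            proc.start()
            running.append(proc)

    def reap():
        reaped = 0
        while reaped < num_tasks:
            for proc in running[:]:
                if not proc.is_alive():
                    proc.join()
                    running.remove(proc)
                    semaphore.release()   # free a slot for the starter thread
                    reaped += 1
            time.sleep(0.05)

    starter = threading.Thread(target=start_all)
    reaper = threading.Thread(target=reap)
    starter.start()
    reaper.start()
    starter.join()
    reaper.join()

    print(sorted(result_queue.get() for _ in range(num_tasks)))   # [0, 1, 2, 3, 4, 5]
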
Example #15
def getdocket_parallel(username,
                       password,
                       client_matter,
                       docket_list,
                       cached=False,
                       num_workers=15,
                       save_progress=None,
                       _async=False):
    '''
	Download a list of dockets in parallel by launching many processes.
	
	docket_list:		A list of (court, docket) tuples
	num_workers:		How many parallel processes to start
	cached:				Get cached dockets instead of fresh ones from the court
	save_progress		Use a temporary file to save work in case we crash.
	_async:				If True, we get data asynchronously.
	'''
    if save_progress != None:
        if _async == True:
            raise NotImplementedError("Cannot save progress and async.")
        save_progress = shelve.open(save_progress, 'c')

    def get_key(court, docket):
        return ("(%s),(%s)" % (court, docket)).encode('ascii', 'ignore')

    dockets = []

    def deb(msg, *args, **kwargs):
        msg = "getdocket_parallel %s-%s: %s" % (username, client_matter, msg)
        logging.info(msg, *args, **kwargs)

    # Put all of the tuples into a processing queue
    dlqueue = ProcessQueue()
    for c_vals in docket_list:
        c_vals = list(c_vals)
        if len(c_vals) < 2:
            raise Exception(
                "Expecting a list of at least two with court, "
                "docket, instead got: %s", c_vals)
        court, docket = c_vals[:2]
        k = get_key(court, docket)
        if save_progress != None and save_progress.get(k) and \
          save_progress[k]['result']['success']:
            # Add to the results
            dockets.append(save_progress[k])
        else:
            # Add it to the download queue
            dlqueue.put((court, docket))

    # The processes will put their results into the docketqueue
    docketqueue = ProcessQueue()
    # The main thread removes them from docketqueue and puts them into a list.

    # Start up the parallel processes
    pool = MultiProcessPool(processes=num_workers,
                            initializer=_dl_worker,
                            initargs=[
                                username, password, client_matter, cached,
                                dlqueue, docketqueue
                            ])

    def iterator(sleep_time=1.0):
        '''An iterator that goes through all of the given dockets.'''
        # Continue until the processing queue is empty
        got, iters, total = 0, 0, len(docket_list)
        while True:
            # It takes about 15 seconds to download a docket, so wait that long.
            iters += 1
            try:
                time.sleep(sleep_time)
                # get_nowait will raise Empty and break the loop
                while True:
                    yield docketqueue.get_nowait()
                    got += 1
            except Empty:
                left = total - got
                if left <= 0:
                    deb("Finished iterating %s" % total)
                    break
                if iters % 5 == 0:
                    deb("Did %d/%d, %d left.", got, total, left)
                continue
            except KeyboardInterrupt as e:
                deb("Main thread interrupt: %s" % e)
                break
            except Exception as e:
                deb("Main thread loop exception: %s" % e)
                break

        dlqueue.close()
        docketqueue.close()
        pool.close()
        pool.terminate()

    if _async:
        return iterator

    for new_i, new_docket in enumerate(iterator()):
        dockets.append(new_docket)
        # Only save if successful
        if save_progress != None and new_docket['result']['success']:
            # Save our progress
            k = get_key(new_docket['court'], new_docket['docket'])
            save_progress[k] = new_docket
        elif save_progress != None and new_i % 20 == 0:
            deb("sync dbase len=%d, added=%d ", len(save_progress), 'got')
            save_progress.sync()

        # Return what we have even if there was an exception.

    if save_progress != None:
        save_progress.sync()
        save_progress.close()
    return dockets
Example #16
 def create_queue(self, queue_limit):
     return ProcessQueue(queue_limit)
Example #17
    if save_progress != None:
        if async:
            raise NotImplementedError("Cannot save progress and async.")
        save_progress = shelve.open(save_progress, 'c')

    def get_key(court, docket):
        return ("(%s),(%s)" % (court, docket)).encode('ascii', 'ignore')

    dockets = []

    def deb(msg, *args, **kwargs):
        msg = "getdocket_parallel %s-%s: %s" % (username, client_matter, msg)
        logging.info(msg, *args, **kwargs)

    # Put all of the tuples into a processing queue
    dlqueue = ProcessQueue()
    for c_vals in docket_list:
        c_vals = list(c_vals)
        if len(c_vals) < 2:
            raise Exception(
                "Expecting a list of at least two with court, "
                "docket, instead got: %s", c_vals)
        court, docket = c_vals[:2]
        k = get_key(court, docket)
        if save_progress != None and save_progress.get(k) and \
          save_progress[k]['result']['success']:
            # Add to the results
            dockets.append(save_progress[k])
        else:
            # Add it to the download queue
            dlqueue.put((court, docket))
Example #18
def from_timeseries(timeseries,
                    stride,
                    fftlength=None,
                    fftstride=None,
                    method='welch',
                    window=None,
                    plan=None,
                    nproc=1):
    """Calculate the average power spectrogram of this `TimeSeries`
    using the specified average spectrum method.

    Parameters
    ----------
    timeseries : :class:`~gwpy.timeseries.core.TimeSeries`
        input time-series to process.
    stride : `float`
        number of seconds in single PSD (column of spectrogram).
    fftlength : `float`
        number of seconds in single FFT.
    method : `str`, optional, default: 'welch'
        average spectrum method.
    fftstride : `int`, optional, default: fftlength
        number of seconds between FFTs.
    window : `timeseries.window.Window`, optional, default: `None`
        window function to apply to timeseries prior to FFT.
    plan : :lalsuite:`REAL8FFTPlan`, optional
        LAL FFT plan to use when generating average spectrum,
        substitute type 'REAL8' as appropriate.
    nproc : `int`, default: ``1``
        maximum number of independent processes to use in the calculation,
        default is single-process operation.

    Returns
    -------
    spectrogram : :class:`~gwpy.spectrogram.core.Spectrogram`
        time-frequency power spectrogram as generated from the
        input time-series.
    """
    # format FFT parameters
    if fftlength is None:
        fftlength = stride
    if fftstride is None:
        fftstride = fftlength

    # get size of spectrogram
    nFFT = int(fftlength * timeseries.sample_rate.value)
    nsteps = int(timeseries.size // (stride * timeseries.sample_rate.value))
    nproc = min(nsteps, nproc)

    # generate window and plan if needed
    try:
        from lal import lal
    except ImportError:
        pass
    else:
        if window is None:
            window = psd.generate_lal_window(nFFT, dtype=timeseries.dtype)
        if plan is None:
            plan = psd.generate_lal_fft_plan(nFFT, dtype=timeseries.dtype)

    # single-process return
    if nsteps == 0 or nproc == 1:
        return _from_timeseries(timeseries,
                                stride,
                                fftlength=fftlength,
                                fftstride=fftstride,
                                method=method,
                                window=window)

    # wrap spectrogram generator
    def _specgram(q, ts):
        try:
            q.put(
                _from_timeseries(ts,
                                 stride,
                                 fftlength=fftlength,
                                 fftstride=fftstride,
                                 method=method,
                                 window=window,
                                 plan=plan))
        except Exception as e:
            q.put(e)

    # otherwise build process list
    stepperproc = int(ceil(nsteps / nproc))
    nsamp = stepperproc * timeseries.sample_rate.value * stride
    queue = ProcessQueue(nproc)
    processlist = []
    for i in range(nproc):
        process = Process(target=_specgram,
                          args=(queue, timeseries[i * nsamp:(i + 1) * nsamp]))
        process.daemon = True
        processlist.append(process)
        process.start()
        if ((i + 1) * nsamp) >= timeseries.size:
            break

    # get data
    data = []
    for process in processlist:
        result = queue.get()
        if isinstance(result, Exception):
            raise result
        else:
            data.append(result)
    # and block
    for process in processlist:
        process.join()

    # format and return
    out = SpectrogramList(*data)
    out.sort(key=lambda spec: spec.epoch.gps)
    return out.join()
Example #19
def read_cache(cache, target, nproc, post, *args, **kwargs):
    """Read arbitrary data from a cache file

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`, `str`
        cache of files, or path to a LAL-format cache file
        on disk.
    target : `type`
        target class to read into.
    nproc : `int`
        number of individual processes to use.
    post : `function`
        function to post-process output object before returning.
        The output of this function will be returned, so in-place operations
        must still return the object.
    *args
        other positional arguments to pass to the target.read()
        classmethod.
    **kwargs
        keyword arguments to pass to the target.read() classmethod.

    Returns
    -------
    data : target
        an instance of the target class, seeded with data read from
        the cache.

    Notes
    -----
    The returned object is constructed from the output of each
    sub-process via the '+=' in-place addition operator.

    If the input cache is indeed a :class:`~glue.lal.Cache` object,
    the sub-processes will be combined in time order, otherwise the ordering
    is given by the order of entries in the input cache (for example,
    if it is a simple `list` of files).

    .. warning::

       no protection is given against overloading the host, for example,
       no checks are done to ensure that ``nproc`` is less than the number
       of available cores.

       High values of ``nproc`` should be used at the user's discretion;
       the GWpy team accepts no liability for loss as a result of abuse
       of this feature.
    """
    # read the cache
    if isinstance(cache, (file, unicode, str)):
        cache = open_cache(cache)
    if isinstance(cache, Cache):
        cache.sort(key=lambda ce: ce.segment[0])

    # force one file per process minimum
    nproc = min(nproc, len(cache))
    if nproc > cpu_count():
        warnings.warn("Using %d processes on a %d-core machine is "
                      "unrecommended...but not forbidden."
                      % (nproc, cpu_count()))

    # work out underlying data type
    try:
        kwargs.setdefault(
            'format', _get_valid_format('read', target, None,
                                        None, (cache[0],), {}))
    # if empty, put anything, since it doesn't matter
    except IndexError:
        kwargs.setdefault('format', 'ascii')
    except Exception:
        if 'format' not in kwargs:
            raise

    if nproc <= 1:
        return target.read(cache, *args, **kwargs)

    # define how to read each sub-cache
    def _read(q, sc, i):
        try:
            q.put((i, target.read(sc, *args, **kwargs)))
        except Exception as e:
            q.put(e)

    # separate cache into parts
    fperproc = int(ceil(len(cache) / nproc))
    subcaches = [cache.__class__(cache[i:i+fperproc]) for
                 i in range(0, len(cache), fperproc)]

    # start all processes
    queue = ProcessQueue(nproc)
    proclist = []
    for i, subcache in enumerate(subcaches):
        if len(subcache) == 0:
            continue
        process = Process(target=_read, args=(queue, subcache, i))
        process.daemon = True
        proclist.append(process)
        process.start()

    # get data and block
    pout = []
    for i in range(len(proclist)):
        result = queue.get()
        if isinstance(result, Exception):
            raise result
        pout.append(result)
    for process in proclist:
        process.join()

    # combine and return
    data = zip(*sorted(pout, key=lambda out: out[0]))[1]
    if issubclass(target, Table):  # astropy.table.Table
        out = vstack_tables(data, join_type='exact')
    elif issubclass(target, recarray):
        out = recfunctions.stack_arrays(data, asrecarray=True, usemask=False,
                                        autoconvert=True).view(target)
    else:
        try:
            if hasattr(target, 'tableName'):  # glue.ligolw.table.Table
                out = data[0]
            else:
                out = data[0].copy()
        except AttributeError:
            out = data[0]
        for datum in data[1:]:
            out += datum

    if post:
        return post(out)
    else:
        return out
Example #20
class MultiProcessWorker(MultiWorker):
    max_worker_count = cpu_count()
    queue_type = ProcessQueue()
    worker_type = Process