def test_multiprocess_func_does_something():
    q = Queue()
    # multiprocessing.Queue exposes put()/get(), not push()/pop()
    multiprocess(lambda q, x: q.put(x), [(q, 1), (q, 2)])
    arr = []
    while not q.empty():
        arr.append(q.get())
    assert set([1, 2]) == set(arr)
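The test above relies on a `multiprocess` helper that is not shown. A minimal sketch, assuming it simply spawns one Process per argument tuple and joins them all (a lambda target like the one above only works with the fork start method, since spawn would need to pickle it):

from multiprocessing import Process


def multiprocess(func, arg_tuples):
    # hypothetical helper: one process per argument tuple, run to completion
    procs = [Process(target=func, args=args) for args in arg_tuples]
    for p in procs:
        p.start()
    for p in procs:
        p.join()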
Example #2
def func(q: mp.Queue, pause: mp.Event):
    # finish_table True will not read exit until all done
    print(f'Process {os.getpid()}')
    idle = True  # True while the player loop is not actively playing
    starttime = time.time()
    while True:  # global process loop
        try:  # non-blocking Queue get handler
            val = q.get(block=False)  # get from queue
            # do when got from queue
            if val == 'exit':  # exit code for process
                break
            if isinstance(val, float):  # a bare float is a timestamp message
                print('time', val)
                continue  # a float cannot be iterated by the player loop below

            flag = False
            idle = False
            for item in val:  # player loop

                print(item, time.time() - starttime)  # message event

                # message delay
                endtime = time.time() + 1
                while pause.is_set():
                    continue
                while time.time() < endtime:
                    if not q.empty():  # if new item in queue
                        flag = True  # set exit flag
                        break
                if flag:  # play exit condition
                    idle = True
                    break
            idle = True

        except Exception:  # handle Queue.Empty
            pass  # Do nothing if no input
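A hypothetical driver for the worker above; only the worker's own parameters (q, pause) come from the example, the queued payloads are illustrative:

import multiprocessing as mp
import time

if __name__ == '__main__':
    q = mp.Queue()
    pause = mp.Event()
    worker = mp.Process(target=func, args=(q, pause))
    worker.start()
    q.put(time.time())            # a bare float is printed as a timestamp
    q.put(['item-a', 'item-b'])   # a sequence is "played" item by item
    time.sleep(3)
    q.put('exit')                 # sentinel that ends the worker's loop
    worker.join()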
Example #3
    def run(self, func, arglist):
        # TODO: Generalise this a bit maybe?
        #       We know len(arglist) == self.workers in pretty much all cases

        pool = []
        queue = Queue()
        for idx, split in enumerate(arglist):
            proc = Process(target=self._eval, args=(idx, func, split, queue))
            pool.append(proc)
            proc.start()
        for proc in pool:
            proc.join()

        yss = [[] for _ in arglist]  # independent lists, one slot per worker
        while not queue.empty():
            idx, ys = queue.get()
            yss[idx] = ys
        ret = [y for ys in yss for y in ys]
        #from code import interact
        #interact(local=locals())
        return ret
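The run() method above delegates to a self._eval helper that is not included in this excerpt. A plausible sketch, assuming each worker tags its result list with its index so the parent can reassemble the original order (note that joining the workers before draining the queue, as run() does, can deadlock if the result lists are large enough to fill the queue's pipe buffer):

    def _eval(self, idx, func, split, queue):
        # hypothetical worker body: apply the user function to this worker's
        # slice of the arguments and report (index, results) to the parent
        ys = func(split)
        queue.put((idx, ys))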
class DataStreamRecorder(Process):
    def __init__(self,
                 name,
                 data_sampler_method,
                 cache_path=None,
                 save_every=50):
        """ Initializes a DataStreamRecorder
        Parameters
        ----------
            name : string
                    User-friendly identifier for this data stream
            data_sampler_method : function
                    Method to call to retrieve data
        """
        Process.__init__(self)
        self._data_sampler_method = data_sampler_method

        self._has_set_sampler_params = False
        self._recording = False

        self._name = name

        self._cmds_q = Queue()
        self._data_qs = [Queue()]
        self._ok_q = None
        self._tokens_q = None

        self._save_every = save_every
        self._cache_path = cache_path
        self._saving_cache = cache_path is not None
        if self._saving_cache:
            self._save_path = os.path.join(cache_path, self.name)
            if not os.path.exists(self._save_path):
                os.makedirs(self._save_path)

        self._start_data_segment = 0
        self._cur_data_segment = 0
        self._saving_ps = []

    def run(self):
        setproctitle('python.DataStreamRecorder.{0}'.format(self._name))
        try:
            logging.debug("Starting data recording on {0}".format(self.name))
            self._tokens_q.put(("return", self.name))
            while True:
                if not self._cmds_q.empty():
                    cmd = self._cmds_q.get()
                    if cmd[0] == 'stop':
                        break
                    elif cmd[0] == 'pause':
                        self._recording = False
                        if self._saving_cache:
                            self._save_cache(self._cur_data_segment)
                            self._cur_data_segment += 1
                            self._data_qs.append(Queue())
                    elif cmd[0] == 'reset_data_segment':
                        self._start_data_segment = self._cur_data_segment
                    elif cmd[0] == 'resume':
                        self._recording = True
                    elif cmd[0] == 'save':
                        self._save_data(cmd[1], cmd[2], cmd[3])
                    elif cmd[0] == 'params':
                        self._args = cmd[1]
                        self._kwargs = cmd[2]

                if self._recording and not self._ok_q.empty():
                    timestamp = self._ok_q.get()
                    self._tokens_q.put(("take", self.name))

                    data = self._data_sampler_method(*self._args,
                                                     **self._kwargs)

                    cur_data_q = self._data_qs[self._cur_data_segment]
                    if (self._saving_cache
                            and cur_data_q.qsize() == self._save_every):
                        self._save_cache(self._cur_data_segment)
                        cur_data_q = Queue()
                        self._data_qs.append(cur_data_q)
                        self._cur_data_segment += 1
                    cur_data_q.put((timestamp, data))

                    self._tokens_q.put(("return", self.name))

        except KeyboardInterrupt:
            logging.debug("Shutting down data streamer on {0}".format(
                self.name))
            sys.exit(0)

    def _extract_q(self, i):
        q = self._data_qs[i]
        vals = []
        while q.qsize() > 0:
            vals.append(q.get())
        self._data_qs[i] = None
        del q
        return vals

    def _save_data(self, path, cb, concat):
        if not os.path.exists(path):
            os.makedirs(path)
        target_filename = os.path.join(path, "{0}.jb".format(self.name))
        if self._saving_cache:
            while any(p.is_alive() for p in self._saving_ps):
                sleep(1e-3)

            p = Process(target=_caches_to_file,
                        args=(self._save_path, self._start_data_segment,
                              self._cur_data_segment, target_filename, cb,
                              concat))
            p.start()
            self._start_data_segment = self._cur_data_segment
        else:
            data = self._extract_q(0)
            p = Process(target=_dump, args=(data, target_filename, cb))
            p.start()

    def _save_cache(self, i):
        if not self._saving_cache:
            raise Exception(
                "Cannot save cache if no cache path was specified.")
        logging.debug("Saving cache for {0} block {1}".format(
            self.name, self._cur_data_segment))
        data = self._extract_q(i)
        p = Process(target=_dump_cache,
                    args=(data,
                          os.path.join(self._save_path, "{0}.jb".format(
                              self._cur_data_segment)), self.name,
                          self._cur_data_segment))
        p.start()
        self._saving_ps.append(p)

    def _start_recording(self, *args, **kwargs):
        """ Starts recording
        Parameters
        ----------
            *args : any
                    Ordinary args used for calling the specified data sampler method
            **kwargs : any
                    Keyword args used for calling the specified data sampler method
        """
        while not self._cmds_q.empty():
            self._cmds_q.get_nowait()
        while not self._data_qs[self._cur_data_segment].empty():
            self._data_qs[self._cur_data_segment].get_nowait()

        self._args = args
        self._kwargs = kwargs

        self._recording = True
        self.start()

    @property
    def name(self):
        return self._name

    def _set_qs(self, ok_q, tokens_q):
        self._ok_q = ok_q
        self._tokens_q = tokens_q

    def _flush(self):
        """ Returns a list of all current data """
        if self._recording:
            raise Exception("Cannot flush data queue while recording!")
        if self._saving_cache:
            logging.warning(
                "Flush when using cache means unsaved data will be lost and not returned!"
            )
            self._cmds_q.put(("reset_data_segment", ))
        else:
            data = self._extract_q(0)
            return data

    def save_data(self, path, cb=_NULL, concat=True):
        if self._recording:
            raise Exception("Cannot save data while recording!")
        self._cmds_q.put(("save", path, cb, concat))

    def _stop(self):
        """ Stops recording. Returns all recorded data and their timestamps. Destroys recorder process."""
        self._pause()
        self._cmds_q.put(("stop", ))
        try:
            self.terminate()  # the recorder is itself the child Process
        except Exception:
            pass
        self._recording = False

    def _pause(self):
        """ Pauses recording """
        self._cmds_q.put(("pause", ))
        self._recording = False

    def _resume(self):
        """ Resumes recording """
        self._cmds_q.put(("resume", ))
        self._recording = True

    def change_data_sampler_params(self, *args, **kwargs):
        """ Chanes args and kwargs for data sampler method
        Parameters
        ----------
            *args : any
                    Ordinary args used for calling the specified data sampler method
            **kwargs : any
                    Keyword args used for calling the specified data sampler method
        """
        self._cmds_q.put(('params', args, kwargs))
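A hedged sketch of how a parent process might drive the recorder above. The ok/tokens queues and the ('take'/'return', name) token protocol come from run(); the sampler function and cache path are illustrative:

from multiprocessing import Queue
import time

ok_q, tokens_q = Queue(), Queue()
recorder = DataStreamRecorder('camera', sample_camera,   # sample_camera is a hypothetical sampler
                              cache_path='/tmp/recordings')
recorder._set_qs(ok_q, tokens_q)
recorder._start_recording()        # extra args/kwargs are passed to the sampler

for _ in range(100):
    event, name = tokens_q.get()   # 'return' means the recorder is ready again
    if event == 'return':
        ok_q.put(time.time())      # grant permission for exactly one more sample
recorder._stop()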
Example #5
class BaseAggregator(object):
    """Base class for the data aggregator interface. This class is used
    alongside the BaseListener class to spawn an aggregator process that
    combines data from multiple crawl processes. The BaseAggregator class
    manages the child listener process.

    Parameters
    ----------
    manager_params : dict
        TaskManager configuration parameters
    browser_params : list of dict
        List of browser configuration dictionaries"""
    __metaclass__ = abc.ABCMeta

    def __init__(self, manager_params, browser_params):
        self.manager_params = manager_params
        self.browser_params = browser_params
        self.listener_address = None
        self.listener_process = None
        self.status_queue = Queue()
        self.shutdown_queue = Queue()
        self._last_status = None
        self._last_status_received = None
        self.logger = logging.getLogger('openwpm')

    @abc.abstractmethod
    def save_configuration(self, openwpm_version, browser_version):
        """Save configuration details to the database"""

    @abc.abstractmethod
    def get_next_visit_id(self):
        """Return a unique visit ID to be used as a key for a single visit"""

    @abc.abstractmethod
    def get_next_crawl_id(self):
        """Return a unique crawl ID used as a key for a browser instance"""

    def get_most_recent_status(self):
        """Return the most recent queue size sent from the listener process"""

        # Block until we receive the first status update
        if self._last_status is None:
            return self.get_status()

        # Drain status queue until we receive most recent update
        while not self.status_queue.empty():
            self._last_status = self.status_queue.get()
            self._last_status_received = time.time()

        # Check last status signal
        if (time.time() - self._last_status_received) > STATUS_TIMEOUT:
            raise RuntimeError(
                "No status update from DataAggregator listener process "
                "for %d seconds." % (time.time() - self._last_status_received))

        return self._last_status

    def get_status(self):
        """Get listener process status. If the status queue is empty, block."""
        try:
            self._last_status = self.status_queue.get(block=True,
                                                      timeout=STATUS_TIMEOUT)
            self._last_status_received = time.time()
        except queue.Empty:
            raise RuntimeError(
                "No status update from DataAggregator listener process "
                "for %d seconds." % (time.time() - self._last_status_received))
        return self._last_status

    def launch(self, listener_process_runner, *args):
        """Launch the aggregator listener process"""
        args = (self.manager_params, self.status_queue,
                self.shutdown_queue) + args
        self.listener_process = Process(target=listener_process_runner,
                                        args=args)
        self.listener_process.daemon = True
        self.listener_process.start()
        self.listener_address = self.status_queue.get()

    def shutdown(self):
        """ Terminate the aggregator listener process"""
        self.logger.debug(
            "Sending the shutdown signal to the %s listener process..." %
            type(self).__name__)
        self.shutdown_queue.put(SHUTDOWN_SIGNAL)
        start_time = time.time()
        self.listener_process.join(300)
        self.logger.debug("%s took %s seconds to close." %
                          (type(self).__name__, str(time.time() - start_time)))
        self.listener_address = None
        self.listener_process = None
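The launch()/get_status()/shutdown() protocol above implies a listener that first reports its address on the status queue, then periodically reports a status value, and exits on the shutdown signal. A hypothetical runner; only the queue protocol is taken from the class:

import queue
import time


def listener_process_runner(manager_params, status_queue, shutdown_queue):
    status_queue.put(('localhost', 9000))   # launch() reads this as the listener address
    while True:
        try:
            if shutdown_queue.get(block=False) == SHUTDOWN_SIGNAL:
                break
        except queue.Empty:
            pass
        status_queue.put(0)                 # periodic status, e.g. current backlog size
        time.sleep(1)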
Example #6
class BaseAggregator(object):
    """Base class for the data aggregator interface. This class is used
    alongside the BaseListener class to spawn an aggregator process that
    combines data from multiple crawl processes. The BaseAggregator class
    manages the child listener process.

    Parameters
    ----------
    manager_params : dict
        TaskManager configuration parameters
    browser_params : list of dict
        List of browser configuration dictionaries"""
    __metaclass__ = abc.ABCMeta

    def __init__(self, manager_params, browser_params):
        self.manager_params = manager_params
        self.browser_params = browser_params
        self.logger = loggingclient(*manager_params['logger_address'])
        self.listener_address = None
        self.listener_process = None
        self.status_queue = Queue()
        self.shutdown_queue = Queue()
        self._last_status = None
        self._last_status_received = None

    @abc.abstractmethod
    def save_configuration(self, openwpm_version, browser_version):
        """Save configuration details to the database"""

    @abc.abstractmethod
    def get_next_visit_id(self):
        """Return a unique visit ID to be used as a key for a single visit"""

    @abc.abstractmethod
    def get_next_crawl_id(self):
        """Return a unique crawl ID used as a key for a browser instance"""

    def get_most_recent_status(self):
        """Return the most recent queue size sent from the listener process"""

        # Block until we receive the first status update
        if self._last_status is None:
            return self.get_status()

        # Drain status queue until we receive most recent update
        while not self.status_queue.empty():
            self._last_status = self.status_queue.get()
            self._last_status_received = time.time()

        # Check last status signal
        if (time.time() - self._last_status_received) > STATUS_TIMEOUT:
            raise RuntimeError(
                "No status update from DataAggregator listener process "
                "for %d seconds." % (time.time() - self._last_status_received)
            )

        return self._last_status

    def get_status(self):
        """Get listener process status. If the status queue is empty, block."""
        try:
            self._last_status = self.status_queue.get(
                block=True, timeout=STATUS_TIMEOUT)
            self._last_status_received = time.time()
        except queue.Empty:
            raise RuntimeError(
                "No status update from DataAggregator listener process "
                "for %d seconds." % (time.time() - self._last_status_received)
            )
        return self._last_status

    def launch(self, listener_process_runner, *args):
        """Launch the aggregator listener process"""
        args = (self.manager_params, self.status_queue,
                self.shutdown_queue) + args
        self.listener_process = Process(
            target=listener_process_runner,
            args=args
        )
        self.listener_process.daemon = True
        self.listener_process.start()
        self.listener_address = self.status_queue.get()

    def shutdown(self):
        """ Terminate the aggregator listener process"""
        self.logger.debug(
            "Sending the shutdown signal to the %s listener process..." %
            type(self).__name__
        )
        self.shutdown_queue.put(SHUTDOWN_SIGNAL)
        start_time = time.time()
        self.listener_process.join(300)
        self.logger.debug(
            "%s took %s seconds to close." % (
                type(self).__name__,
                str(time.time() - start_time)
            )
        )
        self.listener_address = None
        self.listener_process = None
class StorageControllerHandle:
    """This class contains all methods relevant for the TaskManager
    to interact with the StorageController
    """
    def __init__(
        self,
        structured_storage: StructuredStorageProvider,
        unstructured_storage: Optional[UnstructuredStorageProvider],
    ) -> None:

        self.listener_address: Optional[Tuple[str, int]] = None
        self.listener_process: Optional[Process] = None
        self.status_queue = Queue()
        self.completion_queue = Queue()
        self.shutdown_queue = Queue()
        self._last_status = None
        self._last_status_received: Optional[float] = None
        self.logger = logging.getLogger("openwpm")
        self.storage_controller = StorageController(
            structured_storage,
            unstructured_storage,
            status_queue=self.status_queue,
            completion_queue=self.completion_queue,
            shutdown_queue=self.shutdown_queue,
        )

    def get_next_visit_id(self) -> VisitId:
        """Generate visit id as randomly generated positive integer less than 2^53.

        Parquet can support integers up to 64 bits, but Javascript can only
        represent integers up to 53 bits:
        https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Number/MAX_SAFE_INTEGER
        Thus, we cap these values at 53 bits.
        """
        return VisitId(random.getrandbits(53))

    def get_next_browser_id(self) -> BrowserId:
        """Generate crawl id as randomly generated positive 32bit integer

        Note: Parquet's partitioned dataset reader only supports integer
        partition columns up to 32 bits.
        """
        return BrowserId(random.getrandbits(32))

    def save_configuration(
        self,
        manager_params: ManagerParamsInternal,
        browser_params: List[BrowserParamsInternal],
        openwpm_version: str,
        browser_version: str,
    ) -> None:
        assert self.listener_address is not None
        sock = DataSocket(self.listener_address)
        task_id = random.getrandbits(32)
        sock.store_record(
            TableName("task"),
            INVALID_VISIT_ID,
            {
                "task_id": task_id,
                "manager_params": manager_params.to_json(),
                "openwpm_version": openwpm_version,
                "browser_version": browser_version,
            },
        )
        # Record browser details for each browser
        for browser_param in browser_params:
            sock.store_record(
                TableName("crawl"),
                INVALID_VISIT_ID,
                {
                    "browser_id": browser_param.browser_id,
                    "task_id": task_id,
                    "browser_params": browser_param.to_json(),
                },
            )
        sock.finalize_visit_id(INVALID_VISIT_ID, success=True)

    def launch(self) -> None:
        """Starts the storage controller"""
        self.storage_controller = Process(
            name="StorageController",
            target=StorageController.run,
            args=(self.storage_controller, ),
        )
        self.storage_controller.daemon = True
        self.storage_controller.start()

        self.listener_address = self.status_queue.get()

    def get_new_completed_visits(self) -> List[Tuple[int, bool]]:
        """
        Returns a list of all visit ids that have been processed since
        the last time the method was called and whether or not they
        ran successfully.

        This method will return an empty list in case no visit ids have
        been processed since the last time this method was called
        """
        finished_visit_ids = list()
        while not self.completion_queue.empty():
            finished_visit_ids.append(self.completion_queue.get())
        return finished_visit_ids

    def shutdown(self, relaxed: bool = True) -> None:
        """Terminate the storage controller process"""
        assert isinstance(self.storage_controller, Process)
        self.logger.debug(
            "Sending the shutdown signal to the Storage Controller...")
        self.shutdown_queue.put((SHUTDOWN_SIGNAL, relaxed))
        start_time = time.time()
        self.storage_controller.join(300)
        self.logger.debug("%s took %s seconds to close." %
                          (type(self).__name__, str(time.time() - start_time)))

    def get_most_recent_status(self) -> int:
        """Return the most recent queue size sent from the Storage Controller process"""

        # Block until we receive the first status update
        if self._last_status is None:
            return self.get_status()

        # Drain status queue until we receive most recent update
        while not self.status_queue.empty():
            self._last_status = self.status_queue.get()
            self._last_status_received = time.time()

        # Check last status signal
        if (time.time() - self._last_status_received) > STATUS_TIMEOUT:
            raise RuntimeError(
                "No status update from the storage controller process "
                "for %d seconds." % (time.time() - self._last_status_received))

        return self._last_status

    def get_status(self) -> int:
        """Get listener process status. If the status queue is empty, block."""
        try:
            self._last_status = self.status_queue.get(block=True,
                                                      timeout=STATUS_TIMEOUT)
            self._last_status_received = time.time()
        except queue.Empty:
            assert self._last_status_received is not None
            raise RuntimeError(
                "No status update from the storage controller process "
                "for %d seconds." % (time.time() - self._last_status_received))
        assert isinstance(self._last_status, int)
        return self._last_status
class BaseAggregator:
    """Base class for the data aggregator interface. This class is used
    alongside the BaseListener class to spawn an aggregator process that
    combines data from multiple crawl processes. The BaseAggregator class
    manages the child listener process.

    Parameters
    ----------
    manager_params : ManagerParamsInternal
        TaskManager configuration parameters
    browser_params : list of BrowserParamsInternal
        List of browser configuration class<BrowserParams>"""

    __metaclass__ = abc.ABCMeta

    def __init__(
        self,
        manager_params: ManagerParamsInternal,
        browser_params: List[BrowserParamsInternal],
    ):
        self.manager_params = manager_params
        self.browser_params = browser_params
        self.listener_address = None
        self.listener_process = None
        self.status_queue = Queue()
        self.completion_queue = Queue()
        self.shutdown_queue = Queue()
        self._last_status = None
        self._last_status_received = None
        self.logger = logging.getLogger("openwpm")

    @abc.abstractmethod
    def save_configuration(self, openwpm_version, browser_version):
        """Save configuration details to the database"""

    @abc.abstractmethod
    def get_next_visit_id(self):
        """Return a unique visit ID to be used as a key for a single visit"""

    @abc.abstractmethod
    def get_next_browser_id(self):
        """Return a unique crawl ID used as a key for a browser instance"""

    def get_most_recent_status(self):
        """Return the most recent queue size sent from the listener process"""

        # Block until we receive the first status update
        if self._last_status is None:
            return self.get_status()

        # Drain status queue until we receive most recent update
        while not self.status_queue.empty():
            self._last_status = self.status_queue.get()
            self._last_status_received = time.time()

        # Check last status signal
        if (time.time() - self._last_status_received) > STATUS_TIMEOUT:
            raise RuntimeError(
                "No status update from DataAggregator listener process "
                "for %d seconds." % (time.time() - self._last_status_received))

        return self._last_status

    def get_status(self):
        """Get listener process status. If the status queue is empty, block."""
        try:
            self._last_status = self.status_queue.get(block=True,
                                                      timeout=STATUS_TIMEOUT)
            self._last_status_received = time.time()
        except queue.Empty:
            raise RuntimeError(
                "No status update from DataAggregator listener process "
                "for %d seconds." % (time.time() - self._last_status_received))
        return self._last_status

    def get_new_completed_visits(self) -> List[Tuple[int, bool]]:
        """
        Returns a list of all visit ids that have been processed since
        the last time the method was called and whether or not they
        have been interrupted.

        This method will return an empty list in case no visit ids have
        been processed since the last time this method was called
        """
        finished_visit_ids = list()
        while not self.completion_queue.empty():
            finished_visit_ids.append(self.completion_queue.get())
        return finished_visit_ids

    def launch(self, listener_process_runner, *args):
        """Launch the aggregator listener process"""
        args = ((self.status_queue, self.completion_queue,
                 self.shutdown_queue), ) + args
        self.listener_process = Process(target=listener_process_runner,
                                        args=args)
        self.listener_process.daemon = True
        self.listener_process.start()
        self.listener_address = self.status_queue.get()

    def shutdown(self, relaxed: bool = True):
        """ Terminate the aggregator listener process"""
        self.logger.debug(
            "Sending the shutdown signal to the %s listener process..." %
            type(self).__name__)
        self.shutdown_queue.put((SHUTDOWN_SIGNAL, relaxed))
        start_time = time.time()
        self.listener_process.join(300)
        self.logger.debug("%s took %s seconds to close." %
                          (type(self).__name__, str(time.time() - start_time)))
        self.listener_address = None
        self.listener_process = None
Example #9
class MRClient:
    map = StandardOperation("map")
    filter = StandardOperation("filter")
    reduce = StandardOperation("reduce")
    flatmap = StandardOperation("flatmap")
    def __init__(self, num_cores):
        self.num_cores = num_cores
        self.names = set()
        self.global_queue = Queue()
        self.pool = []
        self.channels = []
        self.__terminated = False
        atexit.register(self.__del__)
        queues = [Queue() for _ in range(num_cores)]
        for i in range(num_cores):
            pipe_master, pipe_slave = Pipe()
            state = Value('i', 0)
            process = MRServer(i, queues, pipe_slave, state, self.global_queue)
            self.channels.append(Channel(queues[i], pipe_master, state))
            self.pool.append(process)
            process.start()

    def wait(self, queue=True):
        while not self.idle(queue):
            sleep(0.01)

    def idle(self, queue=True):
        if queue:
            return self.global_queue.empty() and all(map(lambda x: x.state.value==0 and x.queue.empty(), self.channels))
        else:
            return all(map(lambda x: x.state.value==0, self.channels))

    def _register_name(self, name):
        i = 0
        while True:
            candidate = "{name}{i}".format(name=name, i=i)
            if candidate in self.names:
                i += 1
            else:
                self.names.add(candidate)
                return candidate
    
    @contextmanager
    def acquire(self, queue=True):
        """
        Wait until the processes are idle
        """
        self.wait(queue)
        yield
    
    def _send(self, item):
        """Send commands to processes"""
        for channel in self.channels:
            channel.pipe.send(item)

    def _recv(self):
        """
        Receive info from processes.
        """
        for channel in self.channels:
            yield channel.pipe.recv()

    def distribute(self, name, data):
        """
        Distribute the data to processes. 
        
        Parameters
        ----------
        name: str
            Store the data by a key `name`.
        data: Iterable
            the data to be distributed.
        """
        name = self._register_name(name)
        if isinstance(data, dict):
            data = data.items()
        i = 0
        n = self.num_cores
        with self.acquire():
            for batch in bufferize(data, CONFIG.BUFFER_SIZE):
                self.channels[i % n].queue.put(batch)
                i += 1
            self._send({'action': "add_dataset", 'name': name})
        return Distributed(self, name)

    def copy(self, dataset):
        """A shallow copy of the current dataset"""
        name = self._register_name(dataset.name)
        with self.acquire():
            self._send({'action': 'copy', 'src': dataset.name, 'dest': name})
        return Distributed(self, name)

    def reduce2(self, func, dataset, inplace=True):
        """
        Step 1: Reduce in separate processes;
        Step 2: Partition;
        Step 3: Reduce in separate processes again.
        """
        data = dataset.reduce(func, inplace=inplace) \
                      .partition() \
                      .reduce(func, inplace=True)
        return data

    def partition(self, dataset, by=None):
        """
        Redistribute objects to different processes according to
        their key.

        Parameters
        ----------
        by: object -> Union[str, num]
            a function that maps object to a key,
            this key decides which process to put
            the data on.
        """
        with self.acquire():
            self._send({'action': 'partition', 'name': dataset.name, 'by': by})
        with self.acquire(queue=False):
            self._send({'action': "add_dataset", 'name': dataset.name})
        return dataset

    def merge(self, data):
        """
        Merges multiple datasets to a new one.
        """
        new_name = "/".join(["merge"] + [d.name for d in data])
        name = self._register_name(new_name)
        self._send({'action': 'merge',
                    'src': [d.name for d in data],
                    'dest': name})
        return Distributed(self, name)

    def count(self, dataset):
        """
        Returns
        -------
        The length of the dataset
        """
        n = 0
        with self.acquire():
            self._send({'action': 'count', 'name': dataset.name})
            for c in self._recv():
                n += c
        return n

    def remove(self, dataset):
        """
        Remove the data from processes.
        """
        with self.acquire():
            self._send({'action': 'remove_dataset', 'name': dataset.name})
            self.names.remove(dataset.name)
    
    def exists(self, dataset):
        """
        Returns
        -------
        Whether the data still exists on the processes or has
        already been removed.
        """
        return dataset.name in self.names

    def terminate(self):
        """
        Terminate the processes.
        """
        with self.acquire():
            self._send({'action': 'terminate'})
        self.__terminated = True

    def collect(self, dataset, remove=False):
        """
        Collect the data from processes to the client.
        """
        with self.acquire():
            self._send({'action': 'collect', 'name': dataset.name})
            data = []
            for item in robust_recv(self.global_queue, retries=3):
                data.extend(item)
        if remove:
            self.remove(dataset)
        return data

    def __del__(self):
        if not self.__terminated:
            self.terminate()
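The client above uses several helpers (Channel, bufferize, robust_recv, plus MRServer, Distributed and CONFIG) that are defined elsewhere in the library. Purely illustrative sketches of the first three, matching how they are called here:

from collections import namedtuple
from itertools import islice
import queue

# per-worker handle: its task queue, its command pipe and its busy/idle flag
Channel = namedtuple('Channel', ['queue', 'pipe', 'state'])


def bufferize(iterable, size):
    """Yield successive chunks of at most `size` items."""
    it = iter(iterable)
    while True:
        chunk = list(islice(it, size))
        if not chunk:
            return
        yield chunk


def robust_recv(q, retries=3):
    """Drain a multiprocessing queue, tolerating a few empty polls in a row."""
    misses = 0
    while misses < retries:
        try:
            yield q.get(timeout=0.1)
            misses = 0
        except queue.Empty:
            misses += 1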
Example #10
class AmpDecorator(Amplifier):
    """This class 'decorates' the Low-Level Amplifier classes with
    Network-Marker and Save-To-File functionality.

    You use it by decorating (not as in Python-Decorator, but in the GoF
    sense) the low level amplifier class you want to use::

        import libmushu
        from libmushu.ampdecorator import AmpDecorator
        from libmushu.driver.randomamp import RandomAmp

        amp = AmpDecorator(RandomAmp)

    Warning: The network marker timings on Windows have a resolution of
    10ms-15ms. On Linux the resolution is 1us. This is due to
    limitations of Python's time.time method, or rather a Windows-specific
    issue.

    There is currently no precise timer that provides times comparable
    between two running processes on Windows. The performance counter
    provided on Windows has a much better resolution, but it is relative
    to the process's start time and it drifts (1s per 100s), so it is
    only precise for a relatively short amount of time.

    If higher precision is needed, one has to replace the time.time
    calls with something that provides better precision. For example,
    one could create a third process that provides times, or regularly
    synchronize both processes with the clock synchronization algorithm
    described here:

        http://en.wikipedia.org/wiki/Network_Time_Protocol

    Alternatively one could use `timeGetTime` from Windows' Multi Media
    library, which is tunable via `timeBeginPeriod` and provides a
    precision of 1-2ms. Apparently this is the way Chrome and many
    others do it.::

        from __future__ import division

        from ctypes import windll
        import time

        timeBeginPeriod = windll.winmm.timeBeginPeriod
        timeEndPeriod = windll.winmm.timeEndPeriod
        timeGetTime = windll.winmm.timeGetTime

        if __name__ == '__main__':
            # wrap the code that needs high precision in timeBegin- and
            # timeEndPeriod with the same parameter. The parameter is
            # the interval in ms you want as precision. Usually the
            # minimum value allowed is 1 (best).
            timeBeginPeriod(1)
            times = []
            t_start = time.time()
            while time.time() < (t_start + 1):
                times.append(timeGetTime())
            times = sorted(list(set(times)))
            print(1000 / len(times))
            timeEndPeriod(1)

    """
    def __init__(self, ampcls):
        self.amp = ampcls()
        self.write_to_file = False

    @property
    def presets(self):
        return self.amp.presets

    def start(self, filename=None):
        # prepare files for writing
        self.write_to_file = False
        if filename is not None:
            self.write_to_file = True
            filename_marker = filename + '.marker'
            filename_eeg = filename + '.eeg'
            filename_meta = filename + '.meta'
            for filename in filename_marker, filename_eeg, filename_meta:
                if os.path.exists(filename):
                    logger.error('A file "%s" already exists, aborting.' %
                                 filename)
                    raise Exception
            self.fh_eeg = open(filename_eeg, 'wb')
            self.fh_marker = open(filename_marker, 'w')
            self.fh_meta = open(filename_meta, 'w')
            # write meta data
            meta = {
                'Channels': self.amp.get_channels(),
                'Sampling Frequency': self.amp.get_sampling_frequency(),
                'Amp': str(self.amp)
            }
            json.dump(meta, self.fh_meta, indent=4)

        # start the marker server
        self.marker_queue = Queue()
        self.tcp_reader_running = Event()
        self.tcp_reader_running.set()
        tcp_reader_ready = Event()
        self.tcp_reader = Process(target=marker_reader,
                                  args=(self.marker_queue,
                                        self.tcp_reader_running,
                                        tcp_reader_ready))
        self.tcp_reader.start()
        logger.debug('Waiting for marker server to become ready...')
        tcp_reader_ready.wait()
        logger.debug('Marker server is ready.')
        # zero the sample counter
        self.received_samples = 0
        # start the amp
        self.amp.start()

    def stop(self):
        # stop the amp
        self.amp.stop()
        # stop the marker server
        self.tcp_reader_running.clear()
        logger.debug('Waiting for marker server process to stop...')
        self.tcp_reader.join()
        logger.debug('Marker server process stopped.')
        # close the files
        if self.write_to_file:
            logger.debug('Closing files.')
            for fh in self.fh_eeg, self.fh_marker, self.fh_meta:
                fh.close()

    def configure(self, **kwargs):
        self.amp.configure(**kwargs)

    def get_data(self):
        """Get data from the amplifier.

        This method is supposed to be called as fast as possible (i.e.
        hundreds of times per second) and returns the data and the
        markers.

        Returns
        -------
        data : 2darray
            a numpy array (time, channels) of the EEG data
        markers : list of (float, str)
            a list of markers. Each element is a tuple of timestamp and
            string. The timestamp is the time in ms relative to the
            onset of the block of data. Note that negative values are
            *allowed* as well as values bigger than the length of the
            block of data returned. That is to be interpreted as a
            marker from the last block and a marker for a future block
            respectively.

        """
        # get data and marker from underlying amp
        data, marker = self.amp.get_data()

        t = time.time()
        # length in sec of the new block according to #samples and fs
        block_duration = len(data) / self.amp.get_sampling_frequency()
        # abs time of start of the block
        t0 = t - block_duration
        # duration of all blocks in ms except the current one
        duration = (1000 * self.received_samples /
                    self.amp.get_sampling_frequency())

        # merge markers
        tcp_marker = []
        while not self.marker_queue.empty():
            m = self.marker_queue.get()
            m[0] = (m[0] - t0) * 1000
            tcp_marker.append(m)
        marker = sorted(marker + tcp_marker)
        # save data to files
        if self.write_to_file:
            for m in marker:
                self.fh_marker.write("%f %s\n" % (duration + m[0], m[1]))
            self.fh_eeg.write(struct.pack("f" * data.size, *data.flatten()))
        self.received_samples += len(data)
        if len(data) == 0 and len(marker) > 0:
            logger.error(
                'Received marker but no data. This is an error, the amp should block on get_data until data is available. Marker timestamps will be unreliable.'
            )
        return data, marker

    def get_channels(self):
        return self.amp.get_channels()

    def get_sampling_frequency(self):
        return self.amp.get_sampling_frequency()
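start() above hands a marker_reader target to the marker-server process; the real implementation ships with libmushu. A minimal sketch of the implied contract (signal readiness, timestamp each received TCP marker, push [time, text] onto the queue, stop when the running event is cleared); the port number is illustrative:

import socket
import time


def marker_reader(marker_queue, running, ready, port=12344):
    srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    srv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    srv.bind(('localhost', port))
    srv.listen(1)
    srv.settimeout(0.1)          # short timeout so the running flag is polled
    ready.set()                  # tell the parent the server is up
    while running.is_set():
        try:
            conn, _ = srv.accept()
        except socket.timeout:
            continue
        with conn:
            data = conn.recv(1024)
            if data:
                # get_data() later rewrites this absolute time into a
                # block-relative offset in milliseconds
                marker_queue.put([time.time(), data.decode().strip()])
    srv.close()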