class Client(Node):
    """Connect to the Utopia Hub and send data to the decoder.

    This plugin makes the MindAffect decoder compatible with any device
    supported by Timeflux.

    Attributes:
        i (Port): Default input, expects DataFrame.

    Example:
        .. literalinclude:: /../../timeflux_mindaffect/examples/openbci.yaml
           :language: yaml

    """

    def __init__(self, host=None, port=8400, timeout=5000):
        """
        Args:
            host (str): The Utopia Hub hostname. Leave to `None` for autodiscovery.
            port (int): The Utopia Hub port.
            timeout (int): Delay (in ms) after which we stop trying to connect.
        """
        # Connect to the Utopia Hub
        self._client = UtopiaClient()
        try:
            self._client.autoconnect(host, port, timeout_ms=timeout)
        except Exception:
            pass
        if not self._client.isConnected:
            raise WorkerInterrupt('Could not connect to Utopia hub')
        # Keep track of the header so it is sent only once
        self._header = None
        # Start the sync server
        self._task = Task(Server(), 'start').start()

    def update(self):
        if self.i.ready():
            now = getTimeStamp()
            data = self.i.data.values.tolist()
            if not self._header:
                rate = self.i.meta['rate']
                channels = self.i.data.shape[1]
                labels = list(self.i.data.columns)
                self._header = DataHeader(now, rate, channels, labels)
                self._client.sendMessage(self._header)
            self._client.sendMessage(DataPacket(now, data))

    def terminate(self):
        self._client.disconnect()
        self._task.stop()
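# Usage sketch (not part of the original plugin): driving the node by hand,
# outside of a Timeflux graph. It assumes a Utopia Hub is reachable on the
# local network (autodiscovery), and that the stream rate is declared in the
# input meta, which update() reads to build the DataHeader.
def _example_client_usage():
    import pandas as pd

    node = Client()  # raises WorkerInterrupt if no hub is found in time
    node.i.meta = {'rate': 250}  # sampling rate, in Hz
    node.i.data = pd.DataFrame([[1.0, 2.0], [3.0, 4.0]], columns=['C3', 'C4'])
    node.update()  # the first call sends the DataHeader, then one DataPacket
    node.terminate()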
import importlib
import json

import numpy as np
import pandas as pd
from jsonschema import validate
from sklearn.pipeline import make_pipeline

from timeflux.core.exceptions import ValidationError, WorkerInterrupt
from timeflux.core.node import Node
from timeflux.helpers.background import Task
from timeflux.helpers.clock import now, max_time
from timeflux.helpers.port import match_events, get_meta

# Statuses
IDLE = 0
ACCUMULATING = 1
FITTING = 2
READY = 3


class Pipeline(Node):
    """Fit, transform and predict.

    Training on continuous data is always unsupervised.
    Training on epoched data can either be supervised or unsupervised.

    If `fit` is `False`, input events are ignored and no initial training is
    performed. `fit` is automatically set to `False` if `mode` is either
    'fit_predict' or 'fit_transform', and to `True` if `mode` is either
    'predict', 'predict_proba' or 'predict_log_proba'.

    Attributes:
        i (Port): Continuous data input, expects DataFrame.
        i_* (Port): Epoched data input, expects DataFrame.
        i_training (Port): Continuous training data input, expects DataFrame.
        i_training_* (Port): Epoched training data input, expects DataFrame.
        i_events (Port): Event input, expects DataFrame.
        o (Port): Continuous data output, provides DataFrame.
        o_* (Port): Epoched data output, provides DataFrame.
        o_events (Port): Event output, provides DataFrame.

    Args:
        steps (list): Pipeline steps and settings.
        fit (bool): Whether the pipeline requires training.
        mode ('predict'|'predict_proba'|'predict_log_proba'|'transform'|'fit_predict'|'fit_transform'): Method called on the pipeline when data arrives.
        meta_label (str|tuple|None): Path to the epoch label in the port meta.
        event_start_accumulation (str): Event marking the start of training data accumulation.
        event_stop_accumulation (str): Event marking the end of training data accumulation.
        event_start_training (str): Event that triggers the background fit.
        buffer_size (str): Duration of the rolling buffer kept while idle, in case the start event arrives late.
        passthrough (bool): Whether to forward inputs to outputs while the model is not ready.
        resample (bool): Whether to resample timestamps when a transform changes the number of samples.
        resample_direction ('right'|'left'|'both'): Where to anchor the resampled timestamps.
        resample_rate (None|float): Resampling rate, in Hz. If `None`, inferred from the input.
        model: Load a pickle model - NOT IMPLEMENTED.
        cv: Cross-validation - NOT IMPLEMENTED.

    """

    def __init__(
        self,
        steps,
        fit=True,
        mode="predict",
        meta_label=("epoch", "context", "target"),
        event_start_accumulation="accumulation_starts",
        event_stop_accumulation="accumulation_stops",
        event_start_training="training_starts",
        buffer_size="5s",
        passthrough=False,
        resample=False,
        resample_direction="right",
        resample_rate=None,
        model=None,
        cv=None,
    ):
        # TODO: validation
        # TODO: model loading from file
        # TODO: cross-validation
        # TODO: provide more context for errors
        self.fit = fit
        self.mode = mode
        self.meta_label = meta_label
        self.event_start_accumulation = event_start_accumulation
        self.event_stop_accumulation = event_stop_accumulation
        self.event_start_training = event_start_training
        self.passthrough = passthrough
        self.resample = resample
        self.resample_direction = resample_direction
        self.resample_rate = resample_rate
        self._buffer_size = pd.Timedelta(buffer_size)
        self._make_pipeline(steps)
        self._reset()

    def update(self):
        # Let's get ready
        self._clear()
        # Are we dealing with continuous data or epochs?
        if self._dimensions is None:
            port_name = "i_training" if self.fit else "i"
            if getattr(self, port_name).ready():
                self._dimensions = 2
            elif len(list(self.iterate(port_name + "_*"))) > 0:
                self._dimensions = 3
        # Set the accumulation boundaries
        if self._accumulation_start is None:
            matches = match_events(self.i_events, self.event_start_accumulation)
            if matches is not None:
                self._accumulation_start = matches.index.values[0]
                self._status = ACCUMULATING
        if self._accumulation_stop is None:
            matches = match_events(self.i_events, self.event_stop_accumulation)
            if matches is not None:
                self._accumulation_stop = matches.index.values[0]
        # Always buffer a few seconds, in case the start event is coming late
        if self._status == IDLE:
            start = (now() - self._buffer_size).to_datetime64()
            stop = max_time()
            self._accumulate(start, stop)
        # Accumulate between boundaries
        if self._status == ACCUMULATING:
            start = self._accumulation_start
            stop = self._accumulation_stop if self._accumulation_stop else max_time()
            self._accumulate(start, stop)
        # Should we start fitting the model?
        if self._status < FITTING:
            if match_events(self.i_events, self.event_start_training) is not None:
                self._status = FITTING
                self._task = Task(
                    self._pipeline, "fit", self._X_train, self._y_train
                ).start()
        # Is the model ready?
        if self._status == FITTING:
            status = self._task.status()
            if status:
                if status["success"]:
                    self._pipeline = status["instance"]
                    self._status = READY
                    self.logger.debug(f"Model fitted in {status['time']} seconds")
                else:
                    self.logger.error(
                        f"An error occurred while fitting: {status['exception'].args[0]}"
                    )
                    self.logger.debug(
                        "\nTraceback (most recent call last):\n"
                        + "".join(status["traceback"])
                    )
                    raise WorkerInterrupt()
        # Run the pipeline
        if self._status == READY:
            self._receive()
            if self._X is not None:
                args = [self._X]
                if self.mode.startswith("fit"):
                    args.append(self._y)
                # TODO: optionally loop through epochs instead of sending them all at once
                self._out = getattr(self._pipeline, self.mode)(*args)
        # Set output streams
        self._send()

    def terminate(self):
        # Kill the fit subprocess
        if self._task is not None:
            self._task.stop()

    def _reset(self):
        self._X_train = None
        self._y_train = None
        self._X_train_indices = np.array([], dtype=np.datetime64)
        self._accumulation_start = None
        self._accumulation_stop = None
        self._dimensions = None
        self._shape = ()
        self._task = None
        if self.mode.startswith("fit"):
            self.fit = False
        elif self.mode.startswith("predict"):
            self.fit = True
        if self.fit:
            self._status = IDLE
        else:
            self._status = READY

    def _clear(self):
        self._X = None
        self._y = None
        self._X_indices = []
        self._X_columns = []
        self._out = None

    def _make_pipeline(self, steps):
        schema = {
            "type": "array",
            "minItems": 1,
            "items": {
                "type": "object",
                "properties": {
                    "module": {"type": "string"},
                    "class": {"type": "string"},
                    "args": {"type": "object"},
                },
                "required": ["module", "class"],
            },
        }
        try:
            validate(instance=steps, schema=schema)
        except Exception as error:
            raise ValidationError("steps", error.message)
        pipeline = []
        for step in steps:
            try:
                args = step["args"] if "args" in step else {}
                m = importlib.import_module(step["module"])
                c = getattr(m, step["class"])
                i = c(**args)
                pipeline.append(i)
            except ImportError:
                raise ValidationError("steps", f"could not import '{step['module']}'")
            except AttributeError:
                raise ValidationError(
                    "steps", f"could not find class '{step['class']}'"
                )
            except TypeError:
                raise ValidationError(
                    "steps",
                    f"could not instantiate class '{step['class']}' with the given params",
                )
        # TODO: memory and verbose args
        self._pipeline = make_pipeline(*pipeline, memory=None, verbose=False)

    def _accumulate(self, start, stop):
        # Do nothing if no fitting required
        if not self.fit:
            return
        # Set defaults
        indices = np.array([], dtype=np.datetime64)
        # Accumulate continuous data
        if self._dimensions == 2:
            if self.i_training.ready():
                data = self.i_training.data
                mask = (data.index >= start) & (data.index < stop)
                data = data[mask]
                if not data.empty:
                    if self._X_train is None:
                        self._X_train = data.values
                        self._shape = self._X_train.shape[1]
                        indices = data.index.values
                    else:
                        if data.shape[1] == self._shape:
                            self._X_train = np.vstack((self._X_train, data.values))
                            indices = data.index.values
                        else:
                            self.logger.warning("Invalid shape")
        # Accumulate epoched data
        if self._dimensions == 3:
            for _, _, port in self.iterate("i_training_*"):
                if port.ready():
                    index = port.data.index.values[0]
                    if index >= start and index < stop:
                        data = port.data.values
                        label = get_meta(port, self.meta_label)
                        if self._shape and (data.shape != self._shape):
                            self.logger.warning("Invalid shape")
                            continue
                        if self.meta_label is not None and label is None:
                            self.logger.warning("Invalid label")
                            continue
                        if self._X_train is None:
                            self._X_train = np.array([data])
                            self._shape = self._X_train.shape[1:]
                        else:
                            self._X_train = np.vstack((self._X_train, [data]))
                        indices = np.append(indices, index)
                        if label is not None:
                            if self._y_train is None:
                                self._y_train = np.array([label])
                            else:
                                self._y_train = np.append(self._y_train, [label])
        # Store indices
        if indices.size != 0:
            self._X_train_indices = np.append(self._X_train_indices, indices)
        # Trim
        if self._X_train is not None:
            mask = (self._X_train_indices >= start) & (self._X_train_indices < stop)
            self._X_train = self._X_train[mask]
            self._X_train_indices = self._X_train_indices[mask]
            if self._y_train is not None:
                self._y_train = self._y_train[mask]

    def _receive(self):
        # Continuous data
        if self._dimensions == 2:
            if self.i.ready():
                if not self._X_columns:
                    self._X_columns = list(self.i.data.columns)
                if self._shape and (self.i.data.shape[1] != self._shape):
                    self.logger.warning("Invalid shape")
                else:
                    self._X = self.i.data.values
                    self._X_indices = self.i.data.index.values
        # Epochs
        if self._dimensions == 3:
            for name, _, port in self.iterate("i_*"):
                if port.ready() and "training" not in name and "events" not in name:
                    data = port.data.values
                    indices = port.data.index.values
                    label = get_meta(port, self.meta_label)
                    if not self._X_columns:
                        self._X_columns = list(port.data.columns)
                    if self._shape and (data.shape != self._shape):
                        self.logger.warning("Invalid shape")
                        continue
                    if not self.fit and self.meta_label is not None and label is None:
                        self.logger.warning("Invalid label")
                        continue
                    if self._X is None:
                        self._X = []
                    if self._y is None and label is not None:
                        self._y = []
                    self._X.append(data)
                    self._X_indices.append(indices)
                    if label is not None:
                        self._y.append(label)

    def _send(self):
        # Passthrough
        if self._status < READY and self.passthrough:
            inputs = []
            for _, suffix, port in self.iterate("i*"):
                if not suffix.startswith("_training") and not suffix.startswith(
                    "_events"
                ):
                    inputs.append((suffix, port))
            for suffix, src_port in inputs:
                dst_port = getattr(self, "o" + suffix)
                dst_port.data = src_port.data
                dst_port.meta = src_port.meta
        # Model
        if self._out is not None:
            if "predict" in self.mode:
                # Send events
                if len(self._X_indices) == len(self._out):
                    # TODO: skip JSON serialization?
                    data = [
                        [self.mode, json.dumps({"result": self._np_to_native(result)})]
                        for result in self._out
                    ]
                    times = (
                        self._X_indices
                        if self._dimensions == 2
                        else np.asarray(self._X_indices)[:, 0]
                    )  # Keep the first timestamp of each epoch
                    names = ["label", "data"]
                    self.o_events.set(data, times, names)
                else:
                    self.logger.warning(
                        "Number of predictions inconsistent with input length"
                    )
            else:
                # Send data
                if self._dimensions == 2:
                    try:
                        self.o.data = self._reindex(
                            self._out, self._X_indices, self._X_columns
                        )
                    except Exception as e:
                        self.logger.warning(getattr(e, "message", repr(e)))
                if self._dimensions == 3:
                    if len(self._X_indices) == len(self._out):
                        for i, (data, times) in enumerate(
                            zip(self._out, self._X_indices)
                        ):
                            try:
                                getattr(self, "o_" + str(i)).data = self._reindex(
                                    data, times, self._X_columns
                                )
                            except Exception as e:
                                self.logger.warning(getattr(e, "message", repr(e)))
                    else:
                        self.logger.warning(
                            "Number of transforms inconsistent with number of epochs"
                        )

    def _np_to_native(self, data):
        """Convert numpy scalars and objects to native types."""
        return getattr(data, "tolist", lambda: data)()

    def _reindex(self, data, times, columns):
        if len(data) != len(times):
            if self.resample:
                # Resample at a specific frequency
                kwargs = {"periods": len(data)}
                if self.resample_rate is None:
                    kwargs["freq"] = pd.infer_freq(times)
                    kwargs["freq"] = pd.tseries.frequencies.to_offset(kwargs["freq"])
                else:
                    kwargs["freq"] = pd.DateOffset(seconds=1 / self.resample_rate)
                if self.resample_direction == "right":
                    kwargs["start"] = times[0]
                elif self.resample_direction == "left":
                    kwargs["end"] = times[-1]
                else:

                    def middle(a):
                        return int(np.ceil(len(a) / 2)) - 1

                    kwargs["start"] = times[middle(times)] - (
                        middle(data) * kwargs["freq"]
                    )
                times = pd.date_range(**kwargs)
            else:
                # Linearly space between first and last
                times = pd.date_range(start=times[0], end=times[-1], periods=len(data))
        return pd.DataFrame(data, times, columns)
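# A `steps` sketch (not in the original module) matching the schema validated
# in _make_pipeline(): a list of {"module", "class", "args"} objects, resolved
# with importlib and chained with scikit-learn's make_pipeline(). The sklearn
# estimators below are illustrative assumptions, not taken from this file.
def _example_pipeline_config():
    example_steps = [
        {"module": "sklearn.preprocessing", "class": "MinMaxScaler"},
        {
            "module": "sklearn.svm",
            "class": "SVC",
            "args": {"kernel": "linear", "probability": True},
        },
    ]
    # mode="predict" sets fit=True on reset: the node accumulates training
    # data between the accumulation events, fits in a background Task when
    # "training_starts" fires, then predicts on incoming data.
    return Pipeline(steps=example_steps, mode="predict", buffer_size="5s")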
def test_exception(working_path):
    task = Task(DummyWorker(), 'echo', fail=True).start()
    while not task.done:
        status = task.status()
    assert status['success'] == False
    assert status['exception'].args[0] == 'failed'
def test_kwargs(working_path):
    task = Task(DummyWorker(), 'echo', message='foobar').start()
    while not task.done:
        status = task.status()
    assert status['result'] == 'foobar'
def test_default(working_path):
    task = Task(DummyWorker(), 'echo').start()
    while not task.done:
        status = task.status()
    assert status['result'] == 'hello'
    assert status['instance'].message == 'hello'
def test_stop_not_running(working_path):
    task = Task(DummyWorker(), 'echo').start()
    while not task.done:
        status = task.status()
    task.stop()
    assert task.done == True
def test_stop_running(working_path):
    task = Task(DummyWorker(), 'echo', delay=5).start()
    sleep(.5)
    assert task.done == False
    task.stop()
    assert task.done == True
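# An extra check sketched from fields used elsewhere in this codebase
# (Pipeline.update() reads status['time'] after fitting); not part of the
# original suite, and the exact semantics of 'time' are assumed here.
def test_time(working_path):
    task = Task(DummyWorker(), 'echo').start()
    while not task.done:
        status = task.status()
    assert status['success'] == True
    assert status['time'] >= 0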