def test_accumulate_invalid_shape_2D(caplog):
    stream = DummyData(start_date=now(), num_cols=5)
    node = Pipeline(steps=dummy_classifier)
    node.i_training.data = DummyData(start_date=now(), num_cols=5).next(10)
    node.update()
    node.i_training.data = DummyData(start_date=now(), num_cols=6).next(10)
    node.update()
    assert caplog.record_tuples[0][2] == 'Invalid shape'
    assert len(node._X_train_indices) == len(node._X_train)

def test_receive_3D_invalid_shape(caplog):
    node = Pipeline(steps=dummy_transformer, fit=True, mode='transform', meta_label=None)
    node.i_training_0.data = DummyData(start_date=now()).next(5)
    node.update()
    assert node._shape == (5, 5)
    node._status = 3
    node.i_0.data = DummyData(start_date=now()).next(3)
    node.update()
    assert caplog.record_tuples[0][2] == 'Invalid shape'
    assert node._X is None

def test_receive_2D_invalid_shape(caplog):
    node = Pipeline(steps=dummy_transformer, fit=True, mode='transform')
    node.i_training.data = DummyData(start_date=now(), num_cols=5).next()
    node.update()
    assert node._shape == 5
    node._status = 3
    node.i.data = DummyData(start_date=now(), num_cols=3).next()
    node.update()
    assert caplog.record_tuples[0][2] == 'Invalid shape'
    assert node._X is None

def test_accumulate_invalid_shape_3D(caplog):
    node = Pipeline(steps=dummy_classifier, meta_label=None)
    start_0 = now()
    start_1 = now() + pd.Timedelta('1s')
    start_2 = now() + pd.Timedelta('2s')
    node.i_training_0.data = DummyData(start_date=start_0, num_cols=5).next(10)
    node.i_training_1.data = DummyData(start_date=start_1, num_cols=6).next(10)
    node.i_training_2.data = DummyData(start_date=start_2, num_cols=5).next(20)
    node.update()
    assert caplog.record_tuples[0][2] == caplog.record_tuples[1][2] == 'Invalid shape'

def test_idle_buffer_3D(random):
    node = Pipeline(steps=dummy_classifier, buffer_size='5s', meta_label=None)
    start_0 = now() - pd.Timedelta('10s')
    start_1 = now()
    start_2 = now() + pd.Timedelta('10s')
    node.i_training_0.data = DummyData(start_date=start_0).next(10)
    node.i_training_1.data = DummyData(start_date=start_1).next(10)
    node.i_training_2.data = DummyData(start_date=start_2).next(10)
    node.update()
    assert len(node._X_train_indices) == 2
    assert len(node._X_train_indices) == len(node._X_train)
    assert node._X_train.shape == (2, 10, 5)

def _handler(self, address, *args):
    # Called for each incoming OSC message: map the address to a port and
    # buffer the received values together with their arrival time.
    time = now()
    port = self._address_to_port(address)
    values = list(args)
    with self._lock:
        self._data[port]["rows"].append(values)
        self._data[port]["timestamps"].append(time)

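# A minimal sketch (not part of the original source) of how a handler with the
# signature above is typically registered, assuming the node relies on
# python-osc; the address '/eeg' and port 9000 are illustrative assumptions.
from pythonosc.dispatcher import Dispatcher
from pythonosc.osc_server import ThreadingOSCUDPServer

def example_handler(address, *args):
    # Same call shape as _handler: one invocation per incoming OSC message,
    # with the OSC address followed by the message arguments.
    print(address, list(args))

if __name__ == '__main__':
    dispatcher = Dispatcher()
    dispatcher.map('/eeg', example_handler)
    server = ThreadingOSCUDPServer(('127.0.0.1', 9000), dispatcher)
    server.serve_forever()
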
def update(self):
    if self._current > self._stop:
        raise WorkerInterrupt('No more data.')
    min = self._current
    if self._timespan:
        max = min + self._timespan
    else:
        now = clock.now()
        elapsed = now - self._last
        max = min + elapsed * self._speed
        self._last = now
    for key, source in self._sources.items():
        # Select data
        data = self._store.select(key, 'index >= min & index < max')
        # Add offset
        if self._resync:
            data.index += self._offset
        # Update port
        getattr(self, source['name']).data = data
        getattr(self, source['name']).meta = source['meta']
    self._current = max

def __init__(self, delta, tol, reset=None):
    super().__init__()
    self._delta = delta  # Peak threshold
    # Tolerance for peak matching, in seconds. This can be seen as the minimum
    # time difference between two detected peaks.
    self._tol = tol
    self._reset_states()
    self._last = pd.to_datetime(now())  # Last timestamp
    self._reset = reset

def test_accumulate_start_2D(random):
    node = Pipeline(steps=dummy_classifier, buffer_size='5s')
    start = now()
    node.i_events.set([['accumulation_starts', '']], [start], ['label', 'data'])
    stream = DummyData(start_date=start, rate=1, jitter=0)
    node.i_training.data = stream.next(100)
    node.update()
    assert len(node._X_train) == 100

def test_predict_2D_output(random):
    classifier = [{'module': 'test_ml', 'class': 'DummyClassifierUnsupervised'}]
    node = Pipeline(steps=classifier, mode='fit_predict', meta_label=None)
    stream = DummyData(start_date=now())
    node.i.data = stream.next(5)
    node.i.meta = {'foo': 'bar'}
    node.update()
    assert len(node.o_events.data) == 5
    assert node.o_events.meta == node.i.meta

def test_transform_2D_output(random):
    node = Pipeline(steps=dummy_transformer, mode='fit_transform')
    columns = ['A', 'B', 'C', 'D', 'E']
    node.i.data = DummyData(start_date=now()).next()
    node.i.meta = {'foo': 'bar'}
    node.i.data.columns = columns
    node.update()
    assert np.array_equal(node.i.data.index.values, node.o.data.index.values)
    assert list(node.o.data.columns) == columns
    assert node.o.meta == node.i.meta

def test_idle_buffer_2D(random):
    start = now() - pd.Timedelta('10s')
    stream = DummyData(start_date=start, rate=1, jitter=0)
    node = Pipeline(steps=dummy_classifier, buffer_size='5s')
    node.i_training.data = stream.next(10)
    node.update()
    assert len(node._X_train_indices) == 4
    node.i_training.data = stream.next(10)
    node.update()
    assert len(node._X_train_indices) == 14
    assert len(node._X_train_indices) == len(node._X_train)

def update(self):
    timestamp = now()
    # Current time, converted to seconds as a float
    t = time_to_float(timestamp)
    if self._start is None:
        self._start = t
    values = [
        self._amplitude * np.sin(2 * np.pi * self._rate * (t - self._start))
    ]
    self.o.set(values, names=[self._name])
    self.o.meta = {"rate": Registry.rate}

def test_accumulate_y_train(caplog):
    node = Pipeline(steps=dummy_classifier)
    stream = DummyData(start_date=now())
    node.i_training_0.data = stream.next()
    node.i_training_1.data = stream.next()
    node.i_training_2.data = stream.next()
    node.i_training_0.meta = {'epoch': {'context': {'target': True}}}
    node.i_training_1.meta = {}
    node.i_training_2.meta = {'epoch': {'context': {'target': False}}}
    node.update()
    assert node._y_train.tolist() == [True, False]
    assert caplog.record_tuples[0][2] == 'Invalid label'

def update(self):
    # copy the meta
    self.o.meta = self.i.meta
    # When we have not received data, there is nothing to do
    if not self.i.ready():
        return
    # At this point, we are sure that we have some data to process
    if self.i.data.shape[1] != 1:
        self.logger.warning(
            f'Peak detection expects data with one column, received '
            f'{self.i.data.shape[1]}. Considering the first one.')
        self.i.data = self.i.data.take([0], axis=1)
    self._last = self.i.data.index[-1]
    if not self._ready:
        self._column = self.i.data.columns[0]
        # if self._last_peak is None:
        self._last_peak = self._last_valley = self.i.data.index[0]
        self._values_buffer += [0] * 2 * self._n
        self._timestamps_buffer += [self.i.data.index[0]] * 2 * self._n
        self._ready = True
        self.o.meta = {'column_name': self._column}
    self.o.data = pd.DataFrame()
    for (value, timestamp) in zip(self.i.data.values, self.i.data.index):
        # Peak detection
        detected = self._on_sample(value=value, timestamp=timestamp)
        if detected:
            self.o.data = self.o.data.append(
                pd.DataFrame(
                    index=[detected[0]],
                    data=np.array([[detected[1]], [{
                        'value': detected[2],
                        'lag': detected[3],
                        'interval': detected[4],
                        'column_name': self._column,
                        'detection_time': str(self._last),
                        'now': str(now()),
                        'extremum_time': str(detected[0])
                    }]]).T,
                    columns=['label', 'data']))
    self.o.meta = {"column_name": self._column}

def test_accumulate_start_stop_2D(random):
    node = Pipeline(steps=dummy_classifier, buffer_size='5s')
    start = now()
    events = [
        ['accumulation_starts', ''],
        ['accumulation_stops', '']
    ]
    times = pd.date_range(start=start, periods=2, freq='10s')
    node.i_events.set(events, times, ['label', 'data'])
    stream = DummyData(start_date=start, rate=1, jitter=0)
    node.i_training.data = stream.next(100)
    node.update()
    assert len(node._X_train) == 10

def update(self):
    # copy the meta
    self.o.meta = self.i.meta
    # When we have not received data, there is nothing to do
    if self.i.data is None or self.i.data.empty:
        return
    if self.i.data.shape[1] != 1:
        self.logger.warning(
            f'Peak detection expects data with one column, received '
            f'{self.i.data.shape[1]}. Considering the first one.')
        self.i.data = self.i.data.take([0], axis=1)
    column_name = self.i.data.columns[0]
    # At this point, we are sure that we have some data to process
    self.o.data = pd.DataFrame()
    for (value, timestamp) in zip(self.i.data.values, self.i.data.index):
        if self._reset is not None:
            if (self._last - timestamp).total_seconds() > self._reset:
                self._reset_states()
        self._last = timestamp
        # Peak detection
        detected = self._on_sample(value=value, timestamp=timestamp)
        # Append event
        if detected:
            self.o.data = self.o.data.append(
                pd.DataFrame(
                    index=[self.i.data.index[-1]],  # detected[0]
                    data=np.array([[detected[1]], [{
                        'value': detected[2][0],
                        'lag': detected[3],
                        'interval': detected[4],
                        'column_name': column_name,
                        'detection_time': str(self.i.data.index[-1]),
                        'now': str(now()),
                        'extremum_time': str(detected[0])
                    }]]).T,
                    columns=['label', 'data']))
    self.o.meta = {"column_name": column_name}

def make_event(label, data={}):
    """Create an event DataFrame

    Args:
        label (str): The event label.
        data (dict): The optional data dictionary.

    Returns:
        DataFrame

    """
    return pd.DataFrame(
        [[label, json.dumps(data)]],
        index=[now()],
        columns=["label", "data"]
    )

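# A minimal usage sketch for make_event (not part of the original source; the
# 'subject' payload key is illustrative). The result is a one-row event frame
# indexed by the current timestamp, with the payload serialized to JSON.
event = make_event('training_starts', {'subject': 1})
assert list(event.columns) == ['label', 'data']
assert event['label'].iloc[0] == 'training_starts'
assert event['data'].iloc[0] == '{"subject": 1}'
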
def test_predict_3D_output():
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    stream = DummyData(start_date=now())
    node.i_training_0.data = stream.next(5)
    node.i_training_1.data = stream.next(5)
    node.i_training_0.meta = {'target': 0}
    node.i_training_1.meta = {'target': 1}
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    node.i_0.data = stream.next(5)
    node.i_1.data = stream.next(5)
    node.i_0.meta = {'index': 0}
    node.i_1.meta = {'index': 1}
    node.update()
    assert len(node.o_events.data) == 2
    assert node.o_events.meta == {'epochs': [{'index': 0}, {'index': 1}]}

def test_predict():
    # classifier = [
    #     {'module': 'test_node_ml', 'class': 'Flattener'},
    #     {'module': 'sklearn.dummy', 'class': 'DummyClassifier', 'args': {'strategy': 'most_frequent'}}
    # ]
    node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target')
    node.i_training_0.set([-1], [now()], meta={'target': 0})
    node.i_training_1.set([1], [now()], meta={'target': 1})
    node.i_training_2.set([1], [now()], meta={'target': 1})
    node.i_training_3.set([1], [now()], meta={'target': 1})
    node.i_events.data = make_event('training_starts')
    while node._status != 3:
        node.update()
    node.i_0.set([-1], [now()])
    node.i_1.set([1], [now()])
    node.i_2.set([1], [now()])
    node.i_3.set([1], [now()])
    node.update()
    assert list(node._out) == [1, 1, 1, 1]

def __init__(self):
    self.device = None
    # Setup
    try:
        # On Unix systems, we need to manually set the product and vendor IDs
        ftd.setVIDPID(VID, PID)
    except AttributeError:
        # The method is not available on Windows
        pass
    # Connect
    try:
        # Open the first FTDI device
        self.device = ftd.open(0)
        # Get info
        self.logger.info(self.device.getDeviceInfo())
    except ftd.ftd2xx.DeviceError:
        # Could not open device
        raise WorkerInterrupt('Could not open device')
    # Initialize connection
    if self.device:
        self.device.setBaudRate(921600)
        self.device.setFlowControl(ftd.defines.FLOW_NONE, 0, 0)
        self.device.setDataCharacteristics(ftd.defines.BITS_8,
                                           ftd.defines.STOP_BITS_1,
                                           ftd.defines.PARITY_NONE)
        self.device.setTimeouts(2000, 2000)
        self.device.setLatencyTimer(2)
        self.device.setUSBParameters(BUFFER_SIZE, BUFFER_SIZE)
    # Start acquisition
    self.packet_count = 0
    self.time_delta = {
        '1024Hz': np.timedelta64(int(1e9 / 1024), 'ns'),
        '256Hz': np.timedelta64(int(1e9 / 256), 'ns'),
    }
    self.start()
    self.time_start = now()

def test_transform_3D_output(random):
    pipeline = [
        {'module': 'test_ml', 'class': 'Vectorizer'},
        {'module': 'test_ml', 'class': 'DummyTransformer'},
        {'module': 'test_ml', 'class': 'Shaper', 'args': {'shape': (2, -1, 5)}}
    ]
    node = Pipeline(steps=pipeline, mode='fit_transform', meta_label=None)
    columns = ['A', 'B', 'C', 'D', 'E']
    stream = DummyData(start_date=now())
    node.i_0.data = stream.next()
    node.i_1.data = stream.next()
    node.i_0.data.columns = columns
    node.i_1.data.columns = columns
    node.i_0.meta = {'index': 0}
    node.i_1.meta = {'index': 1}
    node.update()
    assert len(list(node.iterate('o_*'))) == 2
    assert np.array_equal(node.i_0.data.index.values, node.o_0.data.index.values)
    assert list(node.i_0.data.columns) == columns
    assert list(node.i_1.data.columns) == columns
    assert node.o_0.meta == node.i_0.meta
    assert node.o_1.meta == node.i_1.meta

def update(self):
    # Let's get ready
    self._clear()

    # Are we dealing with continuous data or epochs?
    if self._dimensions is None:
        port_name = "i_training" if self.fit else "i"
        if getattr(self, port_name).ready():
            self._dimensions = 2
        elif len(list(self.iterate(port_name + "_*"))) > 0:
            self._dimensions = 3

    # Set the accumulation boundaries
    if self._accumulation_start is None:
        matches = match_events(self.i_events, self.event_start_accumulation)
        if matches is not None:
            self._accumulation_start = matches.index.values[0]
            self._status = ACCUMULATING
    if self._accumulation_stop is None:
        matches = match_events(self.i_events, self.event_stop_accumulation)
        if matches is not None:
            self._accumulation_stop = matches.index.values[0]

    # Always buffer a few seconds, in case the start event is coming late
    if self._status == IDLE:
        start = (now() - self._buffer_size).to_datetime64()
        stop = max_time()
        self._accumulate(start, stop)

    # Accumulate between boundaries
    if self._status == ACCUMULATING:
        start = self._accumulation_start
        stop = self._accumulation_stop if self._accumulation_stop else max_time()
        self._accumulate(start, stop)

    # Should we start fitting the model?
    if self._status < FITTING:
        if match_events(self.i_events, self.event_start_training) is not None:
            self._status = FITTING
            self._task = Task(
                self._pipeline, "fit", self._X_train, self._y_train
            ).start()

    # Is the model ready?
    if self._status == FITTING:
        status = self._task.status()
        if status:
            if status["success"]:
                self._pipeline = status["instance"]
                self._status = READY
                self.logger.debug(f"Model fitted in {status['time']} seconds")
            else:
                self.logger.error(
                    f"An error occurred while fitting: {status['exception'].args[0]}"
                )
                self.logger.debug(
                    "\nTraceback (most recent call last):\n"
                    + "".join(status["traceback"])
                )
                raise WorkerInterrupt()

    # Run the pipeline
    if self._status == READY:
        self._receive()
        if self._X is not None:
            args = [self._X]
            if self.mode.startswith("fit"):
                args.append(self._y)
            # TODO: optionally loop through epochs instead of sending them all at once
            self._out = getattr(self._pipeline, self.mode)(*args)

    # Set output streams
    self._send()

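# Assumed module-level status constants for the state machine above (not part
# of the original source; the values are inferred from the ordering implied by
# `self._status < FITTING` and from the tests that wait on `node._status != 3`,
# which suggests READY corresponds to 3):
IDLE, ACCUMULATING, FITTING, READY = 0, 1, 2, 3
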
def __init__(self, filename, keys, speed=1, timespan=None, resync=True):
    """Initialize.

    Parameters
    ----------
    filename : string
        The path to the HDF5 file.
    keys : list
        The list of keys to replay.
    speed : float
        The speed at which the data must be replayed. 1 means real-time.
    timespan : float
        The timespan of each chunk, in seconds.
        If not None, will take precedence over the `speed` parameter.
    resync : boolean
        If False, timestamps will not be resync'ed to current time.

    """
    # Load store
    try:
        self._store = pd.HDFStore(filename, mode='r')
    except IOError as e:
        raise WorkerInterrupt(e)
    # Init
    self._sources = {}
    self._start = pd.Timestamp.max
    self._stop = pd.Timestamp.min
    self._speed = speed
    self._timespan = None if not timespan else pd.Timedelta(f'{timespan}s')
    self._resync = resync
    for key in keys:
        try:
            # Check format
            if not self._store.get_storer(key).is_table:
                self.logger.warning('%s: Fixed format. Will be skipped.', key)
                continue
            # Get first index
            first = self._store.select(key, start=0, stop=1).index[0]
            # Get last index
            nrows = self._store.get_storer(key).nrows
            last = self._store.select(key, start=nrows - 1, stop=nrows).index[0]
            # Check index type
            if type(first) != pd.Timestamp:
                self.logger.warning('%s: Invalid index. Will be skipped.', key)
                continue
            # Find lowest and highest indices across stores
            if first < self._start:
                self._start = first
            if last > self._stop:
                self._stop = last
            # Extract meta
            if self._store.get_node(key)._v_attrs.__contains__('meta'):
                meta = self._store.get_node(key)._v_attrs['meta']
            else:
                meta = {}
            # Set output port name, port will be created dynamically
            name = 'o' + key.replace('/', '_')
            # Update sources
            self._sources[key] = {
                'start': first,
                'stop': last,
                'nrows': nrows,
                'name': name,
                'meta': meta
            }
        except KeyError:
            self.logger.warning('%s: Key not found.', key)
    # Current time
    now = clock.now()
    # Time offset
    self._offset = pd.Timestamp(now) - self._start
    # Current query time
    self._current = self._start
    # Last update
    self._last = now

def __init__(self, filename, keys, timespan=.04, resync=True):
    """Initialize.

    Parameters
    ----------
    filename : string
        The path to the HDF5 file.
    keys : list
        The list of keys to replay.
    timespan : float
        The timespan of each chunk, in seconds.
    resync : boolean
        If False, timestamps will not be resync'ed to current time.

    """
    # Load store
    try:
        self._store = pd.HDFStore(filename, mode='r')
    except IOError as e:
        raise WorkerInterrupt(e)
    # Init
    self._sources = {}
    self._start = pd.Timestamp.max
    self._stop = pd.Timestamp.min
    self._timespan = pd.Timedelta(f'{timespan}s')
    self._resync = resync
    for key in keys:
        try:
            # Check format
            if not self._store.get_storer(key).is_table:
                self.logger.warning('%s: Fixed format. Will be skipped.', key)
                continue
            # Get first index
            first = self._store.select(key, start=0, stop=1).index[0]
            # Get last index
            nrows = self._store.get_storer(key).nrows
            last = self._store.select(key, start=nrows - 1, stop=nrows).index[0]
            # Check index type
            if type(first) != pd.Timestamp:
                self.logger.warning('%s: Invalid index. Will be skipped.', key)
                continue
            # Find lowest and highest indices across stores
            if first < self._start:
                self._start = first
            if last > self._stop:
                self._stop = last
            # Set output port name, port will be created dynamically
            name = 'o' + key.replace('/', '_')
            # Update sources
            self._sources[key] = {
                'start': first,
                'stop': last,
                'nrows': nrows,
                'name': name
            }
        except KeyError:
            self.logger.warning('%s: Key not found.', key)
    # Time offset
    self._offset = pd.Timestamp(clock.now()) - self._start
    # Current query time
    self._current = self._start

def __init__(self, filename, keys, speed=1, timespan=None, resync=True, start=0):
    """Initialize.

    Parameters
    ----------
    filename : string
        The path to the HDF5 file.
    keys : list
        The list of keys to replay.
    speed : float
        The speed at which the data must be replayed. 1 means real-time.
        Default: 1
    timespan : float
        The timespan of each chunk, in seconds.
        If not None, will take precedence over the `speed` parameter.
        Default: None
    resync : boolean
        If False, timestamps will not be resync'ed to current time.
        Default: True
    start : float
        Start directly at the given time offset, in seconds.
        Default: 0

    """
    # Load store
    try:
        self._store = pd.HDFStore(self._find_path(filename), mode="r")
    except IOError as e:
        raise WorkerInterrupt(e)
    # Init
    self._sources = {}
    self._start = pd.Timestamp.max
    self._stop = pd.Timestamp.min
    self._speed = speed
    self._timespan = None if not timespan else pd.Timedelta(f"{timespan}s")
    self._resync = resync
    for key in keys:
        try:
            # Check format
            if not self._store.get_storer(key).is_table:
                self.logger.warning("%s: Fixed format. Will be skipped.", key)
                continue
            # Get first index
            first = self._store.select(key, start=0, stop=1).index[0]
            # Get last index
            nrows = self._store.get_storer(key).nrows
            last = self._store.select(key, start=nrows - 1, stop=nrows).index[0]
            # Check index type
            if type(first) != pd.Timestamp:
                self.logger.warning("%s: Invalid index. Will be skipped.", key)
                continue
            # Find lowest and highest indices across stores
            if first < self._start:
                self._start = first
            if last > self._stop:
                self._stop = last
            # Extract meta
            if self._store.get_node(key)._v_attrs.__contains__("meta"):
                meta = self._store.get_node(key)._v_attrs["meta"]
            else:
                meta = {}
            # Set output port name, port will be created dynamically
            name = "o" + key.replace("/", "_")
            # Update sources
            self._sources[key] = {
                "start": first,
                "stop": last,
                "nrows": nrows,
                "name": name,
                "meta": meta,
            }
        except KeyError:
            self.logger.warning("%s: Key not found.", key)
    # Current time
    now = clock.now()
    # Starting timestamp
    self._start += pd.Timedelta(f"{start}s")
    # Time offset
    self._offset = pd.Timestamp(now) - self._start
    # Current query time
    self._current = self._start
    # Last update
    self._last = now