def _to_dataset(self, data): """ Converts the data recorded from the BIOSEMI device into a Psychic dataset. """ if data == None or data.size == 0: self.logger.warning( 'Data corrupt: no valid frames found in data packet') return None # Undo byte adding that the biosemi has done data = (data >> 8) # First channel is status channel if self.status_as_markers: Y = (data[:1, :] & 0x00ffff) else: Y = numpy.zeros((1, data.shape[1])) X = data[1:, :] + (2**23) # go from signed to unsigned print self.reference_channels if len(self.reference_channels) > 0: REF = X[self.reference_channels, :] X = X[self.target_channels, :] if len(self.reference_channels) > 0: X = X - numpy.tile(numpy.mean(REF, axis=0), (X.shape[0], 1)) I = self._estimate_timing(X.shape[1]) self.logger.debug('Number of samples parsed: %d' % X.shape[1]) return psychic.DataSet(data=X, labels=Y, ids=I, feat_lab=self.feat_lab)
def _train(self): """ Train the classifier on a dataset. """ d = self.recorder.read(block=False) if not d: raise ClassifierException('First collect some data before training.') # Save a snapshot of the training data to disk d.save('test_data.dat') # Convert markers to classes Y = np.zeros((3, d.ninstances), dtype=np.bool) Y[0,:] = (d.Y == 1)[0,:] Y[1,:] = (d.Y == 2)[0,:] Y[2,:] = (d.Y == 3)[0,:] d = psychic.DataSet(labels=Y, cl_lab=['on','off', 'sweep'], default=d) self._construct_pipeline() # Train the pipeline self.classification.train(d) self.logger.info('Training complete') # Send a debug plot to client self._send_debug_image(d) self.window_node.reset() self.thres_node.reset() self.training_complete = True
def _extract_training_trials(self, d):
    block_onsets = numpy.flatnonzero(d.Y > 100)
    num_blocks = len(block_onsets)
    if not num_blocks:
        raise ClassifierException(
            'No blocks found in recording. Make sure the data is properly labeled.')

    block_lengths = numpy.hstack(
        (numpy.diff(block_onsets), d.ninstances - block_onsets[-1]))

    targets = d.Y[0, block_onsets] - 100
    options = numpy.unique(targets)
    num_options = len(options)
    num_instances = num_blocks * num_options
    num_channels = d.nfeatures

    mdict = {}
    for target in options:
        mdict[target] = 'target %02d' % target

    # Allocate memory for the blocks
    feat_dim_lab = ['channels', 'samples', 'repetitions']
    feat_shape = (num_channels,
                  self.target_window[1] - self.target_window[0],
                  self.num_repetitions)
    X = numpy.zeros(feat_shape + (num_instances,))
    Y = numpy.zeros((2, num_instances))
    I = numpy.arange(num_instances)

    # Extract each block
    for block_num, block_onset, block_length, target in zip(
            range(num_blocks), block_onsets, block_lengths, targets):
        block = d[block_onset:block_onset + block_length + self.target_sample_rate]
        block = psychic.slice(block, mdict, self.target_window)
        #block = psychic.baseline(block, (0, 10))

        # Extract each option within a block
        for option_num in range(block.nclasses):
            if block.get_class(option_num).ndX.shape[2] < self.num_repetitions:
                self.logger.warning(
                    'could not extract all repetitions of option %d in block %d'
                    % (option_num + 1, block_num))
                continue

            instance = block_num * num_options + option_num
            X[:, :, :, instance] = \
                block.get_class(option_num).ndX[:, :, :self.num_repetitions]
            Y[0, instance] = (option_num == (target - 1))  # target trial
            Y[1, instance] = (option_num != (target - 1))  # nontarget trial

    # Build a new dataset containing the trials
    return psychic.DataSet(data=X, labels=Y, ids=I,
                           feat_dim_lab=feat_dim_lab,
                           cl_lab=['target', 'nontarget'])
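# Illustrative sketch (not part of the trial extraction): deriving block
# onsets, lengths and targets from a label row, as done above. The toy label
# row is hypothetical; codes above 100 are assumed to mark block starts.
import numpy

Y = numpy.zeros(20)
Y[[2, 9, 15]] = [101, 102, 101]      # three block-start markers
block_onsets = numpy.flatnonzero(Y > 100)                      # [ 2  9 15]
block_lengths = numpy.hstack(
    (numpy.diff(block_onsets), len(Y) - block_onsets[-1]))     # [7 6 5]
targets = Y[block_onsets] - 100                                # [1. 2. 1.]
print(block_onsets, block_lengths, targets)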
def _record_data(self): """ Either generates some random data or extracts a record from the BDF file. Returns result as a Golem dataset. """ self.begin_read_time = self.end_read_time self.end_read_time = self.begin_read_time + self.buffer_size_seconds time_to_wait = max(0, self.end_read_time - precision_timer()) time.sleep(time_to_wait) # Calculate the number of samples to generate target = int((self.end_read_time - self.T0) * self.sample_rate) nsamples = target - self.nsamples if nsamples <= 0: return None if self.file_input: # Determine number of records to read samples_to_read = nsamples if self.remaining_frames_in_record != None: samples_to_read -= self.remaining_frames_in_record.shape[0] nrecords_to_read = int( numpy.ceil( samples_to_read / float(self.header['record_length'] * self.sample_rate))) X = self.remaining_frames_in_record for i in range(nrecords_to_read): if X == None: X = self.bdf_reader.read_record() else: X = numpy.vstack((X, self.bdf_reader.read_record())) data_mask = [ i for i, lab in enumerate(self.header['label']) if lab != 'Status' ] status_mask = self.header['label'].index('Status') feat_lab = [self.header['label'][i] for i in data_mask] self.remaining_frames_in_record = X[nsamples:, :] Y = X[:nsamples, status_mask].reshape(1, -1).astype( numpy.int) & 0xffff X = X[:nsamples, data_mask].T else: X = numpy.random.random_integers(self.digital_min, self.digital_max, (self.nchannels, nsamples)) feat_lab = self.feat_lab Y = numpy.zeros((1, nsamples)) I = self._estimate_timing(X.shape[1]) d = psychic.DataSet(data=X, labels=Y, ids=I, feat_lab=feat_lab) self.nsamples += d.ninstances return d
def _apply(self, d): """ Applies classifier to a dataset. """ # Perform preprocessing d = self.preprocessing.apply(d) slices = self.slice_node.apply(d) if slices == None: return if self.application_data == None: self.application_data = slices else: self.application_data += slices repetitions_recorded = numpy.min(self.application_data.ninstances_per_class) self.logger.info('Repetitions recorded: %d' % repetitions_recorded) if repetitions_recorded == self.last_repetitions_recorded: return self.last_repetitions_recorded = repetitions_recorded if repetitions_recorded < self.num_repetitions: return # Mangle data into shape: (#channels x #samples x #repetitions x #targets) d = self.application_data ndX = numpy.zeros(d.feat_shape + (repetitions_recorded, d.nclasses,)) Y = numpy.zeros((2,d.nclasses), dtype=numpy.bool) I = numpy.arange(d.nclasses) feat_dim_lab = ['channels', 'samples', 'repetitions'] for cl in range(d.nclasses): ndX[:,:,:,cl] = d.get_class(cl).ndX[:,:,:repetitions_recorded] # Update feature labels feat_lab = list(self.feat_lab) feat_lab[2] = range(repetitions_recorded) d = psychic.DataSet(data=ndX, labels=Y, ids=I, feat_dim_lab=feat_dim_lab, feat_lab=feat_lab, cl_lab=['target', 'nontarget'], default=d) # Perform actual classification try: result = self.classification.apply(d).X winner = numpy.argmax(result[0,:]) self.logger.info('classification result: %d' % winner) self.engine.provide_result([list(result[0,:]), winner+1]) self.application_data = None self.last_repetitions_recorded = 0 self.repetitions_recorded = 0 except ValueError as e: self.logger.error('Classification failed: %s' % e)
def infer_spatial_pattern(data, y=None, roi_time=None, roi_channels=None,
                          method='peak'):
    """Estimate the spatial pattern of an ERP component from a psychic DataSet.

    This function uses the metadata present in the psychic DataSet object to
    provide a convenient interface to the general purpose
    infer_spatial_pattern function.

    Parameters
    ----------
    data : psychic.DataSet
        The trials in the format of a psychic DataSet.
    y : 1D array (n_trials,) | 2D array (n_trials, 1) | None
        For each trial, a label indicating to which experimental condition
        the trial belongs. If None, data.y is used instead. Defaults to None.
    roi_time : tuple of floats (start, end) | None
        The start and end time (in seconds, end is exclusive) of the time
        region of interest. When method='peak', the search for maximum
        difference is restricted to this time window. When method='mean', the
        mean signal across this time window is used. If None, the entire time
        window is used. Defaults to None.
    roi_channels : list of str | None
        When method='peak', restrict the search for maximum difference to the
        channels with the given names. When None, do not restrict the search.
        Defaults to None.
    method : 'peak' | 'mean'
        When 'peak', the spatial pattern is the signal at the time of maximum
        squared difference between the experimental conditions. When 'mean',
        the spatial pattern is the mean difference waveform between the
        experimental conditions. Defaults to 'peak'.

    Returns
    -------
    spat_pat : psychic.DataSet
        The spatial pattern of the ERP component, stored in a psychic DataSet
        object.
    """
    if y is None:
        y = data.y

    channel_names, time = data.feat_lab[:2]

    if roi_time is not None:
        # Translate between seconds and samples
        roi_time = (np.searchsorted(time, roi_time[0]),
                    np.searchsorted(time, roi_time[1]))

    if roi_channels is not None:
        # Translate between channel names and indices
        roi_channels = [channel_names.index(ch) for ch in roi_channels]

    spat_pat = template.infer_spatial_pattern(
        data.data.transpose(2, 0, 1), y, roi_time, roi_channels, method)

    return psychic.DataSet(spat_pat, ids=channel_names)
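# Illustrative sketch (not part of the function): translating a time window
# given in seconds into sample indices with searchsorted, as done above. The
# time axis below is hypothetical.
import numpy as np

time = np.arange(-0.2, 0.8, 0.01)     # feature axis in seconds, 100 Hz
roi_time = (0.3, 0.5)
roi_samples = (np.searchsorted(time, roi_time[0]),
               np.searchsorted(time, roi_time[1]))
print(roi_samples)                    # approximately (50, 70)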
def _add_markers(self, d): """ Label the data with markers. """ self.marker_lock.acquire() if self.current_marker.type == 'trigger': Y = numpy.zeros((1, d.ninstances)) else: Y = numpy.repeat([[self.current_marker.code]], d.ninstances, axis=1) future_markers = [] for m in self.markers: # Determine the location of the marker in the datastream y_index = numpy.searchsorted(d.I[0, :], m.timestamp - self.T0) if y_index <= 0: # timestamp lies in the past, oh dear! # mark the first sample, the marker is delayed. self.current_marker = m if m.type == 'trigger': Y[0, 0] = m.code else: Y[0, 0:] = m.code elif y_index >= d.ninstances: # timestamp lies in the future, save for a later time future_markers += [m] else: # timestamp is present in current data segment self.current_marker = m if m.type == 'trigger': Y[0, y_index] = m.code else: Y[0, y_index:] = m.code # Write some debug info self.logger.debug('For marker %s, found y_index of %d, (T0=%f)' % (m, y_index, self.T0)) if y_index < d.ninstances: self.markerlog.write( '%f, %f, %d, %d, %f, %f\n' % (m.timestamp, m.time_received, m.code, y_index, m.timestamp - self.T0, d.I[0, 0])) self.markerlog.flush() self.markers = future_markers self.marker_lock.release() return psychic.DataSet(labels=Y, default=d)
def _to_dataset(self, data): """ Converts the data recorded from the EPOC device into a Psychic dataset. """ if data == None or data.size == 0: self.logger.warning( 'Data corrupt: no valid frames found in data packet') return None X = data Y = numpy.zeros((1, X.shape[1])) I = self._estimate_timing(X.shape[1]) self.logger.debug('Number of samples parsed: %d' % X.shape[1]) return psychic.DataSet(data=X, label=Y, ids=I, feat_lab=self.feat_lab)
def _to_dataset(self, data): """ Converts the data recorded from the EPOC device into a Psychic dataset. """ if data == None or len(data) == 0: return None X = [] for i in range(0, len(data) / self.bytes_per_sample): packet = data[i * self.bytes_per_sample:(i + 1) * self.bytes_per_sample] raw_data = self._cipher.decrypt( packet[:16]) + self._cipher.decrypt(packet[16:]) X.append([ self._get_level(raw_data, bits) for bits in sensorBits.values() ]) X = numpy.array(X).T Y = numpy.zeros((1, X.shape[1])) I = self._estimate_timing(X.shape[1]) self.logger.debug('Number of samples parsed: %d' % X.shape[1]) return psychic.DataSet(data=X, labels=Y, ids=I, feat_lab=self.feat_lab)
def _to_dataset(self, data): """ Converts the data recorded from the BIOSEMI device into a Psychic dataset. """ if data == None or len(data) == 0: return None # Convert data from little-endian 32-bit ints to python integers data = struct.unpack('<%dI' % (len(data) / 4), data) # Prepend data recorded earlier that did not cover complete frames if len(self.unfinished_frames) > 0: data = self.unfinished_frames + data self.unfinished_frames = [] # Check sync byte if data[0] != SYNC_BV: self.logger.warning('sync lost, trying to find it again') for i in range(len(data) - 1): if data[i] == SYNC_BV and data[i + 1] != SYNC_BV: self.logger.warning('signal re-synced') data = data[i:] break else: self.logger.warning( 'unable to re-sync signal, discarding data') return None # Only keep complete frames samples_read = len(data) / self.stride self.unfinished_frames = data[self.stride * samples_read:] data = data[:self.stride * samples_read] frames = numpy.array(data, dtype=numpy.uint32).reshape(-1, self.stride).T # Test if sync markers line up,, if len(numpy.flatnonzero(frames[0, :] != SYNC_BV)) > 0: self.logger.warning('sync lost, discarding data') return None self.battery_low = len( numpy.flatnonzero(frames[1, :] & BATTERY_BV)) > 0 self.cms_in_range = len( numpy.flatnonzero(frames[1, :] & CMS_RANGE_BV) == 0) > 0 # Keep only first 32 channels + 8 external ones frames = frames[range(1, 33) + range(257, 266), :] # Undo byte adding that the biosemi has done frames = (frames >> 8) # First channel is status channel if self.status_as_markers: Y = (frames[:1, :] & 0x00ffff) else: Y = numpy.zeros((1, frames.shape[1])) X = frames[1:, :] + (2**23) # go from signed to unsigned # Calculate reference signal if len(self.reference_channels) > 0: REF = X[self.reference_channels, :] X = X[self.target_channels, :] # Re-reference the signal to the chosen reference if len(self.reference_channels) > 0: X = X - numpy.tile(numpy.mean(REF, axis=0), (X.shape[0], 1)) I = self._estimate_timing(X.shape[1]) self.logger.debug('Number of samples parsed: %d' % X.shape[1]) return psychic.DataSet(data=X, labels=Y, ids=I, feat_lab=self.feat_lab)
def _raw_to_dataset(self, data):
    """ Decodes a string of raw data read from the IMEC device into a Psychic
    dataset. """
    num_bytes = len(data)
    self.logger.debug('Handling datapacket of length %d' % num_bytes)

    if num_bytes < self.bytes_per_frame:
        self.logger.warning('Data incomplete: read at least %d bytes before'
                            ' calling this function' % self.bytes_per_frame)
        return (None, data)

    # Construct a dataset from the raw data
    samples = []
    first_frame = True
    i = 0
    while i <= num_bytes - self.bytes_per_frame:
        # Search for the next frame. This frame should be right next to the
        # frame we just parsed. But sometimes, the device inserts some bogus
        # values between frames, which we need to skip.
        frame_found = False
        frame_index = i
        for j in range(i, num_bytes - self.bytes_per_frame + 1):
            # Data should begin with the sync byte ('S' == 0x53)
            if data[j] != 0x53:
                continue

            # The next frame should also begin with the sync byte
            if (j < num_bytes - 2 * self.bytes_per_frame and
                    data[j + self.bytes_per_frame] != 0x53):
                continue

            # Battery level should be between 120 and 165
            if data[j + 2] < 120 or data[j + 2] > 165:
                continue

            frame_found = True
            frame_index = j
            break

        if frame_index - i > 0:
            self.logger.debug('garbage bytes: %d' % (frame_index - i))
        i = frame_index

        if not frame_found:
            # Done with this data packet
            break

        if first_frame:
            self.logger.debug('First frame found on index %d, seq number '
                              '%d' % (i, data[i + 1]))
            first_frame = False

        frame = self._decode_frame(data[i:i + self.bytes_per_frame])

        # Determine the number of dropped frames. Note that if more than 255
        # frames are dropped, this does not work.
        if self.last_frame is None:
            dropped_frames = 0
        elif frame.seq > self.last_frame.seq:
            dropped_frames = (frame.seq - 1) - self.last_frame.seq
        elif frame.seq < self.last_frame.seq:
            dropped_frames = (frame.seq + 255) - self.last_frame.seq
        else:
            self.logger.warning('Data corrupt: duplicate frame number in '
                                'data packet (%d = %d), i was %d' %
                                (self.last_frame.seq, frame.seq, i))
            # Don't use this frame
            i += self.bytes_per_frame
            continue

        if dropped_frames > 0:
            self.logger.warning('Dropped %d frames' % dropped_frames)
            self.droppedframeslog.write('%f, %f, %d\n' %
                                        (precision_timer(), self.last_id,
                                         dropped_frames))
            self.droppedframeslog.flush()

        # Interpolate the dropped frames if possible
        for j in range(1, dropped_frames + 1):
            if self.last_frame is not None:
                inter = (self.last_frame.X[:, 1] +
                         j * (frame.X[:, 0] - self.last_frame.X[:, 1]) /
                         float(dropped_frames + 1))
                samples.append(numpy.vstack((inter, inter)).T)
            else:
                samples.append(numpy.vstack((frame.X[:, 0], frame.X[:, 0])).T)

        # Append the current frame
        samples.append(frame.X)
        self.last_frame = frame
        i += self.bytes_per_frame

    if len(samples) == 0:
        return (None, data)

    X = numpy.hstack(samples)[self.target_channels, :]
    Y = numpy.zeros([1, X.shape[1]])
    I = self._estimate_timing(X.shape[1])

    self.logger.debug('Number of bytes parsed: %d' % i)

    d = psychic.DataSet(data=X, labels=Y, ids=I, feat_lab=self.feat_lab)
    return (d, data[i:])
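# Illustrative sketch (not part of the driver): linear interpolation of
# dropped frames between the last received sample and the first sample of the
# new frame, mirroring the loop above. The toy values are hypothetical.
import numpy

last = numpy.array([0.0, 10.0])   # last sample of the previous frame
new = numpy.array([3.0, 40.0])    # first sample of the new frame
dropped_frames = 2
for j in range(1, dropped_frames + 1):
    inter = last + j * (new - last) / float(dropped_frames + 1)
    print(inter)                  # [ 1. 20.] then [ 2. 30.]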
def _generate_debug_image(self, d):
    """ Generate an image describing the training data. """
    d = psychic.DataSet(cl_lab=self.cl_lab, default=d)
    fig = psychic.plot_erp(d, enforce_equal_n=False)
    fig.set_size_inches(7, 11)
    return fig
def infer_temporal_pattern(data, spat_bf, y=None, refine=None,
                           refine_params=None):
    """Estimate the temporal pattern of an ERP component from a psychic DataSet.

    This function uses the metadata present in the psychic DataSet object to
    provide a convenient interface to the general purpose
    infer_temporal_pattern function.

    Parameters
    ----------
    data : psychic.DataSet
        The trials in the format of a psychic DataSet.
    spat_bf : instance of LCMV
        The spatial beamformer that will be used to extract the ERP
        timecourse.
    y : 1D array (n_trials,) | 2D array (n_trials, 1) | None
        For each trial, a label indicating to which experimental condition
        the trial belongs. If None, data.y is used instead. Defaults to None.
    refine : 'zero' | 'peak-mean' | 'thres' | 'gauss' | None
        The method used to refine the template:

        'zero':      Zero out everything outside the time region of interest.
        'peak-mean': Find the peak inside the time region of interest. Then,
                     find the points before and after the peak, where the
                     signal drops below the average signal outside the time
                     region of interest. Zero out everything outside those
                     points.
        'thres':     As well as zeroing out everything outside the time
                     region of interest, also zero out any part of the signal
                     whose amplitude is below 4 standard deviations of the
                     signal amplitude during the baseline period.
        'gauss':     Multiply the signal with a Gaussian kernel that is
                     defined over time.

        Defaults to None, which means no refining of the template is
        performed.
    refine_params : dict | None
        Parameters for the chosen refining method. Each method uses different
        parameters taken from this dictionary. Possible parameters are:

        Used by 'zero', 'peak-mean' and 'thres':
            roi_time : tuple of floats
                The start and end time (in seconds, end is exclusive) of the
                time region of interest.

        Used by 'gauss':
            mu : float
                Time at which to center the Gaussian kernel in seconds.
            sigma : float
                Standard deviation (in seconds) of the Gaussian kernel.

    Returns
    -------
    temp_pat : psychic.DataSet
        The temporal pattern of the ERP component, stored in a psychic
        DataSet object.
    """
    if y is None:
        y = data.y

    channel_names, time = data.feat_lab[:2]

    if refine_params is None:
        refine_params = dict()

    # Translate between seconds and samples
    if 'roi_time' in refine_params:
        roi_time = refine_params['roi_time']
        roi_time = (np.searchsorted(time, roi_time[0]),
                    np.searchsorted(time, roi_time[1]))
        refine_params['roi_time'] = roi_time
    if 'mu' in refine_params:
        refine_params['mu'] = np.searchsorted(time, refine_params['mu'])
    if 'sigma' in refine_params:
        refine_params['sigma'] = np.searchsorted(time, refine_params['sigma'])

    baseline_time = (0, np.searchsorted(time, 0))

    temp_pat = template.infer_temporal_pattern(
        data.data.transpose(2, 0, 1), y, spat_bf, baseline_time, refine,
        refine_params)

    return psychic.DataSet(temp_pat, ids=time)
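# Illustrative sketch (not part of the function): the 'gauss' refinement
# multiplies the temporal pattern with a Gaussian kernel, with mu and sigma
# translated from seconds to samples as above. The time axis and parameter
# values below are hypothetical; the translation assumes the axis starts at 0.
import numpy as np

time = np.arange(0, 1.0, 0.01)          # trial time axis, 100 Hz
mu = np.searchsorted(time, 0.4)         # kernel center: 0.4 s -> sample ~40
sigma = np.searchsorted(time, 0.05)     # kernel width: 0.05 s -> ~5 samples
kernel = np.exp(-0.5 * ((np.arange(len(time)) - mu) / float(sigma)) ** 2)
pattern = np.random.randn(len(time)) * kernel   # suppress signal far from mu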
def run(self): """ Don't call this directly. Use start() and start_capture() to start reading data from the device. """ self.running = True # We keep track of the last 10 seconds of estimations of the sample rate # of the device in a circular buffer. self.estimated_sample_rates = collections.deque( maxlen=numpy.ceil(10 / float(self.buffer_size_seconds))) # We keep track of a 'drift table'. The first row contains the # timestamps of the incomining data. The second row contains the number # of samples read so far. The true samplerate of the device can be # estimated by performing a linear regression on the number of samples # read versus the timestamps. npoints = numpy.ceil(60 / float(self.buffer_size_seconds)) self._drift_table = [ collections.deque(maxlen=npoints), collections.deque(maxlen=npoints) ] # Open BDF file output if self.bdf_file != None: self.bdf_writer = psychic.BDFWriter(self.bdf_file, self.sample_rate, self.nchannels) self._set_bdf_values() self.bdf_writer.write_header() self.file_output = True else: self.file_output = False # Perform initialization of the device driver, which returns the # timestamp of the first data packet self.T0 = self._open() self.last_id = 0 while self.running: try: # Record some data d = self._record_data() # Check whether the decoding of the data succeeded if d == None: continue # Add markers to the data d = self._add_markers(d) # Write data without gain factor to file if self.file_output: self.bdf_writer.write_raw(d) # Check whether calibration period is complete if precision_timer() > self.T0 + self.calibration_time: self.calibrated_event.set() if self.capture_data: # Apply gain factor to data, producing values that # correspond to actual voltage d = psychic.DataSet(d.data * self.gain + self.physical_min, default=d) # Append the data to the buffer and notify # any listeners (usually the classifier) self.data_condition.acquire() if self.data == None: self.data = d else: self.data += d self.data_condition.notify() self.data_condition.release() except IOError, e: self.logger.error('I/O Error: %s' % e) raise