def transform(ds, name=None, ops=None):
    """
    Transform feature data according to parameters.

    Transformation involves normalization, smoothing, and/or
    transformation, applied in order.

    Parameters
    ----------
    ds : Dataset
        Dataset whose `data` will be transformed.
    name : str, optional
        Name for the resulting Dataset.
    ops : list of dict, optional
        Ordered list of transformation operations to perform on the data.
        Each operation is a dict with key 'FUNCTION' (a callable applied as
        `func(data, *args, **kwargs)`), and optionally 'ARGS' (list) and
        'KWARGS' (dict).

    Returns
    -------
    Dataset
        New Dataset with transformed data and the original features.
    """
    # Default is None rather than [] to avoid the shared mutable-default
    # pitfall.
    if ops is None:
        ops = []
    data = ds.data
    for op in ops:
        # dict.get replaces the original `'KEY' in op.keys()` checks.
        func = op.get('FUNCTION')
        args = op.get('ARGS', [])
        kwargs = op.get('KWARGS', {})
        if func is not None:
            data = func(data, *args, **kwargs)
    return Dataset(name=name, features=ds.features, data=data)
def _create_ds(self, features, overlap=False):
    """
    Combine a list of Datasets into a single Dataset.

    Parameters
    ----------
    features : list of Dataset
        Datasets to combine; the first is used as the base.
    overlap : bool, default=False
        Passed through to `Dataset.concatenate`.

    Returns
    -------
    Dataset
        The combined Dataset, or an empty Dataset if `features` is empty.
    """
    if self.verbose > 1:
        print("Creating Dataset")
    if not features:
        return Dataset()
    # Index instead of features.pop(0) so the caller's list is not mutated.
    base = features[0]
    for f in features[1:]:
        # NOTE(review): return value intentionally ignored, as in the
        # original — assumes concatenate mutates `base` in place; confirm
        # against the Dataset API (other call sites use the return value).
        base.concatenate(f, overlap=overlap)
    return base
def spectral_entropy(ds, name=None):
    """
    Compute the spectral entropy of each epoch's power spectral density.

    Parameters
    ----------
    ds : Dataset
        Dataset whose `features` hold the per-epoch frequency arrays and
        whose `data` holds the matching power spectral densities.
    name : str, optional
        Name for the resulting Dataset.

    Returns
    -------
    Dataset
        Dataset with the single feature "ENTROPY", data of shape
        (n_epochs, 1).
    """
    freqs, psds = ds.features, ds.data
    # zip stops at the shorter sequence, matching the original
    # range(min(len(f), len(Pxx))) loop.
    entropies = [s.spectral_entropy(fe, pe) for fe, pe in zip(freqs, psds)]
    column = np.array(entropies).reshape(-1, 1)
    return Dataset(name=name, features=["ENTROPY"], data=column)
def percentile_mean(dataobject, epoch_size, name=None, k=50):
    """
    Divide the data in `dataobject` into epochs of `epoch_size` and compute
    the mean of data above the `k`-th percentile in each epoch.

    Parameters
    ----------
    dataobject : DataObject
        DataObject for processing.
    epoch_size : int
        Number of seconds in each epoch.
    name : str, optional
        Name for the resulting Dataset.
    k : float, default=50
        Percentile threshold.

    Returns
    -------
    Dataset
        Dataset with the percentile mean at each epoch, shape
        (n_epochs, 1). Feature is named "<k> PERCENTILE MEAN".
    """
    epoch_len, n_epochs = epochify(dataobject.data, dataobject.resolution,
                                   epoch_size)
    means = [
        s.percentile_mean(get_epoch(dataobject.data, i, epoch_len), k)
        for i in range(n_epochs)
    ]
    column = np.array(means).reshape(-1, 1)
    return Dataset(name=name, features=[str(k) + " PERCENTILE MEAN"],
                   data=column)
def ratio(ds, name=None, ratios=None):
    """
    Compute ratios between pairs of spectral-band features.

    Parameters
    ----------
    ds : Dataset
        Dataset whose columns are band powers (one column per feature).
    name : str, optional
        Name for the resulting Dataset.
    ratios : dict, optional
        Maps each output feature name to a (numerator, denominator) pair
        of existing feature names in `ds.features`.

    Returns
    -------
    Dataset
        Dataset with one column per requested ratio, data of shape
        (n_epochs, n_ratios).
    """
    # Default is None rather than {} to avoid the shared mutable-default
    # pitfall.
    if ratios is None:
        ratios = {}
    data = ds.data
    features = ds.features.tolist()
    results = []
    ratio_names = list(ratios.keys())
    # `rname` avoids shadowing the function name `ratio` (original bug-prone
    # shadowing).
    for rname in ratio_names:
        num, den = ratios[rname]
        numerator = data[:, features.index(num)].flatten()
        denom = data[:, features.index(den)].flatten()
        results.append(np.divide(numerator, denom))
    results = np.array(results).T
    return Dataset(name=name, features=ratio_names, data=results)
def rms(dataobject, epoch_size, name=None):
    """
    Divide the data in `dataobject` into epochs of `epoch_size` and compute
    the root-mean-square at each epoch.

    Parameters
    ----------
    dataobject : DataObject
        DataObject for processing.
    epoch_size : int
        Number of seconds in each epoch.
    name : str, optional
        Name for the resulting Dataset.

    Returns
    -------
    Dataset
        Dataset with the root-mean-square at each epoch, shape
        (n_epochs, 1). Feature is named "RMS".
    """
    epoch_len, n_epochs = epochify(dataobject.data, dataobject.resolution,
                                   epoch_size)
    values = [
        s.rms(get_epoch(dataobject.data, i, epoch_len))
        for i in range(n_epochs)
    ]
    column = np.array(values).reshape(-1, 1)
    return Dataset(name=name, features=["RMS"], data=column)
def fft(dataobject, epoch_size, name=None, nperseg_factor=1,
        noverlap_factor=0.1, detrend='constant'):
    """
    Divide the data in `dataobject` into epochs of `epoch_size` and compute
    the power spectral density of each epoch via Welch's method.

    Parameters
    ----------
    dataobject : DataObject
        DataObject for processing.
    epoch_size : int
        Number of seconds in each epoch.
    name : str, optional
        Name for the resulting Dataset.
    nperseg_factor : float (0, inf)
        Multiplied by data resolution to calculate the NPERSEG value.
    noverlap_factor : float (0, inf)
        Multiplied by NPERSEG to calculate the NOVERLAP value.
    detrend : str or func or False, default='constant'
        Method to detrend each segment.

    Returns
    -------
    Dataset
        Dataset with per-epoch frequency arrays as features and the
        corresponding power spectral densities as data.
    """
    f, Pxx = [], []
    epoch_len, n_epochs = epochify(dataobject.data, dataobject.resolution,
                                   epoch_size)
    # Welch parameters depend only on the resolution and the factors, so
    # compute them once instead of once per epoch (original recomputed them
    # inside the loop).
    fs = 1 / dataobject.resolution
    nperseg = fs * nperseg_factor
    noverlap = nperseg * noverlap_factor
    for i in range(n_epochs):
        epoch = get_epoch(dataobject.data, i, epoch_len)
        fi, Pxxi = s.welch(epoch, fs=fs, nperseg=nperseg,
                           noverlap=noverlap, detrend=detrend)
        f.append(fi)
        Pxx.append(Pxxi)
    return Dataset(name=name, features=f, data=Pxx)
def classify():
    """Interactive classification flow for the current global `model`.

    Prompts the user (via file dialogs and yes/no prompts) for input files
    and an output directory, runs the model's read/process/predict pipeline
    on each file, and writes a predictions Dataset per file to the chosen
    destination. Returns early (to the caller's menu) if no files or no
    destination are selected.
    """
    # `model` and `verbose` are module-level state set elsewhere.
    global model, verbose
    print("-" * 30)
    print("Sleep Ensemble Classification")
    print("Current model:", model.name)
    print("-" * 30)
    print("Select files to classify")
    print("This model is configured to accept", model.reader.standard)
    filepaths = ask_filenames(filetypes=model.reader.filetypes)
    if len(filepaths) == 0:
        print("You didn't select any files! Returning you to the main menu")
        return
    print("Select where classifications should go:")
    destination = ask_directory()
    if destination is None or not os.path.isdir(destination):
        print(
            "You didn't select a destination! Returning you to the main menu")
        return
    save_input = yes_no_loop(
        "Do you also want to export processed data alongside classifications?")
    print("Identified", len(filepaths), "files to classify")
    # verbose == 1 shows a progress bar; verbose > 1 prints per-file messages
    # instead.
    if verbose == 1:
        jobs = trange(len(filepaths))
    else:
        jobs = range(len(filepaths))
    for i in jobs:
        if verbose > 1:
            print("Classifying", filepaths[i])
        # labels=False: classification reads unlabeled data.
        data, _ = model.read(filepaths[i], labels=False)
        name = Path(filepaths[i]).stem
        ds = model.process(data, None, name)
        p = model.predict(ds.data)
        result = Dataset(label_names=['PREDICTION'], labels=p.reshape(-1, 1))
        # Optionally prepend the processed features to the predictions.
        if save_input:
            result = ds.concatenate(result)
        result.name = name + "-predictions"
        if verbose > 1:
            print("Writing results")
        result.write(destination)
    print("Completed classification jobs!")
def merge(ds, name=None, feature='MERGE', method='mean', axis=1):
    """
    Merge the features of a Dataset into a single feature.

    Parameters
    ----------
    ds : Dataset
        Dataset whose feature columns will be merged.
    name : str, optional
        Name for the resulting Dataset.
    feature : str, default='MERGE'
        Name of the merged output feature.
    method : {'mean', 'median', 'max', 'min', 'sum'}, default='mean'
        Method used to compute the summary statistic across features.
    axis : int, default=1
        Axis along which to aggregate.

    Returns
    -------
    Dataset
        Dataset with the single merged feature, data of shape (n_epochs, 1).
    """
    merged = aggregate(ds.data, method, axis=axis).reshape(-1, 1)
    return Dataset(name=name, features=[feature], data=merged)
def process(self, data, labels, name):
    """Run the full feature-extraction pipeline on one recording.

    Extracts spectral, RMS, percentile, twitch, and entropy features from
    the three channels in `data`, merges the two EMG channels, combines
    everything into one Dataset, applies the configured transform ops,
    centers the data, and optionally attaches labels.

    Parameters
    ----------
    data : tuple
        (eeg, neck, mass) channel data; unpacked on the first line.
    labels : object or None
        Label container with a `.data` array, or None to skip labeling.
    name : str
        Name assigned to the resulting Dataset.

    Returns
    -------
    Dataset
        The processed, cleaned Dataset.
    """
    eeg, neck, mass = data
    if self.verbose > 0:
        print("Extracting Features")
    eeg, neck, mass = self._detrend(eeg, neck, mass)
    # _fft appears to return one Dataset per input channel; [0] takes the
    # single EEG result.
    eeg_fft = self._fft(self.params['EEG_FFT'], eeg)[0]
    neck_fft, mass_fft = self._fft(self.params['EMG_FFT'], neck, mass)
    eeg_rms, neck_rms, mass_rms = self._rms(eeg, neck, mass)
    neck_prct, mass_prct = self._prct(neck, mass)
    neck_twitch, mass_twitch = self._ep_var(neck, mass)
    eeg_entropy, neck_entropy, mass_entropy = self._entropy(eeg_fft,
                                                            neck_fft,
                                                            mass_fft)
    if self.verbose > 1:
        print("Calculating Frequency Bands")
    eeg_bands = sec.spectral_band(eeg_fft, bands=self.params['BANDS'],
                                  merge=self.params['BAND_MERGE'])
    if self.verbose > 1:
        print("Calculating Frequency Ratios")
    ratios = sec.ratio(eeg_bands, ratios=self.params['RATIOS'])
    if self.verbose > 1:
        print("Merging")
    # Merge the two EMG channels (neck, masseter) per feature type, using
    # the merge method configured for each.
    emg_rms = self._merge(mass_rms, neck_rms, feature='EMG RMS',
                          method=self.params['EMG_RMS_MERGE'])
    emg_prct = self._merge(mass_prct, neck_prct, feature='EMG PRCT',
                           method=self.params['EMG_PRCT_MERGE'])
    emg_twitch = self._merge(mass_twitch, neck_twitch,
                             feature='EMG TWITCH',
                             method=self.params['EMG_TWITCH_MERGE'])
    emg_entropy = self._merge(mass_entropy, neck_entropy,
                              feature='EMG ENTROPY',
                              method=self.params['EMG_ENTROPY_MERGE'])
    features = [eeg_bands, ratios, eeg_rms, emg_rms, emg_prct,
                eeg_entropy, emg_entropy, emg_twitch]
    ds = self._create_ds(features, overlap=False)
    if self.verbose > 1:
        print("Transforming Data")
    ds = sec.transform(ds, ops=self.params['TRANSFORM'])
    # Center each feature column at zero mean.
    ds.data -= np.mean(ds.data, axis=0)
    if labels is not None:
        if self.verbose > 1:
            print("Adding Labels")
        # NOTE(review): [:-1] drops the final label — presumably because
        # the last epoch is incomplete; confirm against the epoching logic.
        labels = Dataset(label_names=['LABELS'],
                         labels=labels.data[:-1].reshape(-1,1))
        ds = ds.concatenate(labels)
    ds.name = name
    ds.clean()
    return ds
def spectral_band(ds, name=None, bands=None, merge='sum'):
    """
    Compute the power within frequency bands for each epoch's power
    spectral density.

    Parameters
    ----------
    ds : Dataset
        Dataset whose `features` hold the per-epoch frequency arrays and
        whose `data` holds the matching power spectral densities.
    name : str, optional
        Name for the resulting Dataset.
    bands : dict, optional
        Maps band names to (low, high) frequency intervals. Defaults to a
        single 'ALL' band spanning the full frequency range.
    merge : {'mean', 'max', 'min', 'sum'}, default='sum'
        Method to merge power spectral densities within each band.

    Returns
    -------
    Dataset
        Dataset with one feature per band, data of shape
        (n_epochs, n_bands).
    """
    freqs, psds = ds.features, ds.data
    if bands is None:
        bands = {'ALL': (np.min(freqs), np.max(freqs))}
    names = list(bands.keys())
    # zip stops at the shorter sequence, matching the original
    # range(min(len(f), len(Pxx))) loop.
    rows = []
    for fe, pe in zip(freqs, psds):
        band_powers = s.compute_bands(fe, pe, bands, merge)
        rows.append([band_powers[k] for k in names])
    return Dataset(name=name, features=names, data=np.array(rows))
def epoched_variance(dataobject, epoch_size, name=None, sub_epoch_size=10,
                     threshold='median', merge='sum'):
    """
    Divide the data in `dataobject` into epochs of `epoch_size`, compute
    the variance of each sub-epoch, and summarize the above-threshold
    sub-epoch variances of each epoch.

    Parameters
    ----------
    dataobject : DataObject
        DataObject for processing.
    epoch_size : float
        Number of seconds in each epoch.
    name : str, optional
        Name for the resulting Dataset.
    sub_epoch_size : int
        Number of sub-epochs in an epoch.
    threshold : {'mean', 'median', '%<k>', float}
        Threshold to count as 'activity'. Must be one of:
        - 'mean' : Mean of sub-epochs.
        - 'median' : Median of sub-epochs.
        - '%<k>' : <k>-th percentile of sub-epochs.
          <k> is a non-negative integer.
        - float : Constant threshold.
    merge : {'mean', 'median', 'max', 'min', 'sum', 'count'}
        Determines merge method. Must be one of:
        - 'mean' : Mean of variances above threshold.
        - 'median' : Median of variances above threshold.
        - 'max' : Max of variances above threshold.
        - 'min' : Min of variances above threshold.
        - 'sum' : Sum of variances above threshold.
        - 'count' : Count of variances above threshold.

    Returns
    -------
    Dataset
        Dataset with the merged variance at each epoch, shape
        (n_epochs, 1). Feature is named "EPOCHED VARIANCE".
    """
    activities = []
    epoch_len, n_epochs = epochify(dataobject.data, dataobject.resolution,
                                   epoch_size)
    # Loop-invariant: sub-epoch length depends only on epoch_len and
    # sub_epoch_size (original recomputed it every epoch).
    subepoch_len = epoch_len // sub_epoch_size
    for i in range(n_epochs):
        epoch = get_epoch(dataobject.data, i, epoch_len)
        subepoch_var = []
        for j in range(sub_epoch_size):
            subepoch = epoch[j * subepoch_len:(j + 1) * subepoch_len]
            subepoch_var.append(np.var(subepoch))
        activities.append(subepoch_var)
    activities = np.array(activities)
    # Threshold is computed over ALL sub-epoch variances, across epochs.
    threshold = determine_threshold(activities.flatten(), threshold)
    # Iterate per-epoch rows; the original's reshape back to its own shape
    # was a no-op and has been removed.
    activities = list(activities)
    for i in range(n_epochs):
        above = np.where(activities[i] >= threshold)
        activities[i] = aggregate(activities[i][above], merge)
    activities = np.array(activities).reshape(-1, 1)
    ds = Dataset(name=name, features=["EPOCHED VARIANCE"], data=activities)
    return ds
def validate():
    """Interactive validation flow for the current global `model`.

    Prompts the user for labeled input files, processes each into a
    Dataset, then either cross-validates (train + validate) or just
    predicts, scores the predictions against the labels, prints a report,
    and optionally writes per-file result Datasets to a chosen directory.
    Returns early (to the caller's menu) if no files or no destination are
    selected.
    """
    # `model` and `verbose` are module-level state set elsewhere.
    global model, verbose
    print("-" * 30)
    print("Sleep Ensemble Validation")
    print("Current model:", model.name)
    print("-" * 30)
    print("Select files to use for validation:")
    print("This model is configured to accept", model.reader.standard)
    filepaths = ask_filenames(filetypes=model.reader.filetypes)
    if len(filepaths) == 0:
        print("You didn't select any files! Returning you to the main menu")
        return
    save_input = yes_no_loop("Do you want to export the results?")
    # `destination` is only defined (and later used) when save_input is True.
    if save_input:
        print("Select where results should go:")
        destination = ask_directory()
        if destination is None or not os.path.isdir(destination):
            print(
                "You didn't select a destination! Returning you to the main menu"
            )
            return
    print("Identified", len(filepaths), "files for validation")
    req_train = yes_no_loop(
        "Do you wish to train and validate via cross-validation (y) or just validate (n)?"
    )
    ds, data, labels = [], [], []
    # verbose == 1 shows a progress bar; verbose > 1 prints per-file messages
    # instead.
    if verbose == 1:
        jobs = trange(len(filepaths))
    else:
        jobs = range(len(filepaths))
    for i in jobs:
        if verbose > 1:
            print("Reading", filepaths[i])
        d, l = model.read(filepaths[i], labels=True)
        name = Path(filepaths[i]).stem
        ds_ = model.process(d, l, name)
        ds.append(ds_)
        data.append(ds_.data)
        labels.append(ds_.labels)
    if req_train:
        p, Y_hat = model.cross_validate(data, labels)
    else:
        p, Y_hat = model.predict(data)
    # If predictions came back as one flat array rather than one array per
    # file, split them back into per-file chunks by each file's length.
    if p[0].ndim != 1:
        p_ = []
        start = 0
        for i in range(len(data)):
            p_.append(p[start:start + len(data[i])])
            start = start + len(data[i])
        p = p_
    score = model.score(p, labels)
    print("Overall Score:", score)
    p_overall = np.concatenate(p)
    labels_overall = np.concatenate(labels)
    print_report(p_overall.reshape(-1), labels_overall.reshape(-1))
    if save_input:
        if verbose > 1:
            print("Writing results")
        for i in range(len(p)):
            # Columns: per-class outputs (Y_hat) plus the final prediction.
            r = np.concatenate((Y_hat[i], p[i].reshape(-1, 1)), axis=1)
            # NOTE(review): label names suggest the four sleep states
            # (AW/QW/NR/R) plus prediction 'P' — confirm class order
            # against the model.
            result = Dataset(label_names=['AW', 'QW', 'NR', 'R', 'P'],
                             labels=r)
            result = ds[i].concatenate(result)
            result.name = ds[i].name + "-validated-" + "%.4f" % score[i]
            result.write(destination)
    print("Completed validation")