def _load_data(self):
    logging.info("[+] loading training data")
    with utils.open(self.config.training_data_input) as file:
        user_ch = [line.split('\t') for line in file]
    self.user_ch = {
        int(line[0]): np.array(line[1].split(' '), dtype=np.int32)
        for line in user_ch
    }
    training_ch = [v for k, v in self.user_ch.items() if k % 10 != 0]
    validation_ch = [v for k, v in self.user_ch.items() if k % 10 == 0]
    self.training_pairs = np.vstack(
        [utils.rolling_window(line, self.config.window_size + 1)
         for line in training_ch
         if len(line) > self.config.window_size + 1])
    self.validation_pairs = np.vstack(
        [utils.rolling_window(line, self.config.window_size + 1)
         for line in validation_ch
         if len(line) > self.config.window_size + 1])
    self.training_pairs_count = len(self.training_pairs)
    self.validation_pairs_count = len(self.validation_pairs)
    self.training_step = np.ceil(
        self.training_pairs_count / self.config.batch_size)
    self.validation_step = np.ceil(
        self.validation_pairs_count / self.config.batch_size)
    logging.info("[-] loaded {}+{} pairs".format(
        self.training_pairs_count, self.validation_pairs_count))
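# The snippets in this section all lean on a `rolling_window` helper (called
# both as `utils.rolling_window` and as a bare `rolling_window`). Its
# implementation is not shown here; below is a minimal sketch of the common
# 1-D form, an assumption rather than the original code, using numpy's stride
# tricks. Some call sites below pass extra arguments (a stride, or an axis);
# those variants differ and are sketched separately where they appear.
import numpy as np

def rolling_window(a, window):
    """Return a view of `a` as overlapping windows of length `window`.

    rolling_window(np.arange(5), 3) -> [[0 1 2], [1 2 3], [2 3 4]]
    """
    shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
    strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)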
def get_spline_slopes(x, y, kind="linear", num_bins=20,
                      slope_x_start=None, slope_x_end=None):
    """Fit a spline to (x, y) and compute slopes between coarse bins."""
    if slope_x_start is None:
        slope_x_start = x.min()
    if slope_x_end is None:
        slope_x_end = x.max()
    fit = interp1d(x, y, kind=kind)
    x_coarse = np.linspace(x.min(), x.max(), num_bins)
    # to find the maximum slope, use only the specified range for slopes
    inds = (x_coarse >= slope_x_start) & (x_coarse <= slope_x_end)
    x_pairs = utils.rolling_window(x_coarse[inds], 2)
    y_coarse = fit(x_coarse)
    # slope proxy: difference between consecutive coarse y-values
    slopes = np.array([pair[-1] - pair[0]
                       for pair in utils.rolling_window(y_coarse[inds], 2)])
    results = {"x_coarse": x_coarse,
               "y_coarse": y_coarse,
               "y_slopes": slopes,
               "x_pairs": x_pairs,
               "fit": fit}
    return results
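# A quick usage sketch for get_spline_slopes with hypothetical data (interp1d
# is assumed to come from scipy.interpolate, as in the function above): for
# y = x**2 the coarse slopes should increase roughly monotonically with x.
x = np.linspace(0, 1, 50)
res = get_spline_slopes(x, x**2, num_bins=10)
print(res["y_slopes"])  # roughly monotonically increasing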
def get_temp_from_voltage(self, window=None):
    """Calibrates the voltage against the base temperature and writes the
    new temperature estimates to self.T1 and self.T2."""
    assert self.voltage
    self.T1_old = self.T1
    self.T2_old = self.T2
    linear = lambda x, a, b: a * x + b
    # TODO: add the possibility to calibrate using a specific period
    if window is None:
        popt1, _ = curve_fit(linear, self.V1, self.T_base)
        popt2, _ = curve_fit(linear, self.V2, self.T_base)
    else:
        popt1, _ = curve_fit(
            linear,
            utils.rolling_window(self.V1, window).mean(1)[::window],
            utils.rolling_window(self.T_base, window).mean(1)[::window])
        popt2, _ = curve_fit(
            linear,
            utils.rolling_window(self.V2, window).mean(1)[::window],
            utils.rolling_window(self.T_base, window).mean(1)[::window])
    self.T1 = linear(self.V1, *popt1)
    self.T2 = linear(self.V2, *popt2)
def main(args):
    exp = args.model_num
    name = args.name
    loss = args.mode == 'loss'
    plot_dir = './plots/exp_{}/'.format(exp)
    logs_dir = './logs/exp_{}/'.format(exp)
    filename = logs_dir + ('train.csv' if loss else 'valid.csv')
    df = pd.read_csv(filename, header=None, names=['iter', 'metric'])
    metric = df['metric'].values
    fig, ax = plt.subplots(figsize=(15, 8))
    rolling_mean = np.mean(rolling_window(metric, 50), 1)
    rolling_std = np.std(rolling_window(metric, 50), 1)
    plt.plot(range(len(rolling_mean)), rolling_mean, alpha=0.98, linewidth=0.9)
    plt.fill_between(range(len(rolling_std)),
                     rolling_mean - rolling_std,
                     rolling_mean + rolling_std,
                     alpha=0.5)
    title = 'Train Loss' if loss else 'Valid Acc'
    ytitle = 'Loss' if loss else 'Acc'
    plt.title(title)
    plt.xlabel('Iteration')
    plt.ylabel(ytitle)
    plt.grid()
    plt.tight_layout()
    plt.savefig(plot_dir + name, format='png', dpi=300)
def get_angle_deviations(self, window):
    assert hasattr(self, 'theta')
    baseline_theta = utils.rolling_window(self.theta, window).mean(1)
    baseline_phi = utils.rolling_window(self.phi, window).mean(1)
    self.theta_dev = self.theta[window // 2:-window // 2 + 1] - baseline_theta
    self.phi_dev = self.phi[window // 2:-window // 2 + 1] - baseline_phi
def get_temp_deviations(self, window):
    baseline1 = utils.rolling_window(self.T1, window).mean(1)
    baseline2 = utils.rolling_window(self.T2, window).mean(1)
    self.T1_dev = self.T1[window // 2:-window // 2 + 1] - baseline1
    self.T2_dev = self.T2[window // 2:-window // 2 + 1] - baseline2
    self.DT = self.T1 - self.T2
    baselineD = utils.rolling_window(self.DT, window).mean(1)
    self.DT_dev = self.DT[window // 2:-window // 2 + 1] - baselineD
def mean_filter(data, window):
    pad = window // 2
    rolled = rolling_window(data, window).mean(1)
    # pad both ends with the edge means so that, for an odd window,
    # the output length matches the input length
    rolled = np.hstack((np.ones((pad,)) * rolled[:pad].mean(),
                        rolled,
                        np.ones((pad,)) * rolled[-pad:].mean()))
    return rolled
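# A quick sanity check of mean_filter's padding behaviour, assuming the 1-D
# rolling_window sketch above: with an odd window the output keeps the input
# length.
sig = np.arange(10, dtype=float)
smoothed = mean_filter(sig, 5)
assert smoothed.shape == sig.shape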
def process(self, method, window):
    """Converts the temperature data to values proportional to the
    likelihood of being jumps.

    Args:
        method: function array => array, algorithm to be used
        window: int, size of the rolling window

    Returns:
        array
    """
    if self.baseline is None:
        raise AttributeError(
            "You must run preprocessing first. "
            "If you don't know what to use, just use an identity filter")
    assert window % 2
    pad = window // 2
    strided_data = utils.rolling_window(self.filtered, window)
    self.processed = np.hstack(
        (np.zeros(pad), method(strided_data), np.zeros(pad)))
def windowing_data(input_data, seizure_start_time_offsets, seizure_lengths,
                   load_Core):
    S, T = input_data.shape
    # feature_extraction.graphL_core.num_nodes = S
    sampling_freq = load_Core.sampling_freq
    if load_Core.num_windows is not None:
        win_len_sec = T / (sampling_freq * load_Core.num_windows)
        stride_sec = win_len_sec
    else:
        win_len_sec = 2.5
        stride_sec = 1.5
    seizure_start_time_offsets *= sampling_freq
    seizure_lengths *= sampling_freq
    win_len = int(np.ceil(win_len_sec * sampling_freq))
    stride = int(np.ceil(stride_sec * sampling_freq))
    X = rolling_window(input_data, win_len, stride)
    if load_Core.down_sampl_ratio is not None:
        X = DownSampler(load_Core).apply(X)
    X, conv_sizes = data_convert(X, load_Core)
    intervals_with_stride = np.arange(0, T - win_len, stride)
    num_windows = len(intervals_with_stride)
    print(' win_len_sec: %f , num_windows: %d' % (win_len_sec, num_windows))
    y = np.zeros((num_windows,))
    flag_ictal = False
    y_detection = 1 if load_Core.detection_flag else 0

    def state_gen(y, win_ind):
        if len(load_Core.state_win_lengths) < 1:
            return y
        state_counter = 2 if load_Core.detection_flag else 1
        num_winds = list(
            np.ceil(np.array(load_Core.state_win_lengths) /
                    win_len_sec).astype(int))
        end_ind = win_ind
        for le in num_winds:
            start_ind = np.max((0, end_ind - le))
            y[start_ind:end_ind] = state_counter
            state_counter += 1
            end_ind = start_ind
            if end_ind <= 0:
                break
        return y

    if seizure_start_time_offsets >= 0 and seizure_lengths >= 0:
        for win_ind in range(num_windows):
            w = intervals_with_stride[win_ind]
            if (seizure_start_time_offsets < w + win_len
                    and seizure_start_time_offsets + seizure_lengths > w):
                y[win_ind] = y_detection
                if not flag_ictal:
                    flag_ictal = True
                    y = state_gen(y, win_ind)
    dim = X.shape[2]
    return X, y, S, dim, conv_sizes
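# This snippet (and `apply` further below) calls rolling_window with a third
# `stride` argument on a 2-D input. A minimal sketch of that variant, under
# the assumption that it windows along the last axis and keeps every
# `stride`-th window (the original helper is not shown in this section;
# sliding_window_view requires NumPy >= 1.20):
def rolling_window_strided(a, window, stride):
    """Overlapping windows of length `window` along the last axis of `a`,
    subsampled so only every `stride`-th window is kept."""
    windows = np.lib.stride_tricks.sliding_window_view(a, window, axis=-1)
    return windows[..., ::stride, :]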
def loadexpt(cellidx, filename, method, history, fraction=1.,
             mean_adapt=False, roll=True):
    """Loads an experiment from disk

    Parameters
    ----------
    cellidx : int
        Index of the cell to load
    filename : string
        Name of the hdf5 file to load
    method : string
        The key in the hdf5 file to load ('train' or 'test')
    history : int
        Number of samples of history to include in the toeplitz stimulus
    fraction : float, optional
        Fraction of the experiment to load, must be between 0 and 1.
        (Default: 1.0)
    """
    assert 0 < fraction <= 1, "Fraction of data to load must be between 0 and 1"

    # currently only works with the Oct. 07, 15 experiment
    expt = '15-10-07'

    with notify('Loading {}ing data'.format(method)):
        # load the hdf5 file
        f = h5py.File(
            os.path.join(datadirs[os.uname()[1]], expt, filename + '.h5'), 'r')

        # length of the experiment
        expt_length = f[method]['time'].size
        num_samples = int(np.floor(expt_length * fraction))

        # load the stimulus
        stim = zscore(np.array(
            f[method]['stimulus'][:num_samples]).astype('float32'))

        # photoreceptor model of mean adaptation
        if mean_adapt:
            stim = pr_filter(10e-3, stim)

        # reshaped stimulus (nsamples, time/channel, space, space)
        if roll:
            stim_reshaped = np.rollaxis(
                np.rollaxis(rolling_window(stim, history, axis=0), 2), 3, 1)
        else:
            stim_reshaped = stim

        # get the response for this cell
        resp = np.array(
            f[method]['response/firing_rate_10ms'][cellidx, history:num_samples])

    return Batch(stim_reshaped, resp)
def timestep_slice_data(data, slice_size=10, rescale=True):
    # Load inputs and outputs
    labels = data['stages'][:, 2]
    pows = data['pows']
    if rescale:
        scaler = StandardScaler()
        scaler.fit(pows)
        pows = scaler.transform(pows)
    pows = pows.swapaxes(0, 1)
    # timeslice labels [N, slice_size]
    seq_labels = rolling_window(labels, slice_size)
    # timeslicing pows is awkward...
    seq_pows = rolling_window(pows, slice_size)
    seq_pows = seq_pows.swapaxes(0, 1)
    seq_pows = seq_pows.swapaxes(1, 2)
    return seq_pows, seq_labels
def gen_feat(filelist, sample_data=True):
    filter_sizes = [int(x) for x in FLAGS.filter_sizes.split(',')]
    with tf.device('/cpu:0'):
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=False)) as sess:
            model = UnsupSeech(window_length=FLAGS.window_length,
                               output_length=FLAGS.output_length,
                               filter_sizes=filter_sizes,
                               num_filters=FLAGS.num_filters,
                               fc_size=FLAGS.fc_size,
                               dropout_keep_prob=1.0,
                               train_files=filelist,
                               create_new_train_dir=False,
                               is_training=False,
                               decoder_layers=FLAGS.decoder_layers)
            if FLAGS.train_dir != "":
                ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print("Reading model parameters from %s"
                          % ckpt.model_checkpoint_path)
                    model.saver.restore(sess, ckpt.model_checkpoint_path)
                    # model is now loaded with the trained parameters
                    for myfile in filelist:
                        input_signal = training_data[myfile][20 * 16000:]
                        if FLAGS.show_feat:
                            feat = model.gen_feat_batch(
                                sess,
                                utils.rolling_window(
                                    input_signal,
                                    FLAGS.window_length, 180)[:500])
                            pyplot.imshow(feat.T)
                            pyplot.show()
                            print(feat)
                        pre_sig_length = 1600  # 1450
                        gen_signal = input_signal[:pre_sig_length]
                        print('Generating signal...')
                        for i in range(FLAGS.gen_steps):
                            next_signal = model.generate_signal(
                                sess, gen_signal[-FLAGS.window_length:],
                                temperature=FLAGS.temp)
                            # model.gen_next_batch(sess, [gen_signal[-FLAGS.window_length:]])
                            # input_signal = input_signal[FLAGS.output_length:] + next_signal[0]
                            if i % 100 == 0:
                                print(next_signal[0])
                                print(gen_signal.shape)
                            gen_signal = np.append(gen_signal, next_signal)
                        utils.writeSignal(gen_signal, 'gentest.wav')
                        print('done!')
                else:
                    print("Could not open training dir: %s" % FLAGS.train_dir)
            else:
                print("Train_dir parameter is empty")
def apply(self, X_raw, load_core):
    win_length = int(
        np.ceil(load_core.welchs_win_len * load_core.sampling_freq))
    X = rolling_window(
        X_raw, win_length,
        int(np.ceil(load_core.welchs_stride * load_core.sampling_freq)))
    X = np.swapaxes(np.swapaxes(X, 0, 1), 1, 2)
    f_signal = rfft(X)
    W = fftfreq(f_signal.shape[-1], d=1 / load_core.sampling_freq)
    conv_sizes = []
    all_sizess = np.zeros_like(W)
    for i in np.arange(len(load_core.freq_bands)):
        if i > 0:
            lowcut = load_core.freq_bands[i - 1]
        else:
            lowcut = load_core.initial_freq_band
        highcut = load_core.freq_bands[i]
        # binary mask selecting the frequency bins inside [lowcut, highcut)
        sizess = np.where(W < highcut, np.ones_like(W), np.zeros_like(W))
        sizess = np.where(W < lowcut, np.zeros_like(W), sizess)
        all_sizess += sizess
        conv_sizes.append(int(np.sum(sizess) * f_signal.shape[-2]))
    in_FFT_W = f_signal[..., np.squeeze(np.argwhere(all_sizess == 1))]
    FFT_W = np.reshape(in_FFT_W,
                       (in_FFT_W.shape[0], in_FFT_W.shape[1],
                        in_FFT_W.shape[2] * in_FFT_W.shape[3]))
    return FFT_W, np.array(conv_sizes)
def smear_labels(a: np.ndarray, size: int) -> np.ndarray:
    """Gives positive labels to points within `size` points of actual
    positive labels.

    Args:
        a: np.array, labels array
        size: int, how far the labels are smeared in each direction
    """
    pad = size  # number of zeros to add on both sides
    a_padded = np.concatenate((np.zeros(pad), a, np.zeros(pad)))
    rolled = utils.rolling_window(a_padded, size * 2 + 1)
    return rolled.max(1).astype(a.dtype)
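# A small worked example of smear_labels, assuming utils.rolling_window
# matches the 1-D sketch above: each positive label is widened by one point
# on either side.
labels = np.array([0, 0, 1, 0, 0, 0])
smeared = smear_labels(labels, size=1)
# smeared == array([0, 1, 1, 1, 0, 0])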
def slices(self, X):
    """Given an input X of dimension N, return an ndarray of dimension 3
    with all the instance values for each window.

    For example, if the input has shape (10, 400) and the stride_ratio is
    0.25, this will generate 301 windows with shape (10, 100). The final
    result would have a shape of (301, 10, 100).
    """
    self.logger.debug('Slicing X with shape {}'.format(X.shape))

    sample_shape = list(X[0].shape)
    window_shape = np.maximum(
        np.array([s * self.stride_ratio for s in sample_shape]),
        1).astype(np.int16)
    self.logger.debug('Got window shape: {}'.format(window_shape))

    #
    # Calculate the windows that are going to be used and the total
    # number of new generated samples.
    #
    windows_count = [
        sample_shape[i] - window_shape[i] + 1
        for i in range(len(sample_shape))
    ]
    new_instances_total = np.prod(windows_count)
    self.logger.debug('Slicing {} windows.'.format(windows_count))

    #
    # For each sample, get all the windows with their values
    #
    sliced_X = np.array([rolling_window(x, window_shape) for x in X])

    #
    # Swap the 0 and 1 axes so as to get, for each window, the value of
    # each sample.
    #
    sliced_X = np.swapaxes(sliced_X, 0, 1)

    if len(sliced_X.shape) > 3:
        shape = list(sliced_X.shape)
        sliced_X = sliced_X.reshape(shape[:2] + [np.prod(shape[2:])])

    self.logger.info(
        'Scanning turned X ({}) into sliced_X ({}). {} new instances were '
        'added per sample'.format(X.shape, sliced_X.shape,
                                  new_instances_total))

    return sliced_X
def get_full_field_flicker(period=1, low_contrast=0.1, high_contrast=1.0):
    sample_rate = 100
    flicker_sequence = np.hstack([
        low_contrast * np.random.randn(period * sample_rate),
        high_contrast * np.random.randn(period * sample_rate),
        low_contrast * np.random.randn(period * sample_rate)
    ])

    # Convert flicker sequence into full field movie
    full_field_flicker = np.outer(flicker_sequence, np.ones((1, 50, 50)))
    full_field_flicker = full_field_flicker.reshape(
        (flicker_sequence.shape[0], 50, 50))

    # Convert movie to 400ms long samples in the correct format for our model
    full_field_movies = rolling_window(full_field_flicker, 40)
    full_field_movies = np.rollaxis(full_field_movies, 2)
    full_field_movies = np.rollaxis(full_field_movies, 3, 1)
    return full_field_movies
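# A usage sketch for get_full_field_flicker. The expected output layout is an
# assumption read off the two np.rollaxis calls above (and on a rolling_window
# variant that windows the time axis): each sample should be
# (history, space, space) = (40, 50, 50).
movies = get_full_field_flicker(period=1)
assert movies.shape[1:] == (40, 50, 50)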
def calc_event_data(etdata, evt,
                    w={255: 1,
                       0: 1,
                       1: 50,
                       2: 1,
                       3: 1,
                       4: 1,
                       5: 1,
                       6: 1,
                       'vel': 18,
                       'etdq': 200}):
    """Calculates event parameters.

    Parameters:
        etdata -- an instance of ETData
        evt    -- compact event vector
        w      -- dictionary of context to take into account for each
                  event type; in ms

    Returns:
        posx_s    -- onset position, horizontal
        posx_e    -- offset position, horizontal
        posy_s    -- onset position, vertical
        posy_e    -- offset position, vertical
        posx_mean -- mean position, horizontal
        posy_mean -- mean position, vertical
        posx_med  -- median position, horizontal
        posy_med  -- median position, vertical
        pv        -- peak velocity
        pv_index  -- index of peak velocity
        rms       -- precision, 2D rms
        std       -- precision, 2D std
    """
    # init params
    data = etdata.data
    fs = etdata.fs
    e = {k: v for k, v in zip(['s', 'e', 'evt'], evt)}
    ws = w[e['evt']]
    ws = 1 if not (ws > 1) else round_up_to_odd(ws / 1000.0 * fs, min_val=3)
    ws_vel = round_up_to_odd(w['vel'] / 1000.0 * fs, min_val=3)
    w_etdq = int(w['etdq'] / 1000. * fs)

    # calculate velocity using Savitzky-Golay filter
    vel = np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 1),
                   sg.savgol_filter(data['y'], ws_vel, 2, 1)) * fs

    ind_s = e['s'] + ws
    ind_s = ind_s if ind_s < e['e'] else e['e']
    ind_e = e['e'] - ws
    ind_e = ind_e if ind_e > e['s'] else e['s']

    posx_s = np.nanmean(data[e['s']:ind_s]['x'])
    posy_s = np.nanmean(data[e['s']:ind_s]['y'])
    posx_e = np.nanmean(data[ind_e:e['e']]['x'])
    posy_e = np.nanmean(data[ind_e:e['e']]['y'])

    posx_mean = np.nanmean(data[e['s']:e['e']]['x'])
    posy_mean = np.nanmean(data[e['s']:e['e']]['y'])
    posx_med = np.nanmedian(data[e['s']:e['e']]['x'])
    posy_med = np.nanmedian(data[e['s']:e['e']]['y'])

    pv = np.max(vel[e['s']:e['e']])
    pv_index = e['s'] + np.argmax(vel[e['s']:e['e']])

    if e['e'] - e['s'] > w_etdq:
        x_ = rolling_window(data[e['s']:e['e']]['x'], w_etdq)
        y_ = rolling_window(data[e['s']:e['e']]['y'], w_etdq)

        std = np.median(np.hypot(np.std(x_, axis=1), np.std(y_, axis=1)))
        rms = np.median(np.hypot(np.sqrt(np.mean(np.diff(x_)**2, axis=1)),
                                 np.sqrt(np.mean(np.diff(y_)**2, axis=1))))
    else:
        std = 0
        rms = 0

    return (posx_s, posx_e, posy_s, posy_e, posx_mean, posy_mean,
            posx_med, posy_med, pv, pv_index, rms, std)
def plot_results(self, window=200, agent_subset=None, std=True):
    if not agent_subset:
        agent_subset = self.agents.keys()

    series_to_plot = {
        'cumulative rewards': {
            agent_name: self.rewards_per_episode[agent_name]
            for agent_name in agent_subset
        },
        'best score': {
            agent_name: self.best_score_per_episode[agent_name]
            for agent_name in agent_subset
        },
        'time steps': {
            agent_name: self.time_steps_per_episode[agent_name]
            for agent_name in agent_subset
        }
    }
    agents_to_plot = {
        agent_name: self.agents[agent_name] for agent_name in agent_subset
    }
    loss_per_agents = {
        'critic_loss': {
            agent_name: (np.array(agent.critic.loss_history)
                         if 'critic' in agent.__dict__.keys()
                         else np.array([]))
            for agent_name, agent in agents_to_plot.items()
        },
        'actor_loss': {
            agent_name: (np.array(agent.actor.loss_history)
                         if 'actor' in agent.__dict__.keys()
                         else np.array([]))
            for agent_name, agent in agents_to_plot.items()
        }
    }
    series_to_plot.update(loss_per_agents)

    fig, axs = plt.subplots(len(series_to_plot), 1, figsize=(10, 20),
                            facecolor='w', edgecolor='k')
    axs = axs.ravel()

    for idx, (series_name, dict_series) in enumerate(series_to_plot.items()):
        for jdx, (agent_name, series) in enumerate(dict_series.items()):
            if series.size == 0:
                axs[idx].plot([0.0], [0.0], label=agent_name)
                continue
            # cycle colors first, then line styles
            cm_idx = jdx % self.max_diff_colors
            ls_idx = min(jdx // self.max_diff_colors, self.num_lines_style)

            series_mvg = ut.rolling_window(series, window=window)
            series_mvg_avg = np.mean(series_mvg, axis=1)
            lines = axs[idx].plot(range(len(series_mvg_avg)),
                                  series_mvg_avg, label=agent_name)
            lines[0].set_color(self.cm(cm_idx))
            lines[0].set_linestyle(self.line_styles[ls_idx])
            if std:
                series_mvg_std = np.std(series_mvg, axis=1)
                area = axs[idx].fill_between(
                    range(len(series_mvg_avg)),
                    series_mvg_avg - series_mvg_std,
                    series_mvg_avg + series_mvg_std,
                    alpha=0.15)
                area.set_color(self.cm(cm_idx))
                area.set_linestyle(self.line_styles[ls_idx])

        box = axs[idx].get_position()
        axs[idx].set_position([box.x0, box.y0, box.width * 0.8, box.height])
        axs[idx].set_title(f"{series_name} per episode", fontsize=15)
        axs[idx].set_ylabel(f"avg {series_name}", fontsize=10)
        axs[idx].set_xlabel("episodes", fontsize=10)
        axs[idx].legend(loc='center left', bbox_to_anchor=(1, 0.5))

    fig.tight_layout()
def extractFeatures(etdata, **kwargs):
    '''Extracts features for IRF
    '''
    # get parameters
    data = etdata.data
    w, w_vel, w_dir = kwargs['w'], kwargs['w_vel'], kwargs['w_dir']

    tic = time.time()

    # find sampling rate
    fs = etdata.fs

    # window size for spatial measures in samples
    ws = round_up_to_odd(w / 1000.0 * fs + 1)

    # window size in samples for velocity calculation
    ws_vel = round_up_to_odd(w_vel / 1000.0 * fs)

    # window size in samples for direction calculation
    ws_dir = round_up_to_odd(w_dir / 1000.0 * fs)

    maskInterp = np.zeros(len(data), dtype=bool)
    '''Legacy code. Interpolates through missing points.
    if 'interp' in kwargs and kwargs['interp']:
        r = np.arange(len(data))
        _mask = np.isnan(data['x']) | np.isnan(data['y'])
        fx = interp.PchipInterpolator(r[~_mask], data[~_mask]['x'],
                                      extrapolate=True)
        fy = interp.PchipInterpolator(r[~_mask], data[~_mask]['y'],
                                      extrapolate=True)
        data['x'][_mask] = fx(r[_mask])
        data['y'][_mask] = fy(r[_mask])
        maskInterp = _mask
    '''

    # prepare data for vectorized processing
    ws_pad = (max((ws, ws_vel, ws_dir)) - 1) // 2
    x_padded = np.pad(data['x'], (ws_pad, ws_pad),
                      'constant', constant_values=np.nan)
    y_padded = np.pad(data['y'], (ws_pad, ws_pad),
                      'constant', constant_values=np.nan)

    ws_dir_pad = (ws_dir - 1) // 2
    x_padded_dir = np.pad(data['x'], (ws_dir_pad, ws_dir_pad),
                          'constant', constant_values=np.nan)
    y_padded_dir = np.pad(data['y'], (ws_dir_pad, ws_dir_pad),
                          'constant', constant_values=np.nan)

    x_windowed = rolling_window(x_padded, ws)
    y_windowed = rolling_window(y_padded, ws)
    dx_windowed = rolling_window(np.diff(x_padded), ws - 1)
    dy_windowed = rolling_window(np.diff(y_padded), ws - 1)
    x_windowed_dir = rolling_window(np.diff(x_padded_dir), ws_dir - 1)
    y_windowed_dir = rolling_window(np.diff(y_padded_dir), ws_dir - 1)

    # %% Extract features
    features = dict()

    # sampling rate
    features['fs'] = np.ones(len(data)) * fs

    for d, dd in zip(['x', 'y'], [x_windowed, y_windowed]):
        # difference between positions of preceding and succeeding windows,
        # aka tobii feature, together with data quality features and its
        # variants
        means = np.nanmean(dd, axis=1)
        meds = np.nanmedian(dd, axis=1)
        features['mean-diff-%s' % d] = np.roll(means, -(ws - 1) // 2) - \
                                       np.roll(means, (ws - 1) // 2)
        features['med-diff-%s' % d] = np.roll(meds, -(ws - 1) // 2) - \
                                      np.roll(meds, (ws - 1) // 2)

        # standard deviation
        features['std-%s' % d] = np.nanstd(dd, axis=1)
        features['std-next-%s' % d] = np.roll(features['std-%s' % d],
                                              -(ws - 1) // 2)
        features['std-prev-%s' % d] = np.roll(features['std-%s' % d],
                                              (ws - 1) // 2)

    features['mean-diff'] = np.hypot(features['mean-diff-x'],
                                     features['mean-diff-y'])
    features['med-diff'] = np.hypot(features['med-diff-x'],
                                    features['med-diff-y'])

    features['std'] = np.hypot(features['std-x'], features['std-y'])
    features['std-diff'] = np.hypot(features['std-next-x'],
                                    features['std-next-y']) - \
                           np.hypot(features['std-prev-x'],
                                    features['std-prev-y'])

    # BCEA
    P = 0.68  # cumulative probability of area under the multivariate normal
    k = np.log(1 / (1 - P))
    rho = vcorrcoef(x_windowed, y_windowed)
    features['bcea'] = 2 * k * np.pi * \
                       features['std-x'] * features['std-y'] * \
                       np.sqrt(1 - np.power(rho, 2))
    features['bcea-diff'] = np.roll(features['bcea'], -(ws - 1) // 2) - \
                            np.roll(features['bcea'], (ws - 1) // 2)

    # RMS
    features['rms'] = np.hypot(
        np.sqrt(np.mean(np.square(dx_windowed), axis=1)),
        np.sqrt(np.mean(np.square(dy_windowed), axis=1)))
    features['rms-diff'] = np.roll(features['rms'], -(ws - 1) // 2) - \
                           np.roll(features['rms'], (ws - 1) // 2)

    # disp, aka idt feature
    x_range = np.nanmax(x_windowed, axis=1) - np.nanmin(x_windowed, axis=1)
    y_range = np.nanmax(y_windowed, axis=1) - np.nanmin(y_windowed, axis=1)
    features['disp'] = x_range + y_range

    # velocity and acceleration
    features['vel'] = np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 1),
                               sg.savgol_filter(data['y'], ws_vel, 2, 1)) * fs
    features['acc'] = np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 2),
                               sg.savgol_filter(data['y'], ws_vel, 2, 2)) * fs**2

    # rayleightest
    angl = np.arctan2(y_windowed_dir, x_windowed_dir)
    features['rayleightest'] = ast.rayleightest(angl, axis=1)

    # i2mc
    if 'i2mc' in kwargs and kwargs['i2mc'] is not None:
        features['i2mc'] = kwargs['i2mc']['finalweights'].flatten()
    else:
        features['i2mc'] = np.zeros(len(data))

    # remove padding and nans
    mask_nans = np.any([np.isnan(values) for key, values
                        in features.items()], axis=0)
    mask_pad = np.zeros_like(data['x'], dtype=bool)
    mask_pad[:ws_pad] = True
    mask_pad[-ws_pad:] = True
    mask = mask_nans | mask_pad | maskInterp
    features = {key: values[~mask].astype(np.float32)
                for key, values in features.items()}

    dtype = np.dtype(list(zip(features.keys(),
                              itertools.repeat(np.float32))))
    features = np.core.records.fromarrays(list(features.values()),
                                          dtype=dtype)

    # return features
    toc = time.time()
    if kwargs.get('print_et'):
        print('Feature extraction took %.3f s.' % (toc - tic))
    return features, ~mask
                         progress=False)
closing_prices = stock_data["Close"].rolling(window=3).mean()[3:].values

# initialization
window = 5
dA_model = AutoEncoder(window, 4)
loss = MaxSE()
y = closing_prices.copy()

# training
for i in range(5):
    e = []
    T = rolling_window(y.copy()[:1000], window)
    np.random.shuffle(T)
    for t in tqdm(T):
        # rescale each window to [-1, 1] before training
        t -= t.min()
        t /= t.max()
        t -= 0.5
        t *= 2
        e.append(dA_model.learn(t, loss, noise=GaussianNoise(0, 0.001)))
    print(np.mean(e))

# encoding
prices = [
    i.copy() for i in np.array_split(y, len(y) // window) if len(i) == window