Example 1
    def _load_data(self):
        logging.info("[+] loading training data")
        with utils.open(self.config.training_data_input) as file:
            user_ch = [line.split('\t') for line in file]
        self.user_ch = {
            int(line[0]): np.array(line[1].split(' '), dtype=np.int32)
            for line in user_ch
        }

        training_ch = [v for k, v in self.user_ch.items() if k % 10 != 0]
        validation_ch = [v for k, v in self.user_ch.items() if k % 10 == 0]
        self.training_pairs = np.vstack([
            utils.rolling_window(line, self.config.window_size + 1)
            for line in training_ch
            if len(line) > self.config.window_size + 1])
        self.validation_pairs = np.vstack([
            utils.rolling_window(line, self.config.window_size + 1)
            for line in validation_ch
            if len(line) > self.config.window_size + 1])
        self.training_pairs_count = len(self.training_pairs)
        self.validation_pairs_count = len(self.validation_pairs)
        self.training_step = np.ceil(self.training_pairs_count /
                                     self.config.batch_size)
        self.validation_step = np.ceil(self.validation_pairs_count /
                                       self.config.batch_size)
        logging.info("[-] loaded {}+{} pairs".format(
            self.training_pairs_count, self.validation_pairs_count))
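Every example in this collection leans on some rolling_window helper (often utils.rolling_window), whose implementation differs from project to project. As a point of reference, a minimal sketch of the common stride-based variant for the unit-stride case, assuming NumPy >= 1.20, could look like this (some examples below also pass a stride or axis argument, which this sketch does not cover):

import numpy as np

def rolling_window(a, window):
    # Overlapping windows of `a` along its last axis;
    # result shape: (..., a.shape[-1] - window + 1, window).
    return np.lib.stride_tricks.sliding_window_view(a, window, axis=-1)

# rolling_window(np.arange(6), 3) -> [[0 1 2], [1 2 3], [2 3 4], [3 4 5]]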
Example 2
def get_spline_slopes(x, y, kind="linear", num_bins=20,
                      slope_x_start=None,
                      slope_x_end=None):
    """
    Fit spline to get slopes.
    """
    if slope_x_start is None:
        slope_x_start = x.min()
    if slope_x_end is None:
        slope_x_end = x.max()
    fit = interp1d(x, y, kind=kind)
    x_coarse = np.linspace(x.min(), x.max(), num_bins)
    # use only the specified range when computing slopes
    inds = (x_coarse >= slope_x_start) & (x_coarse <= slope_x_end)
    x_pairs = utils.rolling_window(x_coarse[inds], 2)
    y_coarse = fit(x_coarse)
    # rise of the fitted curve over each bin (proportional to slope on the even grid)
    slopes = np.array([pair[-1] - pair[0]
                       for pair in utils.rolling_window(y_coarse[inds], 2)])
    results = {"x_coarse": x_coarse,
               "y_coarse": y_coarse,
               "y_slopes": slopes,
               "x_pairs": x_pairs,
               "fit": fit}
    return results
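Since x_coarse is evenly spaced, the returned y_slopes are proportional to the slope of the fit on each bin, so the steepest segment can be read straight off the result. A minimal usage sketch with hypothetical data, assuming get_spline_slopes above is importable together with its scipy.interpolate.interp1d and utils.rolling_window dependencies:

import numpy as np

x = np.linspace(0.0, 10.0, 200)               # hypothetical sigmoid-shaped data
y = 1.0 / (1.0 + np.exp(-(x - 5.0)))

res = get_spline_slopes(x, y, kind="linear", num_bins=20)
steepest = np.argmax(res["y_slopes"])          # index of the steepest bin
x_lo, x_hi = res["x_pairs"][steepest]          # x-range covered by that bin
print("steepest segment: [%.2f, %.2f]" % (x_lo, x_hi))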
Example 3
    def get_temp_from_voltage(self, window=None):
        """
        Calibrates the voltage against the base temperature and stores the new
        temperature estimates in self.T1 and self.T2.
        """
        assert self.voltage

        self.T1_old = self.T1
        self.T2_old = self.T2

        linear = lambda x, a, b: a * x + b
        # TODO: add the possibility to calibrate using a specific period
        if window is None:
            popt1, _ = curve_fit(linear, self.V1, self.T_base)
            popt2, _ = curve_fit(linear, self.V2, self.T_base)
        else:
            popt1, _ = curve_fit(
                linear,
                utils.rolling_window(self.V1, window).mean(1)[::window],
                utils.rolling_window(self.T_base, window).mean(1)[::window])
            popt2, _ = curve_fit(
                linear,
                utils.rolling_window(self.V2, window).mean(1)[::window],
                utils.rolling_window(self.T_base, window).mean(1)[::window])

        self.T1 = linear(self.V1, *popt1)
        self.T2 = linear(self.V2, *popt2)
Example 4
def main(args):
    exp = args.model_num
    name = args.name
    loss = args.mode == 'loss'

    plot_dir = './plots/'
    logs_dir = './logs/'
    logs_dir += 'exp_{}/'.format(exp)
    plot_dir += 'exp_{}/'.format(exp)

    if loss:
        filename = logs_dir + 'train.csv'
    else:
        filename = logs_dir + 'valid.csv'
    df = pd.read_csv(filename, header=None, names=['iter', 'metric'])
    metric = df['metric'].values

    fig, ax = plt.subplots(figsize=(15, 8))
    rolling_mean = np.mean(rolling_window(metric, 50), 1)
    rolling_std = np.std(rolling_window(metric, 50), 1)
    plt.plot(range(len(rolling_mean)), rolling_mean, alpha=0.98, linewidth=0.9)
    plt.fill_between(range(len(rolling_std)),
                     rolling_mean - rolling_std,
                     rolling_mean + rolling_std,
                     alpha=0.5)
    title = 'Train Loss' if loss else 'Valid Acc'
    ytitle = 'Loss' if loss else 'Acc'
    plt.title(title)
    plt.xlabel('Iteration')
    plt.ylabel(ytitle)
    plt.grid()
    plt.tight_layout()
    plt.savefig(plot_dir + name, format='png', dpi=300)
Example 5
    def get_angle_deviations(self, window):
        assert hasattr(self, 'theta')

        baseline_theta = utils.rolling_window(self.theta, window).mean(1)
        baseline_phi = utils.rolling_window(self.phi, window).mean(1)

        self.theta_dev = self.theta[window // 2:-window // 2 +
                                    1] - baseline_theta
        self.phi_dev = self.phi[window // 2:-window // 2 + 1] - baseline_phi
Example 6
    def get_temp_deviations(self, window):
        baseline1 = utils.rolling_window(self.T1, window).mean(1)
        baseline2 = utils.rolling_window(self.T2, window).mean(1)

        self.T1_dev = self.T1[window // 2:-window // 2 + 1] - baseline1
        self.T2_dev = self.T2[window // 2:-window // 2 + 1] - baseline2

        self.DT = self.T1 - self.T2

        baselineD = utils.rolling_window(self.DT, window).mean(1)
        self.DT_dev = self.DT[window // 2:-window // 2 + 1] - baselineD
Example 7
def mean_filter(data, window):
    pad = window // 2

    rolled = rolling_window(data, window).mean(1)
    rolled = np.hstack((np.ones((pad,)) * rolled[:pad].mean(),
                        rolled,
                        np.ones((pad,)) * rolled[-pad:].mean()))

    return rolled
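For an odd window the padding restores the input length: the rolling mean is window - 1 samples shorter than data, and pad = window // 2 constant values are prepended and appended. A quick sanity check, assuming the stride-based rolling_window sketched after Example 1:

import numpy as np

data = np.arange(11, dtype=float)
smoothed = mean_filter(data, window=5)
assert smoothed.shape == data.shape            # length preserved for odd windows
# interior values are ordinary centred 5-point means, e.g. smoothed[5] == 5.0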
Example 8
    def process(self, method, window):
        """
        Converts the temperature data to values proportional to the likelihood of being jumps.

        Args:
            method: function array => array, algorithm to be used
            window: int, size of the rolling window

        Returns:
            array
        """

        if self.baseline is None:
            raise AttributeError(
                "You must run preprocessing first. "
                "If you don't know what to use, just use an identity filter")

        assert window % 2, "window must be odd"

        pad = window // 2

        strided_data = utils.rolling_window(self.filtered, window)

        self.processed = np.hstack(
            (np.zeros(pad), method(strided_data), np.zeros(pad)))
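The zero padding keeps the processed trace aligned with the input: method() maps the (N - window + 1, window) strided view to one score per window, and pad = window // 2 zeros on each side restore length N. A standalone sketch of the same pattern with a stand-in scoring function, assuming the rolling_window sketched after Example 1 and an odd window:

import numpy as np

signal = np.random.randn(100)
window = 11
pad = window // 2

windows = rolling_window(signal, window)       # (90, 11) strided view
scores = windows.std(axis=1)                   # stand-in for `method`
processed = np.hstack((np.zeros(pad), scores, np.zeros(pad)))
assert processed.shape == signal.shape         # aligned with the original signal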
Example 9
def windowing_data(input_data, seizure_start_time_offsets, seizure_lengths,
                   load_Core):
    S, T = input_data.shape
    #     feature_extraction.graphL_core.num_nodes = S
    sampling_freq = load_Core.sampling_freq
    if (load_Core.num_windows is not None):
        win_len_sec = T / (sampling_freq * load_Core.num_windows)
        stride_sec = win_len_sec
    else:
        win_len_sec = 2.5
        stride_sec = 1.5

    seizure_start_time_offsets *= sampling_freq
    seizure_lengths *= sampling_freq
    win_len = int(np.ceil(win_len_sec * sampling_freq))
    stride = int(np.ceil(stride_sec * sampling_freq))
    X = rolling_window(
        input_data, win_len, stride
    )  # np.swapaxes(, 0, 1) # np.lib.stride_tricks.as_strided(input_data, strides = st, shape = (input_data.shape[1] - w + 1, w))[0::o]
    if (load_Core.down_sampl_ratio is not None):
        X = DownSampler(load_Core).apply(X)
    X, conv_sizes = data_convert(X, load_Core)  # X
    intervals_with_stride = np.arange(
        0, T - win_len, stride
    )  # rolling_window(np.arange(T)[np.newaxis,:], win_len, stride) #
    num_windows = len(intervals_with_stride)
    print('    win_len_sec: %f , num_windows: %d' % (win_len_sec, num_windows))
    y = np.zeros((num_windows, ))
    flag_ictal = False
    y_detection = 1 if load_Core.detection_flag else 0

    def state_gen(y, win_ind):
        if (len(load_Core.state_win_lengths) < 1):
            return y
        state_counter = 2 if load_Core.detection_flag else 1
        num_winds = list(
            np.ceil(np.array(load_Core.state_win_lengths) /
                    win_len_sec).astype(int))
        end_ind = win_ind
        for le in num_winds:
            start_ind = np.max((0, end_ind - le))
            y[start_ind:end_ind] = state_counter
            state_counter += 1
            end_ind = start_ind
            if (end_ind <= 0):
                break
        return y

    if (seizure_start_time_offsets >= 0 and seizure_lengths >= 0):
        for win_ind in range(num_windows):
            w = intervals_with_stride[win_ind]
            if ((seizure_start_time_offsets < w + win_len)
                    and (seizure_start_time_offsets + seizure_lengths > w)):
                y[win_ind] = y_detection
                if (not flag_ictal):
                    flag_ictal = True
                    y = state_gen(y, win_ind)

    dim = X.shape[2]
    return X, y, S, dim, conv_sizes
Example 10
def loadexpt(cellidx, filename, method, history, fraction=1., mean_adapt=False, roll=True):
    """
    Loads an experiment from disk

    Parameters
    ----------
    cellidx : int
        Index of the cell to load

    filename : string
        Name of the hdf5 file to load

    method : string
        The key in the hdf5 file to load ('train' or 'test')

    history : int
        Number of samples of history to include in the toeplitz stimulus

    fraction : float, optional
        Fraction of the experiment to load, must be between 0 and 1. (Default: 1.0)

    mean_adapt : bool, optional
        Whether to apply a photoreceptor model of mean adaptation. (Default: False)

    roll : bool, optional
        Whether to reshape the stimulus into rolling windows of `history` samples. (Default: True)

    """

    assert fraction > 0 and fraction <= 1, "Fraction of data to load must be between 0 and 1"

    # currently only works with the Oct. 07, 15 experiment
    expt = '15-10-07'

    with notify('Loading {}ing data'.format(method)):

        # load the hdf5 file
        f = h5py.File(os.path.join(datadirs[os.uname()[1]], expt, filename + '.h5'), 'r')

        # length of the experiment
        expt_length = f[method]['time'].size
        num_samples = int(np.floor(expt_length * fraction))

        # load the stimulus
        stim = zscore(np.array(f[method]['stimulus'][:num_samples]).astype('float32'))

        # photoreceptor model of mean adaptation
        if mean_adapt:
            stim = pr_filter(10e-3, stim)

        # reshaped stimulus (nsamples, time/channel, space, space)
        if roll:
            stim_reshaped = np.rollaxis(np.rollaxis(rolling_window(stim, history, axis=0), 2), 3, 1)
        else:
            stim_reshaped = stim

        # get the response for this cell
        resp = np.array(f[method]['response/firing_rate_10ms'][cellidx, history:num_samples])

    return Batch(stim_reshaped, resp)
Example 11
def timestep_slice_data(data, slice_size=10, rescale=True):
    # Load inputs and outputs
    labels = data['stages'][:, 2]
    pows = data['pows']
    if rescale:
        scaler = StandardScaler()
        scaler.fit(pows)
        pows = scaler.transform(pows)

    pows = pows.swapaxes(0, 1)

    # timeslice labels [ N,slice_size ]
    seq_labels = rolling_window(labels, slice_size)

    # timeslicing pows is awkward...
    seq_pows = rolling_window(pows, slice_size)
    seq_pows = seq_pows.swapaxes(0, 1)
    seq_pows = seq_pows.swapaxes(1, 2)

    return seq_pows, seq_labels
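Assuming pows is stored as (time, channels) and that rolling_window windows along the last axis (as in the sketch after Example 1), the function returns N - slice_size + 1 overlapping sequences: seq_labels with shape (N - slice_size + 1, slice_size) and seq_pows with shape (N - slice_size + 1, slice_size, channels). A hypothetical call with synthetic data:

import numpy as np

data = {
    'pows': np.random.randn(1000, 8),                   # (time, channels)
    'stages': np.random.randint(0, 5, size=(1000, 3)),  # labels live in column 2
}
seq_pows, seq_labels = timestep_slice_data(data, slice_size=10, rescale=False)
# seq_labels.shape == (991, 10); seq_pows.shape == (991, 10, 8)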
Example 12
def gen_feat(filelist, sample_data=True):
    filter_sizes = [int(x) for x in FLAGS.filter_sizes.split(',')]
    with tf.device('/cpu:0'):
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)) as sess:
            model = UnsupSeech(window_length=FLAGS.window_length,
                               output_length=FLAGS.output_length,
                               filter_sizes=filter_sizes,
                               num_filters=FLAGS.num_filters,
                               fc_size=FLAGS.fc_size,
                               dropout_keep_prob=1.0,
                               train_files=filelist,
                               create_new_train_dir=False,
                               is_training=False,
                               decoder_layers=FLAGS.decoder_layers)
            if FLAGS.train_dir != "":
                ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print("Reading model parameters from %s" %
                          ckpt.model_checkpoint_path)
                    model.saver.restore(sess, ckpt.model_checkpoint_path)
                    # model is now loaded with the trained parameters
                    for myfile in filelist:
                        input_signal = training_data[myfile][20 * 16000:]
                        if FLAGS.show_feat:
                            feat = model.gen_feat_batch(
                                sess,
                                utils.rolling_window(input_signal,
                                                     FLAGS.window_length,
                                                     180)[:500])
                            pyplot.imshow(feat.T)
                            pyplot.show()
                            print(feat)
                        pre_sig_length = 1600  #1450
                        gen_signal = input_signal[:pre_sig_length]
                        print('Generating signal...')
                        for i in range(FLAGS.gen_steps):
                            next_signal = model.generate_signal(
                                sess,
                                gen_signal[-FLAGS.window_length:],
                                temperature=FLAGS.temp)
                            #model.gen_next_batch(sess, [gen_signal[-FLAGS.window_length:]])
                            #input_signal = input_signal[FLAGS.output_length:] + next_signal[0]
                            if i % 100 == 0:
                                print(next_signal[0])
                                print(gen_signal.shape)
                            gen_signal = np.append(gen_signal, next_signal)
                        utils.writeSignal(gen_signal, 'gentest.wav')
                        print('done!')
                else:
                    print("Could not open training dir: %s" % FLAGS.train_dir)
            else:
                print("Train_dir parameter is empty")
Example 13
    def apply(self, X_raw, load_core):
        win_length = int(
            np.ceil(load_core.welchs_win_len * load_core.sampling_freq))
        X = rolling_window(
            X_raw, win_length,
            int(np.ceil(load_core.welchs_stride * load_core.sampling_freq)))
        X = np.swapaxes(np.swapaxes(X, 0, 1), 1, 2)
        f_signal = rfft(X)
        W = fftfreq(f_signal.shape[-1], d=1 / load_core.sampling_freq)

        #         f_signal = np.swapaxes(f_signal, 2, 3)
        #         if(load_core.down_sampl_ratio is not None):
        #             f_signal = DownSampler(load_core).apply(f_signal)
        conv_sizes = []
        all_sizess = np.zeros_like(W)
        for i in np.arange(len(load_core.freq_bands)):
            if (i > 0):
                lowcut = load_core.freq_bands[i - 1]
            else:
                lowcut = load_core.initial_freq_band
            highcut = load_core.freq_bands[i]
            sizess = np.where(W < highcut, np.ones_like(W), np.zeros_like(W))
            sizess = np.where(W < lowcut, np.zeros_like(W), sizess)
            all_sizess += sizess
            conv_sizes.append(int(np.sum(sizess) * f_signal.shape[-2]))
        in_FFT_W = f_signal[..., np.squeeze(np.argwhere(all_sizess == 1))]
        FFT_W = np.reshape(in_FFT_W, (in_FFT_W.shape[0], in_FFT_W.shape[1],
                                      in_FFT_W.shape[2] * in_FFT_W.shape[3]))
        #         W = np.tile(W,np.hstack((f_signal.shape[:-1],1)))
        #         FFT_W = None
        #         for i in np.arange(len(load_core.freq_bands)):
        #             if(i>0):
        #                 lowcut = load_core.freq_bands[i-1]
        #             else:
        #                 lowcut = load_core.initial_freq_band
        #             highcut = load_core.freq_bands[i]
        # #             butter_bandpass_filter(data, lowcut, highcut, fs)
        #             cut_f_signal = f_signal.copy()
        # #             cut_f_signal = np.where(W<highcut, cut_f_signal,0 )
        # #             cut_f_signal = np.where(W>=lowcut, cut_f_signal,0 )
        #             cut_f_signal[ W >= highcut] = 0 # np.abs(W)
        #             cut_f_signal[ W < lowcut] = 0
        #             cut_f_signal = np.reshape(cut_f_signal, np.hstack((cut_f_signal.shape[:-2],np.multiply(cut_f_signal.shape[-2],cut_f_signal.shape[-1]))))# check again if correct ?????????????
        #             if(load_core.down_sampl_ratio is not None):
        #                 cut_f_signal = DownSampler(load_core).apply(cut_f_signal)
        #             if(FFT_W is None):
        #                 FFT_W = cut_f_signal
        #             else:
        #                 FFT_W = np.concatenate((FFT_W,cut_f_signal), axis=-1)
        #             conv_sizes.append(cut_f_signal.shape[-1])
        return FFT_W, np.array(conv_sizes)
Example 14
def smear_labels(a: np.ndarray, size: int) -> np.ndarray:
    """
    Gives positive labels to points within `size` points of actual positive labels.

    Args:
        a: np.array, labels array
        size: int, how far away the labels are smeared
    """

    pad = size  # number of zeros to add on each side
    a_padded = np.concatenate((np.zeros(pad), a, np.zeros(pad)))
    rolled = utils.rolling_window(a_padded, size * 2 + 1)

    return rolled.max(1).astype(a.dtype)
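A small worked example of the smearing with size=1, assuming utils.rolling_window behaves like the stride-based sketch after Example 1: each point takes the maximum label found within size samples on either side.

import numpy as np

a = np.array([0, 0, 0, 1, 0, 0, 0, 0, 1, 0])
print(smear_labels(a, size=1))
# -> [0 0 1 1 1 0 0 1 1 1]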
Example 15
    def slices(self, X):
        """
        Given an input X with dimention N, return a ndarray of dimention 3 with all the instances
        values for each window.

        For example, if the input has shape (10, 400), and the stride_ratio is 0.25, then this
        will generate 301 windows with shape (10, 100). The final result would have a shape of
        (301, 10, 100).
        """
        self.logger.debug('Slicing X with shape {}'.format(X.shape))
        sample_shape = list(X[0].shape)

        window_shape = np.maximum(
            np.array([s * self.stride_ratio for s in sample_shape]),
            1).astype(np.int16)
        self.logger.debug('Got window shape: {}'.format(window_shape))

        #
        # Calculate the windows that are going to be used and the total
        # number of new generated samples.
        #
        windows_count = [
            sample_shape[i] - window_shape[i] + 1
            for i in range(len(sample_shape))
        ]
        new_instances_total = np.prod(windows_count)

        self.logger.debug('Slicing {} windows.'.format(windows_count))

        #
        # For each sample, get all the windows with their values
        #
        sliced_X = np.array([rolling_window(x, window_shape) for x in X])

        #
        # Swap the 0 and 1 axis so as to get for each window, the value of each sample.
        #
        sliced_X = np.swapaxes(sliced_X, 0, 1)

        if len(sliced_X.shape) > 3:
            shape = list(sliced_X.shape)
            sliced_X = sliced_X.reshape(shape[:2] + [np.prod(shape[2:])])

        self.logger.info(
            'Scanning turned X ({}) into sliced_X ({}). {} new instances were added '
            'per sample'.format(X.shape, sliced_X.shape, new_instances_total))

        return sliced_X
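The (10, 400) -> (301, 10, 100) shape example from the docstring can be reproduced outside the class, assuming a stride_ratio of 0.25 and the stride-based rolling_window sketched after Example 1:

import numpy as np

X = np.random.randn(10, 400)                 # 10 samples of length 400
window = int(400 * 0.25)                     # stride_ratio 0.25 -> window of 100

sliced = np.array([rolling_window(x, window) for x in X])   # (10, 301, 100)
sliced = np.swapaxes(sliced, 0, 1)                          # (301, 10, 100)
assert sliced.shape == (301, 10, 100)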
Example 16
def get_full_field_flicker(period=1, low_contrast=0.1, high_contrast=1.0):
    sample_rate = 100
    flicker_sequence = np.hstack([
        low_contrast * np.random.randn(period * sample_rate),
        high_contrast * np.random.randn(period * sample_rate),
        low_contrast * np.random.randn(period * sample_rate)
    ])

    # Convert flicker sequence into full field movie
    full_field_flicker = np.outer(flicker_sequence, np.ones((1, 50, 50)))
    full_field_flicker = full_field_flicker.reshape(
        (flicker_sequence.shape[0], 50, 50))

    # Convert movie to 400ms long samples in the correct format for our model
    full_field_movies = rolling_window(full_field_flicker, 40)
    full_field_movies = np.rollaxis(full_field_movies, 2)
    full_field_movies = np.rollaxis(full_field_movies, 3, 1)
    return full_field_movies
Example 18
def calc_event_data(etdata, evt,
                    w = {255:1,
                         0: 1,
                         1: 50,
                         2: 1,
                         3: 1,
                         4: 1,
                         5: 1,
                         6: 1,
                         'vel': 18,
                         'etdq': 200}, ):
    """Calculates event parameters.
    Parameters:
        etdata  --  an instance of ETData
        evt     --  compact event vector
        w       --  dictionary of context to take into account
                    for each event type; in ms
    Returns:
        posx_s      --  onset position, horizontal
        posx_e      --  offset position, horizontal
        posy_s      --  onset position, vertical
        posy_e      --  offset position, vertical
        posx_mean   --  mean position, horizontal
        posy_mean   --  mean position, vertical
        posx_med    --  median position, horizontal
        posy_med    --  median position, vertical
        pv          --  peak velocity
        pv_index    --  index for peak velocity
        rms         --  precision, 2D rms
        std         --  precision, 2D std
    """

    #init params
    data = etdata.data
    fs = etdata.fs
    e = {k:v for k, v in zip(['s', 'e', 'evt'], evt)}
    ws = w[e['evt']]
    ws = round_up_to_odd(ws / 1000.0 * fs, min_val=3) if ws > 1 else 1
    ws_vel = round_up_to_odd(w['vel']/1000.0*fs, min_val=3)
    w_etdq = int(w['etdq']/1000.*fs)

    #calculate velocity using Savitzky-Golay filter
    vel = np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 1),
                   sg.savgol_filter(data['y'], ws_vel, 2, 1))*fs

    ind_s = e['s']+ws
    ind_s = ind_s if ind_s < e['e'] else e['e']
    ind_e = e['e']-ws
    ind_e = ind_e if ind_e > e['s'] else e['s']

    posx_s = np.nanmean(data[e['s']:ind_s]['x'])
    posy_s = np.nanmean(data[e['s']:ind_s]['y'])
    posx_e = np.nanmean(data[ind_e:e['e']]['x'])
    posy_e = np.nanmean(data[ind_e:e['e']]['y'])

    posx_mean = np.nanmean(data[e['s']:e['e']]['x'])
    posy_mean = np.nanmean(data[e['s']:e['e']]['y'])
    posx_med = np.nanmedian(data[e['s']:e['e']]['x'])
    posy_med = np.nanmedian(data[e['s']:e['e']]['y'])

    pv = np.max(vel[e['s']:e['e']])
    pv_index = e['s']+ np.argmax(vel[e['s']:e['e']])

    if e['e']-e['s']>w_etdq:
        x_ = rolling_window(data[e['s']:e['e']]['x'], w_etdq)
        y_ = rolling_window(data[e['s']:e['e']]['y'], w_etdq)

        std = np.median(np.hypot(np.std(x_, axis=1), np.std(y_, axis=1)))
        rms = np.median(np.hypot(np.sqrt(np.mean(np.diff(x_)**2, axis=1)),
                                 np.sqrt(np.mean(np.diff(y_)**2, axis=1))))
    else:
        std = 0
        rms = 0

    return posx_s, posx_e, posy_s, posy_e, posx_mean, posy_mean, posx_med, posy_med, pv, pv_index, rms, std
Example 19
    def plot_results(self, window=200, agent_subset=None, std=True):

        if not agent_subset:
            agent_subset = self.agents.keys()

        series_to_plot = {
            'cumulative rewards': {
                agent_name: self.rewards_per_episode[agent_name]
                for agent_name in agent_subset
            },
            'best score': {
                agent_name: self.best_score_per_episode[agent_name]
                for agent_name in agent_subset
            },
            'time steps': {
                agent_name: self.time_steps_per_episode[agent_name]
                for agent_name in agent_subset
            }
        }

        agents_to_plot = {
            agent_name: self.agents[agent_name]
            for agent_name in agent_subset
        }
        loss_per_agents = {
            'critic_loss': {
                agent_name: (np.array(agent.critic.loss_history) if 'critic'
                             in agent.__dict__.keys() else np.array([]))
                for agent_name, agent in agents_to_plot.items()
            },
            'actor_loss': {
                agent_name: (np.array(agent.actor.loss_history) if 'actor'
                             in agent.__dict__.keys() else np.array([]))
                for agent_name, agent in agents_to_plot.items()
            }
        }

        series_to_plot.update(loss_per_agents)

        fig, axs = plt.subplots(len(series_to_plot),
                                1,
                                figsize=(10, 20),
                                facecolor='w',
                                edgecolor='k')
        axs = axs.ravel()

        for idx, (series_name,
                  dict_series) in enumerate(series_to_plot.items()):
            for jdx, (agent_name, series) in enumerate(dict_series.items()):
                if series.size == 0:
                    axs[idx].plot([0.0], [0.0], label=agent_name)
                    continue

                cm_idx = jdx % self.max_diff_colors
                # jdx // self.num_lines_style * float(self.num_lines_style) / self.max_diff_colors (upward)
                ls_idx = min(
                    jdx // self.max_diff_colors,
                    self.num_lines_style)  # jdx % self.num_lines_style

                series_mvg = ut.rolling_window(series, window=window)
                series_mvg_avg = np.mean(series_mvg, axis=1)

                lines = axs[idx].plot(range(len(series_mvg_avg)),
                                      series_mvg_avg,
                                      label=agent_name)

                lines[0].set_color(self.cm(cm_idx))
                lines[0].set_linestyle(self.line_styles[ls_idx])

                if std:
                    series_mvg_std = np.std(series_mvg, axis=1)
                    area = axs[idx].fill_between(
                        range(len(series_mvg_avg)),
                        series_mvg_avg - series_mvg_std,
                        series_mvg_avg + series_mvg_std,
                        alpha=0.15)
                    area.set_color(self.cm(cm_idx))
                    area.set_linestyle(self.line_styles[ls_idx])

            box = axs[idx].get_position()
            axs[idx].set_position(
                [box.x0, box.y0, box.width * 0.8, box.height])
            axs[idx].set_title(f"{series_name} per episode", fontsize=15)
            axs[idx].set_ylabel(f"avg {series_name}", fontsize=10)
            axs[idx].set_xlabel("episodes", fontsize=10)
            axs[idx].legend(loc='center left', bbox_to_anchor=(1, 0.5))

        fig.tight_layout()
Example 20
def extractFeatures(etdata, **kwargs):
    '''Extracts features for IRF
    '''

    #get parameters
    data = etdata.data
    w, w_vel, w_dir = kwargs['w'], kwargs['w_vel'], kwargs['w_dir']

    tic = time.time()

    #find sampling rate
    fs = etdata.fs

    #window size for spatial measures in samples
    ws = round_up_to_odd(w/1000.0*fs+1)

    #window size in samples for velocity calculation
    ws_vel = round_up_to_odd(w_vel/1000.0*fs)

    #window size in samples for direction calculation
    ws_dir = round_up_to_odd(w_dir/1000.0*fs)

    maskInterp = np.zeros(len(data), dtype=bool)
    '''Legacy code. Interpolates through missing points.
    if kwargs.has_key('interp') and kwargs['interp']:
        r = np.arange(len(data))
        _mask = np.isnan(data['x']) | np.isnan(data['y'])
        fx = interp.PchipInterpolator(r[~_mask], data[~_mask]['x'],
                                    extrapolate=True)
        fy = interp.PchipInterpolator(r[~_mask], data[~_mask]['y'],
                                    extrapolate=True)
        data['x'][_mask]=fx(r[_mask])
        data['y'][_mask]=fy(r[_mask])
        maskInterp = _mask
    '''

    #prepare data for vectorized processing
    ws_pad = (max((ws, ws_vel, ws_dir)) - 1) // 2
    x_padded = np.pad(data['x'], (ws_pad, ws_pad),
                      'constant', constant_values=np.nan)
    y_padded = np.pad(data['y'], (ws_pad, ws_pad),
                      'constant', constant_values=np.nan)

    ws_dir_pad = (ws_dir - 1) // 2
    x_padded_dir=np.pad(data['x'], (ws_dir_pad, ws_dir_pad),
                        'constant', constant_values=np.nan)
    y_padded_dir=np.pad(data['y'], (ws_dir_pad, ws_dir_pad),
                        'constant', constant_values=np.nan)

    x_windowed = rolling_window(x_padded, ws)
    y_windowed = rolling_window(y_padded, ws)
    dx_windowed = rolling_window(np.diff(x_padded), ws-1)
    dy_windowed = rolling_window(np.diff(y_padded), ws-1)
    x_windowed_dir = rolling_window(np.diff(x_padded_dir), ws_dir-1)
    y_windowed_dir = rolling_window(np.diff(y_padded_dir), ws_dir-1)

    #%%Extract features
    features=dict()

    #sampling rate
    features['fs'] = np.ones(len(data))*fs

    for d, dd in zip(['x', 'y'], [x_windowed, y_windowed]):
        #difference between positions of preceding and succeding windows,
        #aka tobii feature, together with data quality features and its variants
        means=np.nanmean(dd, axis = 1)
        meds=np.nanmedian(dd, axis = 1)
        features['mean-diff-%s'%d] = np.roll(means, -(ws-1)//2) - \
                                     np.roll(means,  (ws-1)//2)
        features['med-diff-%s'%d] = np.roll(meds, -(ws-1)//2) - \
                                    np.roll(meds,  (ws-1)//2)

        #standard deviation
        features['std-%s'%d] = np.nanstd(dd, axis=1)
        features['std-next-%s'%d] = np.roll(features['std-%s'%d], -(ws-1)//2)
        features['std-prev-%s'%d] = np.roll(features['std-%s'%d],  (ws-1)//2)

    features['mean-diff']= np.hypot(features['mean-diff-x'],
                                    features['mean-diff-y'])
    features['med-diff']= np.hypot(features['med-diff-x'],
                                   features['med-diff-y'])

    features['std'] = np.hypot(features['std-x'], features['std-y'])
    features['std-diff'] = np.hypot(features['std-next-x'], features['std-next-y']) - \
                           np.hypot(features['std-prev-x'], features['std-prev-y'])

    #BCEA
    P = 0.68 #cumulative probability of area under the multivariate normal
    k = np.log(1/(1-P))
    #rho = [np.corrcoef(px, py)[0,1] for px, py in zip(x_windowed, y_windowed)]
    rho = vcorrcoef(x_windowed, y_windowed)
    features['bcea'] = 2 * k * np.pi * \
                       features['std-x'] * features['std-y'] * \
                       np.sqrt(1-np.power(rho,2))
    features['bcea-diff'] = np.roll(features['bcea'], -(ws-1)//2) - \
                            np.roll(features['bcea'], (ws-1)//2)

    #RMS
    features['rms'] = np.hypot(np.sqrt(np.mean(np.square(dx_windowed), axis=1)),
                               np.sqrt(np.mean(np.square(dy_windowed), axis=1)))
    features['rms-diff'] = np.roll(features['rms'], -(ws-1)//2) - \
                           np.roll(features['rms'], (ws-1)//2)

    #disp, aka idt feature
    x_range = np.nanmax(x_windowed, axis=1) - np.nanmin(x_windowed, axis=1)
    y_range = np.nanmax(y_windowed, axis=1) - np.nanmin(y_windowed, axis=1)
    features['disp'] = x_range + y_range

    #velocity and acceleration
    features['vel']=np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 1),
                             sg.savgol_filter(data['y'], ws_vel, 2, 1))*fs

    features['acc']=np.hypot(sg.savgol_filter(data['x'], ws_vel, 2, 2),
                             sg.savgol_filter(data['y'], ws_vel, 2, 2))*fs**2

    #rayleightest
    angl = np.arctan2(y_windowed_dir, x_windowed_dir)
    features['rayleightest'] = ast.rayleightest(angl, axis=1)

    #i2mc
    if 'i2mc' in kwargs and kwargs['i2mc'] is not None:
        features['i2mc'] = kwargs['i2mc']['finalweights'].flatten()
    else:
        features['i2mc'] = np.zeros(len(data))

    #remove padding and nans
    mask_nans = np.any([np.isnan(values) for key, values
                                         in features.items()], axis=0)
    mask_pad = np.zeros_like(data['x'], dtype=bool)
    mask_pad[:ws_pad] = True
    mask_pad[-ws_pad:] = True
    mask = mask_nans | mask_pad | maskInterp
    features = {key: values[~mask].astype(np.float32) for key, values
                                                      in features.items()}

    dtype = np.dtype(list(zip(features.keys(), itertools.repeat(np.float32))))
    features = np.core.records.fromarrays(list(features.values()), dtype=dtype)

    #return features
    toc = time.time()
    if 'print_et' in kwargs and kwargs['print_et']:
        print('Feature extraction took %.3f s.' % (toc - tic))
    return features, ~mask
                         progress=False)

closing_prices = stock_data["Close"].rolling(window=3).mean()[3:].values

# initialization
window = 5

dA_model = AutoEncoder(window, 4)
loss = MaxSE()

y = closing_prices.copy()

# training
for i in range(5):
    e = []
    # materialise the overlapping windows: shuffling and in-place scaling would
    # otherwise corrupt neighbouring windows that share memory in the strided view
    T = rolling_window(y.copy()[:1000], window).copy()

    np.random.shuffle(T)
    for t in tqdm(T):
        t -= t.min()
        t /= t.max()
        t -= 0.5
        t *= 2
        e.append(dA_model.learn(t, loss, noise=GaussianNoise(0, 0.001)))

    print(np.mean(e))

# encoding
prices = [
    i.copy() for i in np.array_split(y,
                                     len(y) // window) if len(i) == window