def main():
    """List sessions whose bandpass-filtered pupil trace is NaN-free.

    Scans every subject/session of the reward task, keeps the pairs whose
    lowpass pupil diameter contains no NaNs, and writes them to
    'valid_data.csv' in the processed-data directory.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    valid_subj_id = []
    valid_session_n = []

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            # Only the reward code is needed to address the HDF5 files.
            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples',
                                          subj_id,
                                          session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='bandpass')
            reward_messages = ep.read_hdf5('messages',
                                           subj_id,
                                           session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='bandpass')
            reward_events = ep.read_hdf5('events',
                                         subj_id,
                                         session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='bandpass')

            # A session is valid only if the lowpass trace has no NaNs.
            if reward_samples.lowpass_pupil_diameter.isna().sum() == 0:
                valid_subj_id.append(subj_id)
                valid_session_n.append(session_n)
                print('sample is valid!')
            else:
                print('sample is invalid. contains nans.')

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)

    valid_data_id = pd.DataFrame({
        'valid_sub': valid_subj_id,
        'valid_session': valid_session_n
    })

    # index=False keeps the CSV free of a spurious index column, matching
    # the other to_csv calls in this codebase.
    valid_data_id.to_csv(os.path.join(processed_data_path, 'valid_data.csv'),
                         index=False)
# Example #2 (score: 0)
def main():
    """Convert raw EDF recordings into cleaned, time-aligned HDF5 files.

    For every subject/session of the reward task: locate the data files,
    read the EDF, clean the frames, extract the experimental window, align
    timestamps, and save the result to the intermediate data directory.
    """
    (raw_data_path, intermediate_data_path,
     processed_data_path, figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('processing subject {}'.format(subj_id) +
                  ', session {}'.format(session_n))

            (subj_data_file,
             subj_data_file_raw,
             reward_code) = ep.find_data_files(subj_id=subj_id,
                                               session_n=session_n,
                                               reward_task=1,
                                               lum_task=0,
                                               raw_data_path=raw_data_path)

            print(subj_data_file, reward_code)

            samples, events, messages = ep.read_edf(subj_data_file_raw)
            print(samples.head())

            samples, events, messages = ep.clean_df(samples, events,
                                                    messages, reward_task=1)
            print(samples.head(), events.head(), messages.head())

            samples, events, messages = ep.extract_experimental_data(
                samples, events, messages)

            samples, events, messages = ep.define_relative_time(
                samples, events, messages)

            # Persist the cleaned session; the returned handle is unused.
            ep.save_hdf5(samples, events, messages, subj_id, session_n,
                         intermediate_data_path, reward_code=reward_code)
# Example #3 (score: 0)
def main():
    """Merge learning signals with pupil amplitude/summary data per session.

    For each subject and (reward_code, session) pair, loads the simulated
    learning signals plus the per-trial pupil peak and mean CSVs, joins them
    column-wise (dropping duplicated columns and 'trial_epoch'), and writes
    the combined trial-level CSV to the processed-data directory.
    """
    (raw_data_path, _, processed_data_path, figure_path,
     simulated_data_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subject in unique_subjects:
        for reward_code, session in zip(unique_reward_codes, unique_sessions):

            processed_fn = ('tepr' + '_sub-' + str(subject) + '_sess-' +
                            str(session) + '_cond-' + str(reward_code) +
                            '_trial.csv')

            learning_signals_fn = 'sub-{}_cond-{}_learning_signals.csv'.format(
                subject, reward_code)
            # Session number is a wildcard here; the concrete file is
            # recovered via glob below.
            pupil_amplitude_base = 'tepr_sub-{}_sess-*_cond-{}_trial_peaks.csv'.format(
                subject, reward_code)
            pupil_summary_base = 'tepr_sub-{}_sess-*_cond-{}_trial_means.csv'.format(
                subject, reward_code)

            pupil_amplitude_fn = glob.glob(
                os.path.join(processed_data_path, pupil_amplitude_base))
            pupil_summary_fn = glob.glob(
                os.path.join(processed_data_path, pupil_summary_base))

            if not pupil_amplitude_fn:
                print('No data for this session.')
                continue

            learning_signals_df = pd.read_csv(
                os.path.join(simulated_data_path, learning_signals_fn))
            pupil_amplitude_df = pd.read_csv(pupil_amplitude_fn[0])
            pupil_summary_df = pd.read_csv(pupil_summary_fn[0])

            # BUG FIX: a bare `except: pass` previously swallowed every
            # error (including KeyboardInterrupt) with no detail; catch
            # Exception and report what went wrong instead.
            try:
                trial_df = pd.concat([
                    learning_signals_df, pupil_amplitude_df, pupil_summary_df
                ],
                                     axis=1)
                # Keep the first occurrence of any duplicated column name.
                trial_df = trial_df.loc[:, ~trial_df.columns.duplicated()]
                trial_df.drop(columns=['trial_epoch'], inplace=True)
                trial_df.to_csv(os.path.join(processed_data_path,
                                             processed_fn),
                                index=False)
            except Exception as err:
                print('error in concat:', err)

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
# Example #4 (score: 0)
def main():
    """Segment each session's intermediate samples into trial epochs.

    Reads the intermediate HDF5 data for every subject/session of the
    reward task, segments the samples using the message stream, and writes
    the result back with the 'seg' identifier.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('segmenting subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            time.sleep(1)  # brief pause so progress output stays readable

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            # Load the three intermediate frames for this session.
            frames = {
                kind: ep.read_hdf5(kind, subj_id, session_n,
                                   intermediate_data_path,
                                   reward_code=reward_code)
                for kind in ('samples', 'messages', 'events')
            }

            segmented = sg.segment(frames['samples'], frames['messages'])

            ep.save_hdf5(segmented, frames['events'], frames['messages'],
                         subj_id, session_n, intermediate_data_path,
                         reward_code=reward_code, id_str='seg')

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Z-score baseline-corrected, lowpass-filtered pupil data per session.

    Reads the baseline-corrected ('corr') HDF5 data for each subject and
    session, z-scores the pupil diameter, saves a diagnostic plot of the
    first `trial_end` samples of each trial, and writes the z-scored data
    back with the 'zscored' identifier.
    """
    (raw_data_path, intermediate_data_path,
     processed_data_path, figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    trial_end = 2000  # plot only the first 2000 samples of each trial

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('z-scoring baseline corrected & lowpass filtered data for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='corr')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='corr')

            reward_samples = z.zscore(reward_samples)

            plotting_reward_samples = reward_samples.loc[
                reward_samples.trial_sample <= trial_end]

            # TODO: get reasonable y limits for all data within a subject
            # and use that for plotting.
            fig, figname = vz.visualize(plotting_reward_samples.trial_sample,
                                        plotting_reward_samples.z_pupil_diameter,
                                        subj_id, session_n, reward_code,
                                        id_str='zscored')
            vz.save(fig, figname)

            ep.save_hdf5(reward_samples, reward_events, reward_messages,
                         subj_id, session_n, processed_data_path,
                         reward_code=reward_code, id_str='zscored')
            print('z-scored data saved')

    # BUG FIX: the timing report was previously indented inside the subject
    # loop and fired once per subject; it now runs once after all sessions.
    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Locate pupil-response peaks in z-scored data and plot the extrema.

    For each subject/session with complete z-scored data, finds per-epoch
    peaks, plots the extrema for every trial epoch, and saves the combined
    figure set under a per-session name.
    """
    (raw_data_path, intermediate_data_path,
     processed_data_path, figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='zscored')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='zscored')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='zscored')

            # Skip sessions whose z-scored trace still contains NaNs.
            if np.isnan(reward_samples.z_pupil_diameter).any():
                print('This session has no data.')
                continue

            peaks_df = amp.locate_peaks(reward_samples, subj_id,
                                        session_n, reward_code, save=True)

            # One extrema plot per trial epoch; plot_extrema returns
            # (fig_name, fig) and only the figure is kept.
            figures = [
                amp.plot_extrema(
                    peaks_df.loc[peaks_df.trial_epoch == trial_epoch],
                    subj_id, session_n, reward_code,
                    id_str=str(trial_epoch))[1]
                for trial_epoch in peaks_df.trial_epoch.unique()
            ]

            super_fig_name = 'tepr_sub-{}_sess-{}_cond-{}_trial'.format(
                subj_id, session_n, reward_code)

            amp.save_extrema(super_fig_name, figures)

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Plot the baseline-corrected task-evoked pupil response per session."""
    (raw_data_path, intermediate_data_path,
     processed_data_path, figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            print('plotting baseline-corrected task-evoked response for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n) +
                  ' condition {}'.format(reward_code))

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='corr')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='corr')

            # NOTE(review): renames 'sample' -> 'trial_samples', yet the
            # plot below reads 'trial_sample' (no trailing s) — confirm the
            # column names are as intended.
            reward_samples.rename(columns={'sample': 'trial_samples'},
                                  inplace=True)

            fig, fig_name = vz.visualize(reward_samples.trial_sample,
                                         reward_samples.bc_pupil_diameter,
                                         subj_id=subj_id,
                                         session_n=session_n,
                                         reward_code=reward_code,
                                         id_str='corr')
            vz.save(fig, fig_name)

    end_time = time.time()
    elapsed_time = end_time - start_time
    print('time elapsed to plot time series: ', elapsed_time)
# Example #8 (score: 0)
def main():
    """Smoke-check that every session's intermediate HDF5 data loads."""
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('checking preprocessing for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            time.sleep(1)  # pause so console output is easy to follow

            (subj_data_file,
             subj_data_file_raw,
             reward_code) = ep.find_data_files(subj_id=subj_id,
                                               session_n=session_n,
                                               reward_task=1,
                                               lum_task=0,
                                               raw_data_path=raw_data_path)

            # Attempt each read in turn; a print after each confirms it
            # succeeded before the next one starts.
            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code)
            print('samples')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code)
            print('messages')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code)
            print('events')

            time.sleep(1)
def main():
    """Compute per-trial pupil peak and mean amplitudes from z-scored data."""
    (raw_data_path, intermediate_data_path,
     processed_data_path, figure_path, _) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='zscored')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='zscored')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='zscored')

            # Sessions whose z-scored trace contains NaNs are skipped.
            if np.isnan(reward_samples.z_pupil_diameter).any():
                print('This session has no data.')
                continue

            peak_df = amp.locate_peaks(reward_samples, subj_id, session_n,
                                       reward_code, save=True)

            mean_df = amp.find_mean(reward_samples, subj_id, session_n,
                                    reward_code, save=True)

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
# Example #10 (score: 0)
def main():
    """Sanity-check the pooled simulated learning-signals file.

    Loads 'learning_signals.csv' from the simulated-data directory and
    prints its head plus the unique subject ids and reward codes.
    """
    # BUG FIX: raw_data_path was previously never unpacked from
    # cf.path_config(), so md.extract_subjects_sessions below raised a
    # NameError. Unpack it here (first element, as in the sibling scripts).
    (raw_data_path, _, _, figure_path, simulated_data_path) = cf.path_config()
    learning_signals_fn = 'learning_signals.csv'

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    learning_signals_df = pd.read_csv(
        os.path.join(simulated_data_path, learning_signals_fn))

    print(learning_signals_df.head())
    # NOTE(review): concatenating '\n' assumes subj_id values are strings —
    # confirm the column dtype, otherwise this raises a TypeError.
    print(learning_signals_df.subj_id.unique() + '\n',
          learning_signals_df.reward_code.unique())

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Raster-plot bandpass-filtered pupil data with blinks indicated."""
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    n_trial_samples = 30  # size parameter passed through to vz.raster_plot

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('plotting subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='bandpass')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='bandpass')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='bandpass')

            # Mark blink periods on the samples, then raster-plot them.
            reward_samples = vz.indicate_blinks(reward_samples, reward_events,
                                                subj_id, session_n,
                                                reward_code)
            fig, figname = vz.raster_plot(reward_samples, subj_id, session_n,
                                          reward_code, n_trial_samples,
                                          id_str='raw')
            vz.save(fig, figname)

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Bandpass filter cleaned pupil data and save diagnostic plots.

    Reads each session's 'clean' HDF5 data, applies the high- and lowpass
    filters, plots both filtered traces, and saves the filtered data back
    with the 'bandpass' identifier.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('bandpass filtering data for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='clean')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='clean')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='clean')

            reward_samples = bp.high_bandpass_filter(reward_samples)
            reward_samples = bp.low_bandpass_filter(reward_samples)

            # Plot the lowpass then the highpass trace for inspection.
            for trace, tag in ((reward_samples.lowpass_pupil_diameter,
                                'lowpass'),
                               (reward_samples.highpass_pupil_diameter,
                                'highpass')):
                fig, figname = vz.visualize(reward_samples.trial_sample,
                                            trace, subj_id, session_n,
                                            reward_code, id_str=tag)
                vz.save(fig, figname)

            ep.save_hdf5(reward_samples, reward_events, reward_messages,
                         subj_id, session_n, processed_data_path,
                         reward_code=reward_code, id_str='bandpass')

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Split the pooled learning-signals file into per-subject/condition CSVs.

    Reads 'learning_signals.csv', splits it by (subj_id, reward_code),
    drops the first and last trial of each split, saves one CSV per split,
    and finally renames the files so the condition code in each filename is
    replaced by its decoded reward value.
    """
    (raw_data_path, _, _, figure_path, simulated_data_path) = cf.path_config()
    learning_signals_fn = 'learning_signals.csv'
    base_fn = 'sub-*_cond-*_learning_signals.csv'

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    learning_signals_df = pd.read_csv(
        os.path.join(simulated_data_path, learning_signals_fn))

    n_trials = 400
    n_subjects = learning_signals_df.subj_id.nunique()
    # First and last trial of each split are dropped below, hence the -2.
    expected_len = n_trials - 2

    # Codes/values repeated once per subject; their order must match the
    # condition-sorted filename list built inside decode_condition.
    unique_reward_codes = np.repeat(
        learning_signals_df.reward_code.sort_values(ascending=True).unique(),
        n_subjects)
    unique_reward_values = np.repeat(
        [6510, 6520, 6530, 7510, 7520, 7530, 8510, 8520, 8530], n_subjects)

    def split(df, group):
        """Return one DataFrame per group of `group` columns."""
        gb = df.groupby(group)
        return [gb.get_group(x) for x in gb.groups]

    def remove_first_last_trials(df_list, expected_len=expected_len):
        """Drop the first and last row of each df, verifying the lengths."""
        sliced_dfs = [df.iloc[1:-1] for df in df_list]

        # BUG FIX: `np.unique(lens) == expected_len` raised a ValueError
        # (ambiguous array truth value) whenever lengths differed; compare
        # each length explicitly instead.
        df_lens = [len(df) for df in sliced_dfs]
        assert all(length == expected_len
                   for length in df_lens), 'check length of sliced dfs!'

        return sliced_dfs

    def extract_fns(df_list):
        """Build a 'sub-<id>_cond-<code>_learning_signals.csv' name per df."""
        fns = [('sub-' + str(df.subj_id.unique()[0]) + '_cond-' +
                str(df.reward_code.unique()[0]) + '_learning_signals.csv')
               for df in df_list]

        return fns

    def save_split_dfs(df_list, fn_list, data_path=simulated_data_path):
        """Write each df to its filename under data_path (no index column)."""
        for df, fn in zip(df_list, fn_list):
            df.to_csv(os.path.join(data_path, fn), index=False)

        return print('dfs saved')

    def decode_condition(base_fn=base_fn,
                         data_path=simulated_data_path,
                         reward_values=unique_reward_values,
                         reward_codes=unique_reward_codes):
        """Rename saved files, replacing each condition code with its value."""
        def get_cond(elem):
            # Slice out the single-character condition code from the
            # filename; assumes fixed-length names — TODO confirm.
            return elem[-22:-21]

        learning_signals_fns = glob.glob(os.path.join(data_path, base_fn))
        learning_signals_fns.sort(key=get_cond)

        # BUG FIX: the body previously read the module-level
        # unique_reward_codes instead of the reward_codes parameter,
        # silently ignoring any caller-supplied override.
        for fn, reward_val, reward_code in zip(learning_signals_fns,
                                               reward_values, reward_codes):
            print(reward_code, reward_val, fn)

        learning_signals_fns_decoded = [
            re.sub('cond-' + str(reward_code), 'cond-' + str(reward_val), fn)
            for fn, reward_val, reward_code in zip(
                learning_signals_fns, reward_values, reward_codes)
        ]

        for original_fn, decoded_fn in zip(learning_signals_fns,
                                           learning_signals_fns_decoded):
            os.rename(original_fn, decoded_fn)

        os.listdir(simulated_data_path)  # check the renaming

        return None

    dfs = split(learning_signals_df, ['subj_id', 'reward_code'])
    sliced_dfs = remove_first_last_trials(dfs)
    parsed_learning_signals_fns = extract_fns(sliced_dfs)

    save_split_dfs(sliced_dfs, parsed_learning_signals_fns)
    decode_condition()

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)
def main():
    """Report global min/max of bandpass-filtered pupil diameters.

    Filters every session's baseline-corrected data and accumulates the
    per-session extremes, then prints the overall lowpass and highpass
    ranges across all sessions.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    lp_min, lp_max = [], []
    hp_min, hp_max = [], []

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('bandpass filtering data for subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          processed_data_path,
                                          reward_code=reward_code,
                                          id_str='corr')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           processed_data_path,
                                           reward_code=reward_code,
                                           id_str='corr')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         processed_data_path,
                                         reward_code=reward_code,
                                         id_str='corr')

            reward_samples = bp.high_bandpass_filter(reward_samples)
            reward_samples = bp.low_bandpass_filter(reward_samples)

            # NaN-aware extremes for this session.
            lp_min.append(np.nanmin(reward_samples.lowpass_pupil_diameter))
            lp_max.append(np.nanmax(reward_samples.lowpass_pupil_diameter))
            hp_min.append(np.nanmin(reward_samples.highpass_pupil_diameter))
            hp_max.append(np.nanmax(reward_samples.highpass_pupil_diameter))

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)

    print('min lowpass values: ', np.nanmin(lp_min))
    print('max lowpass values: ', np.nanmax(lp_max))

    print('min highpass values: ', np.nanmin(hp_min))
    print('max highpass values: ', np.nanmax(hp_max))
# Example #15 (score: 0)
def main():
    """Deblink, outlier-remove, and interpolate segmented pupil data.

    For each subject/session: load the segmented ('seg') intermediate data,
    remove blinks, strip outliers, interpolate the gaps, and save the
    cleaned result to the processed directory with the 'clean' identifier.
    """
    (raw_data_path, intermediate_data_path, processed_data_path,
     figure_path) = cf.path_config()

    (unique_subjects, unique_sessions,
     unique_reward_codes) = md.extract_subjects_sessions(raw_data_path,
                                                         reward_task=1)

    start_time = time.time()

    for subj_id in unique_subjects:
        for session_n in unique_sessions:

            print('processing subject {}'.format(subj_id) +
                  ' session {}'.format(session_n))

            time.sleep(1)  # pause so console output is easy to follow

            _, _, reward_code = ep.find_data_files(subj_id=subj_id,
                                                   session_n=session_n,
                                                   reward_task=1,
                                                   lum_task=0,
                                                   raw_data_path=raw_data_path)

            reward_samples = ep.read_hdf5('samples', subj_id, session_n,
                                          intermediate_data_path,
                                          reward_code=reward_code,
                                          id_str='seg')
            reward_messages = ep.read_hdf5('messages', subj_id, session_n,
                                           intermediate_data_path,
                                           reward_code=reward_code,
                                           id_str='seg')
            reward_events = ep.read_hdf5('events', subj_id, session_n,
                                         intermediate_data_path,
                                         reward_code=reward_code,
                                         id_str='seg')

            # Three-stage cleanup: deblink -> outlier removal -> interpolate.
            cleaned = dd.deblink(reward_samples, reward_events)
            print('deblinking complete')
            cleaned = dd.outlier_removal(cleaned)
            print('outliers removed')
            cleaned = dd.interpolate(cleaned)
            print('data interpolated')

            ep.save_hdf5(cleaned, reward_events, reward_messages,
                         subj_id, session_n, processed_data_path,
                         reward_code=reward_code, id_str='clean')
            print('clean data saved')

    end_time = time.time()

    time_elapsed = end_time - start_time
    print('time elapsed: ', time_elapsed)