def convolve_column(df, col, step_size=None, kernel=None):
    """
    Resample one column of an event-level table onto a regular sampling grid.

    Rows of **df** are visited in order. A row whose ``timestamp`` is smaller
    than the previous row's marks the start of a new time series and resets
    the grid to 0. Each time a row's timestamp exceeds the current grid time,
    the grid advances by one step and one output value is emitted:

    * categorical column: the value of the preceding row (or of the row
      itself when it is the first row of its series);
    * any other column: the sum over all earlier rows of the series of
      ``kernel(grid_time - timestamp) * value``, with NaN values counted as 0
      (0 when the row is the first of its series).

    NOTE: the grid advances by at most one step per input row, so a gap
    between consecutive rows that is longer than one step does not produce
    additional samples.

    :param df: ``pandas`` ``DataFrame``; input data with a ``timestamp`` column.
    :param col: ``str``; name of the column to resample.
    :param step_size: ``float`` or ``None``; spacing of the sampling grid. If ``None``, the module-level ``step`` is used.
    :param kernel: callable or ``None``; scalar response function applied to each time offset. If ``None``, the module-level ``hrf`` is used.
    :return: ``pandas`` ``Series``; emitted values, of dtype ``category`` for categorical input and ``float`` otherwise.
    """
    if step_size is None:
        step_size = step

    categorical = df[col].dtype.name == 'category'
    values = np.array(df[col])
    onsets = np.array(df.timestamp)

    weigh = None
    if not categorical:
        # Only the numeric path needs a kernel. Build the element-wise
        # wrapper once instead of once per emitted sample.
        weigh = np.vectorize(hrf if kernel is None else kernel)

    grid_time = 0.
    start = 0
    out = []
    for i in range(len(df)):
        # A decreasing timestamp means a new series begins at this row.
        if i > 0 and onsets[i] < onsets[i - 1]:
            start = i
            grid_time = 0.
        if onsets[i] <= grid_time:
            continue

        grid_time += step_size
        if categorical:
            # Carry forward the last value seen before this row; the first
            # row of a series has no predecessor, so use its own value.
            out.append(values[i - 1] if i > start else values[i])
        elif i > start:
            weights = weigh(grid_time - onsets[start:i])
            out.append((weights * np.nan_to_num(values[start:i])).sum())
        else:
            # No earlier impulses in this series yet.
            out.append(0)

    if categorical:
        return pd.Series(out, dtype='category')
    return pd.Series(np.array(out), dtype='float')
def convolve(X, y, first_obs, last_obs, columns, kernel=None):
    """
    Convolve predictors in **X** into timestamps in **y** using canonical HRF.

    For response row ``i``, the impulses ``X[first_obs[i]:last_obs[i]]`` are
    weighted by ``kernel(y.time[i] - X.time)`` and summed column-wise. An
    empty window yields zeros.

    :param X: ``pandas`` ``DataFrame``; impulse (predictor) data. Must contain a ``time`` column and every column in **columns**.
    :param y: ``pandas`` ``DataFrame``; response data. Must contain a ``time`` column.
    :param first_obs: ``numpy`` ``array``; integer indices of first observation in X from time series in y.
    :param last_obs: ``numpy`` ``array``; integer indices of last observation in X preceding each y in the same time series (used as an exclusive slice bound).
    :param columns: ``list`` of ``str``; column names to convolve.
    :param kernel: callable or ``None``; response function applied to an array of time offsets. If ``None``, the module-level ``hrf`` is used.
    :return: ``pandas`` ``DataFrame``; **y** with the convolved predictors appended as extra columns.
    """
    if kernel is None:
        kernel = hrf

    time_X = np.array(X.time)
    time_y = np.array(y.time)
    impulses = np.array(X[columns])

    X_conv = np.zeros((len(time_y), impulses.shape[1]))
    for i, t in enumerate(time_y):
        s, e = first_obs[i], last_obs[i]
        # Trailing axis lets the per-impulse weights broadcast over columns.
        weights = kernel(t - time_X[s:e])[..., None]
        X_conv[i] = (impulses[s:e] * weights).sum(axis=0)

    # Re-use y's index so the column-wise concat aligns row for row.
    X_conv = pd.DataFrame(X_conv, columns=columns, index=y.index)
    return pd.concat([y, X_conv], axis=1)
def convolve_column(df, col, step_size=None, kernel=None):
    """
    Resample one column of an event-level table onto a regular sampling grid.

    Rows of **df** are visited in order. A row whose ``timestamp`` is smaller
    than the previous row's marks the start of a new time series and resets
    the grid to 0. Each time a row's timestamp exceeds the current grid time,
    the grid advances by one step and one output value is emitted:

    * categorical column: the value of the preceding row (or of the row
      itself when it is the first row of its series);
    * any other column: the sum over all earlier rows of the series of
      ``kernel(grid_time - timestamp) * value``, with NaN values counted as 0
      (0 when the row is the first of its series).

    NOTE: the grid advances by at most one step per input row, so a gap
    between consecutive rows that is longer than one step does not produce
    additional samples.

    :param df: ``pandas`` ``DataFrame``; input data with a ``timestamp`` column.
    :param col: ``str``; name of the column to resample.
    :param step_size: ``float`` or ``None``; spacing of the sampling grid. If ``None``, the module-level ``step`` is used.
    :param kernel: callable or ``None``; scalar response function applied to each time offset. If ``None``, the module-level ``hrf`` is used.
    :return: ``pandas`` ``Series``; emitted values, of dtype ``category`` for categorical input and ``float`` otherwise.
    """
    if step_size is None:
        step_size = step

    categorical = df[col].dtype.name == 'category'
    values = np.array(df[col])
    onsets = np.array(df.timestamp)

    weigh = None
    if not categorical:
        # Only the numeric path needs a kernel. Build the element-wise
        # wrapper once instead of once per emitted sample.
        weigh = np.vectorize(hrf if kernel is None else kernel)

    grid_time = 0.
    start = 0
    out = []
    for i in range(len(df)):
        # A decreasing timestamp means a new series begins at this row.
        if i > 0 and onsets[i] < onsets[i - 1]:
            start = i
            grid_time = 0.
        if onsets[i] <= grid_time:
            continue

        grid_time += step_size
        if categorical:
            # Carry forward the last value seen before this row; the first
            # row of a series has no predecessor, so use its own value.
            out.append(values[i - 1] if i > start else values[i])
        elif i > start:
            weights = weigh(grid_time - onsets[start:i])
            out.append((weights * np.nan_to_num(values[start:i])).sum())
        else:
            # No earlier impulses in this series yet.
            out.append(0)

    if categorical:
        return pd.Series(out, dtype='category')
    return pd.Series(np.array(out), dtype='float')
def convolve(X, y, first_obs, last_obs, columns, kernel=None):
    """
    Convolve predictors in **X** into timestamps in **y**, reporting progress on stderr.

    For response row ``i``, the impulses ``X[first_obs[i]:last_obs[i]]`` are
    weighted by ``kernel(y.time[i] - X.time)`` and summed column-wise. An
    empty window yields zeros. A ``done/total`` counter is written to stderr
    on the first row, on every 1000th row and on the last row, followed by a
    newline once all rows are processed.

    :param X: ``pandas`` ``DataFrame``; impulse (predictor) data. Must contain a ``time`` column and every column in **columns**.
    :param y: ``pandas`` ``DataFrame``; response data. Must contain a ``time`` column.
    :param first_obs: ``numpy`` ``array``; for each row of **y**, integer index of the first row of **X** in its window.
    :param last_obs: ``numpy`` ``array``; for each row of **y**, integer index one past the last row of **X** in its window (exclusive slice bound).
    :param columns: ``list`` of ``str``; column names to convolve.
    :param kernel: callable or ``None``; response function applied to an array of time offsets. If ``None``, the module-level ``hrf`` is used.
    :return: ``pandas`` ``DataFrame``; **y** with the convolved predictors appended as extra columns.
    """
    if kernel is None:
        kernel = hrf

    time_X = np.array(X.time)
    time_y = np.array(y.time)
    impulses = np.array(X[columns])
    n_resp = len(time_y)

    X_conv = np.zeros((n_resp, impulses.shape[1]))
    for i, t in enumerate(time_y):
        # Throttle progress output: first row, every 1000th row, last row.
        if i == 0 or i % 1000 == 999 or i == n_resp - 1:
            sys.stderr.write('\r%d/%d' % (i + 1, n_resp))
            sys.stderr.flush()

        s, e = first_obs[i], last_obs[i]
        # Trailing axis lets the per-impulse weights broadcast over columns.
        weights = kernel(t - time_X[s:e])[..., None]
        X_conv[i] = (impulses[s:e] * weights).sum(axis=0)

    # Re-use y's index so the column-wise concat aligns row for row.
    X_conv = pd.DataFrame(X_conv, columns=columns, index=y.index)
    y = pd.concat([y, X_conv], axis=1)

    sys.stderr.write('\n')

    return y
# Convolve every time series separately and collect one output table per series.
for i in range(len(series)):
    df_cur = series[i]
    # An optional 'duration' column scales each impulse row (see below).
    if 'duration' in df_cur.columns:
        duration = df_cur['duration']
    else:
        duration = None
    X = df_cur[cols]
    impulse_times = df_cur.time.values
    # Latest impulse time, rounded up to an integer and then bumped to the
    # next even number if odd.
    # NOTE(review): presumably so the range divides evenly by a 2 s step -- confirm.
    max_response_time = int(np.ceil(df_cur.time.max()))
    if max_response_time % 2 != 0:
        max_response_time += 1
    # Sample indices and the time of each sample (offset by args.start).
    tr = np.arange(0, max_response_time // args.step)
    response_times = tr * args.step + args.start
    # D[r, k] = response_times[r] - impulse_times[k]
    D = response_times[..., None] - impulse_times[None, ...]
    # Zero the weights where the response time precedes the impulse (D < 0).
    G_mask = D >= 0
    G = hrf(D)
    G = np.where(G_mask, G, 0)
    if duration is not None:
        # Multiply every predictor row by that row's duration.
        X = X.multiply(duration, axis=0)
    # Weighted sum over impulses for each response time and predictor.
    X_conv = np.dot(G, X)
    X_conv = pd.DataFrame(X_conv, columns=cols)
    X_conv['time'] = response_times
    X_conv['tr'] = tr
    # Attach the grouping values that identify this series.
    for col, val in zip(args.grouping_columns, series_names[i]):
        X_conv[col] = val
    out.append(X_conv)
# Stack all per-series tables and renumber rows from 0.
out = pd.concat(out, axis=0)
out.reset_index(drop=True, inplace=True)
# Running count of rows within each group, further transformed by .apply(...).
out['sampleid'] = 1
out.sampleid = out.groupby(args.grouping_columns).sampleid.cumsum().apply(
# Row times with a trailing axis, so each row's time broadcasts against that
# row's entries in `tau`.
t = np.array(df.time)[..., None]
sys.stderr.write('Convolving sound power with canonical HRF...\n')
soundPowerHRF = []
# Process the rows in chunks of `step` rows (`step` is used as a range stride
# here, so it must be an integer).
for i in range(0, len(t), step):
    # Reports the index of the first row of the current chunk.
    sys.stderr.write('\rRows completed: %d/%d' % (i, len(t)))
    sys.stderr.flush()
    doc_ix_cur = docid_ix[i:i + step]
    # Output for this chunk; rows not selected by `valid` stay 0.
    soundPowerHRF_cur = np.zeros((doc_ix_cur.shape[0], ))
    # Look up the per-row impulse values and impulse times via the chunk's indices.
    impulse = power_padded[doc_ix_cur]
    tau = tau_padded[doc_ix_cur]
    t_cur = t[i:i + step]
    # Keep only rows whose impulse times sum to more than 0.
    # NOTE(review): presumably this drops rows that are all padding -- confirm.
    valid = np.where(tau.sum(axis=1) > 0)
    impulse = impulse[valid]
    tau = tau[valid]
    t_cur = t_cur[valid]
    # HRF-weighted sum of the impulses per row; NaN products count as 0.
    # NOTE(review): the meaning of the divisor 441 is not visible here -- confirm.
    soundPowerHRF_cur[valid] = np.nan_to_num(
        impulse * hrf(t_cur - tau)).sum(axis=1) / 441.
    soundPowerHRF.append(soundPowerHRF_cur)
soundPowerHRF = np.concatenate(soundPowerHRF, axis=0)
sys.stderr.write('\n')
# Add the result as a new column and write the table to stdout,
# space-separated, with missing values written as 'NaN'.
df['soundPowerHRF'] = soundPowerHRF
df.to_csv(sys.stdout, sep=' ', na_rep='NaN', index=False)
import sys, argparse, pandas as pd, numpy as np
from mvpa2.misc.data_generators import double_gamma_hrf as hrf

# Command-line interface: path to the data table, sampling step and the
# ordered list of document names.
argparser = argparse.ArgumentParser(description='Convolve data table using HRF')
argparser.add_argument('data', type=str, help='Path to data table')
argparser.add_argument('-s', '--step', type=float, default=2.0,
                       help='Step size (in seconds) between fMRI samples')
argparser.add_argument('-d', '--doc_names', nargs='+',
                       default=['Boar', 'Aqua', 'MatchstickSeller', 'KingOfBirds', 'Elvis', 'MrSticky', 'HighSchool', 'Roswell', 'Tulips', 'Tourettes'],
                       help='List of document names in input data')
# parse_known_args: unrecognised arguments are collected in `unknown`
# rather than causing an error.
args, unknown = argparser.parse_known_args()

step = float(args.step)
# NOTE(review): `time_cur` is not assigned at module level in the visible code,
# so this wrapper relies on a global defined elsewhere -- verify before use.
convolve = np.vectorize(lambda x: hrf(time_cur + step - x))
doc_names = args.doc_names

def get_docid(timeseries):
    """
    Label every row with a document name by detecting timestamp resets.

    The values are walked in order; each time a value is smaller than its
    predecessor, the position in the module-level ``doc_names`` advances by
    one. Each row gets the name at the current position.

    :param timeseries: array-like of timestamps, in row order.
    :return: ``pandas`` ``Series`` of dtype ``category``; one document name per row.
    :raises IndexError: if there are more resets than ``doc_names`` has further entries.
    """
    timeseries = np.array(timeseries)
    docid = []
    docix = 0
    for i in range(len(timeseries)):
        # A decreasing timestamp marks the start of the next document.
        if i > 0 and timeseries[i] < timeseries[i-1]:
            docix += 1
        docid.append(doc_names[docix])
    return pd.Series(docid).astype('category')

def convolve_column(df, col):
    categorical = df[col].dtype.name == 'category'
    arr = np.array(df[col])
    time = np.array(df.timestamp)
    time_cur = 0.
    start = 0
default=2.0, help='Step size (in seconds) between fMRI samples')
# Ordered list of document names; get_docid() below indexes into it.
argparser.add_argument('-d', '--doc_names', nargs='+',
                       default=[
                           'Boar', 'Aqua', 'MatchstickSeller', 'KingOfBirds', 'Elvis',
                           'MrSticky', 'HighSchool', 'Roswell', 'Tulips', 'Tourettes'
                       ],
                       help='List of document names in input data')
# parse_known_args: unrecognised arguments are collected in `unknown`
# rather than causing an error.
args, unknown = argparser.parse_known_args()

step = float(args.step)
# NOTE(review): `time_cur` is not assigned at module level in the visible code,
# so this wrapper relies on a global defined elsewhere -- verify before use.
convolve = np.vectorize(lambda x: hrf(time_cur + step - x))
doc_names = args.doc_names

def get_docid(timeseries):
    """
    Label every row with a document name by detecting timestamp resets.

    The values are walked in order; each time a value is smaller than its
    predecessor, the position in the module-level ``doc_names`` advances by
    one. Each row gets the name at the current position.

    :param timeseries: array-like of timestamps, in row order.
    :return: ``pandas`` ``Series`` of dtype ``category``; one document name per row.
    :raises IndexError: if there are more resets than ``doc_names`` has further entries.
    """
    timeseries = np.array(timeseries)
    docid = []
    docix = 0
    for i in range(len(timeseries)):
        # A decreasing timestamp marks the start of the next document.
        if i > 0 and timeseries[i] < timeseries[i - 1]:
            docix += 1
        docid.append(doc_names[docix])
    return pd.Series(docid).astype('category')
# Row times with a trailing axis, so each row's time broadcasts against that
# row's entries in `tau`.
t = np.array(df.time)[..., None]
sys.stderr.write('Convolving sound power with canonical HRF...\n')
soundPowerHRF = []
# Process the rows in chunks of `step` rows (`step` is used as a range stride
# here, so it must be an integer).
for i in range(0, len(t), step):
    # Reports the index of the first row of the current chunk.
    sys.stderr.write('\rRows completed: %d/%d' %(i, len(t)))
    sys.stderr.flush()
    doc_ix_cur = docid_ix[i:i+step]
    # Output for this chunk; rows not selected by `valid` stay 0.
    soundPowerHRF_cur = np.zeros((doc_ix_cur.shape[0],))
    # Look up the per-row impulse values and impulse times via the chunk's indices.
    impulse = power_padded[doc_ix_cur]
    tau = tau_padded[doc_ix_cur]
    t_cur = t[i:i+step]
    # Keep only rows whose impulse times sum to more than 0.
    # NOTE(review): presumably this drops rows that are all padding -- confirm.
    valid = np.where(tau.sum(axis=1) > 0)
    impulse = impulse[valid]
    tau = tau[valid]
    t_cur = t_cur[valid]
    # HRF-weighted sum of the impulses per row; NaN products count as 0.
    # NOTE(review): the meaning of the divisor 441 is not visible here -- confirm.
    soundPowerHRF_cur[valid] = np.nan_to_num(impulse * hrf(t_cur - tau)).sum(axis=1) / 441.
    soundPowerHRF.append(soundPowerHRF_cur)
soundPowerHRF = np.concatenate(soundPowerHRF, axis=0)
sys.stderr.write('\n')
# Add the result as a new column and write the table to stdout,
# space-separated, with missing values written as 'nan'.
df['soundPowerHRF'] = soundPowerHRF
df.to_csv(sys.stdout, sep=' ', na_rep='nan', index=False)