def convolve_column(df, col):
    """
    Resample one column of **df** onto a regular grid of sample times.

    Rows are visited in order. A timestamp smaller than the previous row's
    timestamp marks the first row of a new series and resets the sample clock
    to 0. Whenever a row's timestamp exceeds the sample clock, the clock is
    advanced by the module-level ``step`` and one value is emitted (so at most
    one value is emitted per row):

    * categorical column: the value of the previous row of the series, or the
      row's own value when it is the first row of the series;
    * any other column: the sum, over the earlier rows of the series, of
      ``hrf(clock - timestamp) * value`` (values pass through
      ``np.nan_to_num``), or 0 when the series has no earlier rows.

    Relies on ``step`` and ``hrf`` being defined at module level.

    :param df: ``pandas`` ``DataFrame``; must provide ``timestamp`` and **col**.
    :param col: ``str``; name of the column to process.
    :return: ``pandas`` ``Series``; ``category`` dtype for categorical input, ``float`` otherwise.
    """
    is_categorical = df[col].dtype.name == 'category'
    values = np.array(df[col])
    stamps = np.array(df.timestamp)
    sample_time = 0.
    series_start = 0
    samples = []
    for row, stamp in enumerate(stamps):
        # A timestamp that goes backwards starts a new series.
        if row > 0 and stamp < stamps[row - 1]:
            series_start = row
            sample_time = 0.
        # Nothing to emit until the row has passed the current sample time.
        if not stamp > sample_time:
            continue
        sample_time += step
        has_history = row > series_start
        if is_categorical:
            # Carry the previous row's category; the first row of a series
            # supplies its own value.
            samples.append(values[row - 1] if has_history else values[row])
        elif has_history:
            kernel = np.vectorize(lambda x: hrf(sample_time - x))
            weights = kernel(stamps[series_start:row])
            history = np.nan_to_num(values[series_start:row])
            samples.append((weights * history).sum())
        else:
            samples.append(0)
    if is_categorical:
        return pd.Series(samples, dtype='category')
    return pd.Series(np.array(samples), dtype='float')
Example #2
0
def convolve(X, y, first_obs, last_obs, columns):
    """
    Convolve predictors in **X** into timestamps in **y** using canonical HRF.

    For every row ``i`` of **y**, the rows ``first_obs[i]:last_obs[i]`` of **X**
    are weighted by ``hrf(y.time[i] - X.time[row])`` and summed column-wise.

    :param X: ``pandas`` ``DataFrame``; impulse (predictor) data. Must provide a ``time`` column and every column named in **columns**.
    :param y: ``pandas`` ``DataFrame``; response data. Must provide a ``time`` column.
    :param first_obs: ``numpy`` ``array``; for each row of **y**, integer index of the first row of **X** to include.
    :param last_obs: ``numpy`` ``array``; for each row of **y**, integer index at which the included rows of **X** end. It is used as a slice end, so the row at this index itself is excluded.
    :param columns: ``list`` of ``str``; column names to convolve.
    :return: ``pandas`` ``DataFrame``; **y** with the convolved predictor columns appended (aligned on the index of **y**).
    """

    time_X = np.array(X.time)
    time_y = np.array(y.time)

    X = np.array(X[columns])
    X_conv = np.zeros((y.shape[0], X.shape[1]))

    for i in range(len(y)):
        window = slice(first_obs[i], last_obs[i])
        # Trailing axis lets the per-row weights broadcast across the columns.
        hrf_weights = hrf(time_y[i] - time_X[window])[..., None]
        X_conv[i] = (X[window] * hrf_weights).sum(axis=0)

    X_conv = pd.DataFrame(X_conv, columns=columns, index=y.index)
    y = pd.concat([y, X_conv], axis=1)

    return y
Example #3
0
def convolve_column(df, col):
    """
    Sample column **col** of **df** at regular intervals within each series.

    A row whose timestamp is lower than the preceding row's timestamp opens a
    new series (the sample time restarts at 0). Each time a row's timestamp
    is greater than the current sample time, the sample time grows by the
    module-level ``step`` and a single output value is produced:

    * for a categorical column, the category of the preceding row in the
      series (or of the row itself if it opens the series);
    * otherwise, the ``hrf``-weighted sum of the values of all preceding rows
      in the series, with the values passed through ``np.nan_to_num``; 0 if
      there are no preceding rows.

    ``step`` and ``hrf`` must be available at module level.

    :param df: ``pandas`` ``DataFrame``; needs a ``timestamp`` column and **col**.
    :param col: ``str``; column to process.
    :return: ``pandas`` ``Series``; dtype ``category`` if **col** is categorical, else ``float``.
    """
    column = df[col]
    categorical = column.dtype.name == 'category'
    arr = np.array(column)
    time = np.array(df.timestamp)
    n_rows = len(df)
    time_cur = 0.
    start = 0
    out = []
    for i in range(n_rows):
        starts_new_series = i > 0 and time[i] < time[i - 1]
        if starts_new_series:
            start, time_cur = i, 0.
        if time[i] > time_cur:
            time_cur += step
            if categorical and i > start:
                out.append(arr[i - 1])
            elif categorical:
                # First row of the series: use its own category value.
                out.append(arr[i])
            elif i > start:
                past = slice(start, i)
                convolve = np.vectorize(lambda x: hrf(time_cur - x))
                t_conv = convolve(time[past])
                out.append((t_conv * np.nan_to_num(arr[past])).sum())
            else:
                # No earlier rows in this series to convolve.
                out.append(0)
    data = out if categorical else np.array(out)
    dtype = 'category' if categorical else 'float'
    return pd.Series(data, dtype=dtype)
Example #4
0
def convolve(X, y, first_obs, last_obs, columns):
    """
    Convolve the **columns** of **X** onto the timestamps of **y** using ``hrf``.

    For every row ``i`` of **y**, rows ``first_obs[i]:last_obs[i]`` of **X** are
    weighted by ``hrf(y.time[i] - X.time[row])`` and summed column-wise.
    Progress is written to standard error.

    :param X: ``pandas`` ``DataFrame``; impulse data with a ``time`` column and the columns in **columns**.
    :param y: ``pandas`` ``DataFrame``; response data with a ``time`` column.
    :param first_obs: indexable of ``int``; per row of **y**, start of the slice of **X**.
    :param last_obs: indexable of ``int``; per row of **y**, end (exclusive) of the slice of **X**.
    :param columns: ``list`` of ``str``; column names to convolve.
    :return: ``pandas`` ``DataFrame``; **y** with the convolved columns appended.
    """
    impulse_times = np.array(X.time)
    response_times = np.array(y.time)

    X = np.array(X[columns])
    n_responses = len(y)
    X_conv = np.zeros((y.shape[0], X.shape[1]))

    for i in range(n_responses):
        # Report on the first row, every 1000th row and the last row.
        report = i == 0 or i % 1000 == 999 or i == n_responses - 1
        if report:
            sys.stderr.write('\r%d/%d' % (i + 1, n_responses))
            sys.stderr.flush()
        window = slice(first_obs[i], last_obs[i])
        lags = response_times[i] - impulse_times[window]
        # Trailing axis lets the per-row weights broadcast across the columns.
        hrf_weights = hrf(lags)[..., None]
        X_conv[i] = (X[window] * hrf_weights).sum(axis=0)

    convolved = pd.DataFrame(X_conv, columns=columns, index=y.index)
    y = pd.concat([y, convolved], axis=1)

    # Terminate the carriage-return progress line.
    sys.stderr.write('\n')

    return y
    # Convolve every series on its own and collect the resulting tables in ``out``.
    # NOTE(review): ``series``, ``series_names``, ``cols``, ``args`` and ``out`` are
    # defined outside the visible code.
    for i in range(len(series)):
        df_cur = series[i]
        # Optional ``duration`` column; when present it scales the predictors below.
        if 'duration' in df_cur.columns:
            duration = df_cur['duration']
        else:
            duration = None
        X = df_cur[cols]
        impulse_times = df_cur.time.values
        # Round the latest impulse time up to an even integer.
        max_response_time = int(np.ceil(df_cur.time.max()))
        if max_response_time % 2 != 0:
            max_response_time += 1
        # Sample indices and the response time of each sample
        # (index * args.step + args.start).
        tr = np.arange(0, max_response_time // args.step)
        response_times = tr * args.step + args.start
        # D[r, k] = response time r minus impulse time k.
        D = response_times[..., None] - impulse_times[None, ...]
        # Zero the kernel wherever the impulse lies after the response time.
        G_mask = D >= 0
        G = hrf(D)
        G = np.where(G_mask, G, 0)
        if duration is not None:
            X = X.multiply(duration, axis=0)
        # Kernel-weighted sum over impulses for every response time and column.
        X_conv = np.dot(G, X)
        X_conv = pd.DataFrame(X_conv, columns=cols)
        X_conv['time'] = response_times
        X_conv['tr'] = tr
        # Tag every row with the grouping values of the current series.
        for col, val in zip(args.grouping_columns, series_names[i]):
            X_conv[col] = val
        out.append(X_conv)

    # Stack the per-series tables and renumber the rows from 0.
    out = pd.concat(out, axis=0)
    out.reset_index(drop=True, inplace=True)
    # Constant column of ones, consumed by the groupby/cumsum statement that follows.
    out['sampleid'] = 1
    out.sampleid = out.groupby(args.grouping_columns).sampleid.cumsum().apply(
Example #6
0
    # Response times with a trailing length-1 axis added.
    # NOTE(review): ``df``, ``step``, ``docid_ix``, ``power_padded`` and
    # ``tau_padded`` are defined outside the visible code.
    t = np.array(df.time)[..., None]
    sys.stderr.write('Convolving sound power with canonical HRF...\n')
    soundPowerHRF = []
    # Process the rows in chunks of ``step`` rows (``range`` requires ``step``
    # to be an integer here).
    for i in range(0, len(t), step):
        sys.stderr.write('\rRows completed: %d/%d' % (i, len(t)))
        sys.stderr.flush()

        doc_ix_cur = docid_ix[i:i + step]
        # One output value per row of the chunk, initialised to 0.
        soundPowerHRF_cur = np.zeros((doc_ix_cur.shape[0], ))

        # Select the rows of the padded arrays via the chunk's ``docid_ix`` values.
        impulse = power_padded[doc_ix_cur]
        tau = tau_padded[doc_ix_cur]
        t_cur = t[i:i + step]

        # Only rows whose ``tau`` values sum to more than 0 are convolved;
        # the remaining rows keep their initial 0.
        valid = np.where(tau.sum(axis=1) > 0)
        impulse = impulse[valid]
        tau = tau[valid]
        t_cur = t_cur[valid]

        # NOTE(review): the divisor 441. is not explained in the visible code -- confirm.
        soundPowerHRF_cur[valid] = np.nan_to_num(
            impulse * hrf(t_cur - tau)).sum(axis=1) / 441.
        soundPowerHRF.append(soundPowerHRF_cur)

    # Join the per-chunk results into one array covering all rows.
    soundPowerHRF = np.concatenate(soundPowerHRF, axis=0)

    # Terminate the carriage-return progress line.
    sys.stderr.write('\n')

    df['soundPowerHRF'] = soundPowerHRF

df.to_csv(sys.stdout, sep=' ', na_rep='NaN', index=False)
import sys, argparse, pandas as pd, numpy as np
from mvpa2.misc.data_generators import double_gamma_hrf as hrf

# Command-line interface: input table path, sampling step and document names.
argparser = argparse.ArgumentParser(
    description='Convolve data table using HRF',
)
argparser.add_argument(
    'data',
    type=str,
    help='Path to data table',
)
argparser.add_argument(
    '-s', '--step',
    type=float,
    default=2.0,
    help='Step size (in seconds) between fMRI samples',
)
argparser.add_argument(
    '-d', '--doc_names',
    nargs='+',
    default=[
        'Boar', 'Aqua', 'MatchstickSeller', 'KingOfBirds', 'Elvis',
        'MrSticky', 'HighSchool', 'Roswell', 'Tulips', 'Tourettes',
    ],
    help='List of document names in input data',
)
# Arguments the parser does not recognise are collected in ``unknown``.
args, unknown = argparser.parse_known_args()

step = float(args.step)

# NOTE(review): ``time_cur`` is not assigned at module level in the visible
# code, so calling this module-level ``convolve`` depends on a global defined
# elsewhere -- confirm.
convolve = np.vectorize(lambda x: hrf(time_cur + step - x))

doc_names = args.doc_names

def get_docid(timeseries):
    """
    Label every entry of **timeseries** with a document name.

    Labelling starts at the first entry of the module-level ``doc_names`` and
    moves to the next name each time a value is smaller than the value
    before it.

    :param timeseries: sequence of comparable values (converted with ``np.array``).
    :return: ``pandas`` ``Series`` of ``category`` dtype; one document name per entry.
    """
    stamps = np.array(timeseries)
    labels = []
    doc_pos = 0
    for row, stamp in enumerate(stamps):
        went_backwards = row > 0 and stamp < stamps[row - 1]
        if went_backwards:
            doc_pos += 1
        labels.append(doc_names[doc_pos])
    return pd.Series(labels).astype('category')

def convolve_column(df, col):
    categorical = df[col].dtype.name == 'category'
    arr = np.array(df[col])
    time = np.array(df.timestamp)
    time_cur = 0.
    start = 0
Example #8
0
                       default=2.0,
                       help='Step size (in seconds) between fMRI samples')
argparser.add_argument(
    '-d', '--doc_names',
    nargs='+',
    default=[
        'Boar', 'Aqua', 'MatchstickSeller', 'KingOfBirds', 'Elvis',
        'MrSticky', 'HighSchool', 'Roswell', 'Tulips', 'Tourettes',
    ],
    help='List of document names in input data',
)
# Arguments the parser does not recognise are collected in ``unknown``.
args, unknown = argparser.parse_known_args()

step = float(args.step)

# NOTE(review): ``time_cur`` is not assigned at module level in the visible
# code, so calling this module-level ``convolve`` depends on a global defined
# elsewhere -- confirm.
convolve = np.vectorize(lambda x: hrf(time_cur + step - x))

doc_names = args.doc_names


def get_docid(timeseries):
    """
    Assign a document name to each entry of **timeseries**.

    The first entry gets the first name in the module-level ``doc_names``;
    whenever a value is lower than its predecessor, the following name in
    ``doc_names`` is used from that entry onwards.

    :param timeseries: sequence of comparable values (converted with ``np.array``).
    :return: ``pandas`` ``Series`` of ``category`` dtype; one document name per entry.
    """
    stamps = np.array(timeseries)
    doc_pos = 0
    names = []
    is_first = True
    for stamp in stamps:
        # A drop relative to the previous value switches to the next document.
        if not is_first and stamp < previous:
            doc_pos += 1
        names.append(doc_names[doc_pos])
        previous = stamp
        is_first = False
    return pd.Series(names).astype('category')

    # Response times with a trailing length-1 axis added.
    # NOTE(review): ``df``, ``docid_ix``, ``power_padded`` and ``tau_padded`` are
    # defined outside the visible code.
    t = np.array(df.time)[..., None]
    sys.stderr.write('Convolving sound power with canonical HRF...\n')
    soundPowerHRF = []
    # Process the rows in chunks of ``step`` rows (``range`` requires ``step``
    # to be an integer here).
    for i in range(0, len(t), step):
        sys.stderr.write('\rRows completed: %d/%d' %(i, len(t)))
        sys.stderr.flush()

        doc_ix_cur = docid_ix[i:i+step]
        # One output value per row of the chunk, initialised to 0.
        soundPowerHRF_cur = np.zeros((doc_ix_cur.shape[0],))

        # Select the rows of the padded arrays via the chunk's ``docid_ix`` values.
        impulse = power_padded[doc_ix_cur]
        tau = tau_padded[doc_ix_cur]
        t_cur = t[i:i+step]

        # Only rows whose ``tau`` values sum to more than 0 are convolved;
        # the remaining rows keep their initial 0.
        valid = np.where(tau.sum(axis=1) > 0)
        impulse = impulse[valid]
        tau = tau[valid]
        t_cur = t_cur[valid]

        # NOTE(review): the divisor 441. is not explained in the visible code -- confirm.
        soundPowerHRF_cur[valid] = np.nan_to_num(impulse * hrf(t_cur - tau)).sum(axis=1) / 441.
        soundPowerHRF.append(soundPowerHRF_cur)

    # Join the per-chunk results into one array covering all rows.
    soundPowerHRF = np.concatenate(soundPowerHRF, axis=0)

    # Terminate the carriage-return progress line.
    sys.stderr.write('\n')

    df['soundPowerHRF'] = soundPowerHRF

df.to_csv(sys.stdout, sep=' ', na_rep='nan', index=False)