Python multiindex_preproc Examples, tvc_benchmarker.multiindex_preproc Python Examples

Example #1

0

Show file

File: plot.py Project: 62442katieb/tvc_benchmarker

def plot_method_correlation(dfc, cmap='RdBu_r', fig_dir=None, fig_prefix=None, mi=[]):
    """
    Plot the Spearman correlation between every pair of columns in ``dfc``.

    One figure is saved per parameter combination returned by
    ``tvc_benchmarker.multiindex_preproc``.

    **Input**

    :dfc: dataframe with one column per DFC method.
    :cmap: colormap passed to ``pcolormesh``.
    :fig_dir: directory in which figures are saved (created if missing). Default './'.
    :fig_prefix: optional prefix of the saved file names (a '_' is appended).
    :mi: name, or list of names, of the index levels to loop over.

    **Returns**

    Nothing. Files named ``<fig_prefix>dfc-method-correlation<params>.pdf`` are written.
    """
    # Copy so that the shared default list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    # Loop invariants: method order and tick positions at the centre of each cell.
    methods = sorted(dfc.columns)
    n_methods = len(methods)
    tick_positions = np.arange(0.5, n_methods - 0.49, 1)

    for mi_params in mi_parameters:

        labelled = list(zip(mi, mi_params))
        param_sname = '_'.join(name + '-' + str(value) for name, value in labelled)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')

        param_title = ','.join(name + '=' + str(value) for name, value in labelled)
        param_title = param_title.replace(' ', '').replace(',', ', ')

        # No multiindex parameters: select every row.
        if mi_params == ():
            mi_params = np.arange(0, len(dfc))

        # Select each method's estimates once instead of once per pair.
        selected = {m: dfc[m][mi_params] for m in methods}

        R = np.zeros([n_methods, n_methods])
        for i, m1 in enumerate(methods):
            for j, m2 in enumerate(methods):
                # Only correlate samples where both methods have an estimate.
                notnan = np.intersect1d(np.where(np.isnan(selected[m1]) == 0),
                                        np.where(np.isnan(selected[m2]) == 0))
                R[i, j] = sps.spearmanr(selected[m1][notnan], selected[m2][notnan])[0]

        fig, ax = plt.subplots(1)

        pax = ax.pcolormesh(R, vmin=-1, vmax=1, cmap=cmap)
        tvc_benchmarker.square_axis(ax)

        ax.set_xticks(tick_positions)
        ax.set_xticklabels(methods)
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(methods)
        # Reversed y limits put the first method at the top.
        ax.axis([0, n_methods, n_methods, 0])

        fig.suptitle(param_title, fontsize=11)
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        fig.colorbar(pax)
        # 'dpi' is the resolution keyword of savefig; the previous 'r' keyword is not accepted.
        fig.savefig(os.path.join(fig_dir, fig_prefix + 'dfc-method-correlation' + param_sname + '.pdf'), dpi=600)
        # Release the figure now so they do not pile up across combinations.
        plt.close(fig)

    plt.close('all')

Example #2

0

Show file

def gen_data_sim1(params, mi=None):
    """
    Generate two autocorrelated time series per parameter combination.

    *INPUT*

    params is a dictionary which must contain the following:

    :n_samples: length of time series.
    :alpha: auto-correlation of time series. Can be single number or np.array with length of mu
    :mu: Mean passed to the multivariate Gaussian draw. Must be of length 2 or greater.
    :sigma: Covariance matrix for the multivariate Gaussian draw. Array or list with shape of (len(mu),len(mu)).
    :randomseed: seed given to ``np.random.seed``

    :mi: multiindex specification forwarded to ``tvc_benchmarker.multiindex_preproc``.

    *LIMITATIONS*

    Mean and sigma must always stay the same.
    n_samples cannot be multiindex

    *RETURNS*

    :df: pandas dataframe with timeseries_1, timeseries_2, indexed by the
        multiindex parameters followed by 'time'.

    """
    np.random.seed(params['randomseed'])

    # Resolve the multiindex parameters and how many values each of them has.
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(
        params, mi)

    # One flat row per time series; simulations are laid out back to back.
    x = np.zeros([2, params['n_samples']] + mi_num)
    x = x.reshape([2, int(np.prod(x.shape) / 2)])

    for sim_it, mi_params in enumerate(mi_parameters):

        # Parameters of this simulation: defaults overridden by the current combination.
        d = dict(params)
        for name, value in zip(mi, mi_params):
            d[name] = value

        n_samples = d['n_samples']
        offset = n_samples * sim_it

        # Innovations, arranged as (series, time).
        w = np.random.multivariate_normal(d['mu'], d['sigma'],
                                          n_samples).transpose()
        x[:, offset:offset + n_samples] = np.array(w)

        # x_t = alpha * x_{t-1} + w_t for every sample after the first.
        for t in range(1, n_samples):
            current = offset + t
            x[:, current] = d['alpha'] * x[:, current - 1] + w[:, t]

    time_index = np.arange(0, d['n_samples'])
    multi_ind = pd.MultiIndex.from_product((mi_param_list) + [time_index],
                                           names=mi + ['time'])
    columns = {'timeseries_1': x[0, :], 'timeseries_2': x[1, :]}
    df = pd.DataFrame(data=columns, index=multi_ind)
    return df

Example #3

0

Show file

File: plot.py Project: 62442katieb/tvc_benchmarker

def plot_dfc_timeseries(dfc, limitaxis=500, cm='Set2', fig_dir = None, fig_prefix=None,mi=[]):
    """
    Plot the first ``limitaxis`` estimates of every column in ``dfc``, one panel per column.

    One figure is saved per parameter combination returned by
    ``tvc_benchmarker.multiindex_preproc``.

    **Input**

    :dfc: dataframe with one column per DFC method.
    :limitaxis: number of samples shown on the x axis.
    :cm: colormap name passed to ``tvc_benchmarker.get_discrete_colormap``.
    :fig_dir: directory in which figures are saved (created if missing). Default './'.
    :fig_prefix: optional prefix of the saved file names (a '_' is appended).
    :mi: name, or list of names, of the index levels to loop over.

    **Returns**

    Nothing. Files named ``<fig_prefix>dfc-timeseries<params>.pdf`` are written.
    """
    # Copy so that the shared default list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    colormap = tvc_benchmarker.get_discrete_colormap(cm)
    methods = sorted(dfc.columns)

    for mi_params in mi_parameters:

        labelled = list(zip(mi, mi_params))
        param_sname = '_'.join(name + '-' + str(value) for name, value in labelled)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')

        param_title = ','.join(name + '=' + str(value) for name, value in labelled)
        param_title = param_title.replace(' ', '').replace(',', ', ')

        # No multiindex parameters: select every row.
        if mi_params == ():
            mi_params = np.arange(0, len(dfc))

        # squeeze=False keeps an indexable array of axes even for a single method.
        fig, axes = plt.subplots(len(methods), 1, sharex=True, sharey=True,
                                 figsize=(5, len(methods) * 2), squeeze=False)
        ax = axes[:, 0]

        for i, dfc_method in enumerate(methods):
            ax[i].plot(dfc[dfc_method][mi_params][:limitaxis].values, color=colormap(i), alpha=0.5, linewidth=2)
            ax[i].set_ylabel('DFC ('+ dfc_method + ')')
            ax[i].get_yaxis().set_major_locator(LinearLocator(numticks=5))
            ax[i].set_xlim(1, limitaxis)

        ax[-1].set_xlabel('time')

        fig.suptitle(param_title, fontsize=11)
        fig.tight_layout(rect=[0, 0, 1, 0.95])

        # 'dpi' is the resolution keyword of savefig; the previous 'r' keyword is not accepted.
        fig.savefig(os.path.join(fig_dir, fig_prefix + 'dfc-timeseries' + param_sname + '.pdf'), dpi=600)
        # Release the figure now so they do not pile up across combinations.
        plt.close(fig)

    plt.close('all')

Example #4

0

Show file

def calc_new_method(x, new_method, params_new_method=None, mi='alpha'):
    """
    Apply a user supplied DFC method to each simulation contained in ``x``.

    **Input**

    :x: dataframe with columns 'timeseries_1' and 'timeseries_2'. Its index
        either has a 'time' level plus the multiindex parameter levels, or is
        a single unnamed level (which is then named 'time' in place).
    :new_method: callable receiving a (2, time) array plus the keyword
        arguments in ``params_new_method['params']``.
    :params_new_method: dictionary with optional keys 'name' and 'params'.
        A missing 'name' is filled in with ``new_method.__name__``.
    :mi: ignored; the multiindex levels are taken from ``x.index.names``.

    **Returns**

    :dfc_estimate: 1D numpy array with the estimates of all parameter
        combinations concatenated.
    """
    if params_new_method is None:
        params_new_method = {}
    if params_new_method.get('name') is None:
        params_new_method['name'] = new_method.__name__
    method_kwargs = params_new_method.get('params') or {}

    # Every index level except 'time' is treated as a multiindex parameter.
    mi = list(x.index.names)
    if len(x.index.names) > 1:
        mi.remove('time')
    elif len(x.index.names) == 1 and x.index.names == [None]:
        x.index.names = ['time']
    if x.index.names == ['time']:
        mi = []

    params = {m: np.unique(x.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(
        params, mi)

    # Number of samples belonging to one parameter combination.
    time_points = int(
        len(x.index.get_level_values('time')) / len(mi_parameters))

    dfc_estimate = []

    for mi_params in mi_parameters:

        # No multiindex parameters: select every row.
        if mi_params == ():
            mi_params = np.arange(0, len(x))

        tmp = np.array(
            new_method(
                np.array([
                    x['timeseries_1'][mi_params], x['timeseries_2'][mi_params]
                ]), **method_kwargs))

        # Reduce a node,node,time output to the single edge first, so that the
        # length checks below look at the time axis rather than the node axis.
        if tmp.ndim == 3:
            connectivity = tmp[0, 1, :]
        else:
            connectivity = tmp

        # Methods that lose samples at the edges are padded symmetrically with NaN.
        if len(connectivity) != time_points:
            window = int((time_points - len(connectivity)) / 2)
            connectivity = np.pad(connectivity,
                                  window,
                                  mode='constant',
                                  constant_values=np.nan)
        # An odd number of lost samples leaves one missing: add it at the start.
        if len(connectivity) == time_points - 1:
            connectivity = np.hstack([np.nan, connectivity])

        dfc_estimate.append(connectivity)

    dfc_estimate = np.concatenate(dfc_estimate)

    return dfc_estimate

Example #5

0

Show file

File: dfc_evaluate.py Project: 62442katieb/tvc_benchmarker

def model_dfc(x,
              dfc,
              dat_dir,
              model_prefix,
              bayes_model='bayes_model',
              mi='alpha',
              model_params={}):
    """
    Fit ``tvc_benchmarker.bayes_model`` for every DFC method and parameter
    combination, then save the trace plot and the fitted model.

    **Input**

    :x: raw time series (DF); must contain the column 'covariance_parameter'.
    :dfc: dynamic connectivity estimates (DF), one column per method.
    :dat_dir: Place to save the stats data.
    :model_prefix: Prefix name for saved file
    :bayes_model: not used by this function.
    :mi: name, or list of names, of the index levels to loop over.
    :model_params: dictionary of keyword arguments for the bayes_model
        function. ``None`` is treated as no extra arguments.
    """
    # ``None`` previously became '' which cannot be unpacked with ``**``.
    if model_params is None:
        model_params = {}

    # Copy so that a caller's list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(
        params, mi)

    for mi_params in mi_parameters:
        # File name suffix describing this parameter combination.
        param_sname = '_'.join(
            name + '-' + str(value) for name, value in zip(mi, mi_params))
        param_sname = '_' + param_sname.replace(' ', '')

        for method in dfc.columns:
            plt.close('all')
            X = dfc[method][mi_params]
            # Align the covariance parameter with the samples present in X.
            Y = x['covariance_parameter'][mi_params][X.index]
            trace_and_model = tvc_benchmarker.bayes_model(X, Y, **model_params)
            # Save data
            file_name = model_prefix + '_' + 'method-' + method + param_sname
            tvc_benchmarker.trace_plot(dat_dir, file_name, trace_and_model[0])
            tvc_benchmarker.save_bayes_model(dat_dir, file_name,
                                             trace_and_model)

Example #6

0

Show file

File: plot.py Project: 62442katieb/tvc_benchmarker

def plot_fluctuating_covariance(x, fig_dir = None, lags=10,limitaxis=500,cm = 'Set2',mi='alpha', fig_prefix=None):
    """
    Plot the covariance parameter of a simulation: its time course, its
    distribution and its autocorrelation.

    One figure is saved per parameter combination returned by
    ``tvc_benchmarker.multiindex_preproc``.

    **Input**

    :x: dataframe containing the column 'covariance_parameter'.
    :fig_dir: directory in which figures are saved (created if missing). Default './'.
    :lags: forwarded to ``tvc_benchmarker.autocorr``.
    :limitaxis: number of samples shown in the time course panel.
    :cm: colormap name passed to ``tvc_benchmarker.get_discrete_colormap``.
    :mi: name, or list of names, of the index levels to loop over.
    :fig_prefix: optional prefix of the saved file names (a '_' is appended).

    **Returns**

    Nothing. Files named ``<fig_prefix>fluctuating-covariance<params>.pdf`` are written.
    """
    # Copy so that a caller's list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    if not fig_dir:
        fig_dir = './'
    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(x.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    colormap = tvc_benchmarker.get_discrete_colormap(cm)

    for mi_params in mi_parameters:

        labelled = list(zip(mi, mi_params))
        param_sname = '_'.join(name + '-' + str(value) for name, value in labelled)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')

        param_title = ','.join(name + '=' + str(value) for name, value in labelled)
        param_title = param_title.replace(' ', '').replace(',', ', ')

        # No multiindex parameters: select every row.
        if mi_params == ():
            mi_params = np.arange(0, len(x))

        covariance = x['covariance_parameter'][mi_params]
        covariance_head = covariance[:limitaxis]
        covariance_autocorrelation = tvc_benchmarker.autocorr(covariance, lags=lags)

        # Create grid: one wide panel on top, two square panels below.
        fig = plt.figure()
        ax = []
        ax.append(plt.subplot2grid((2,2),(0,0),colspan=2))
        ax.append(plt.subplot2grid((2,2),(1,0)))
        ax.append(plt.subplot2grid((2,2),(1,1)))

        # Panel 1: time course. The x values follow the data actually
        # available, so a series shorter than limitaxis can still be drawn.
        ax[0].plot(np.arange(1, len(covariance_head) + 1), covariance_head, color=colormap(0), alpha=0.5, linewidth=2)
        ax[0].set_xlabel('Time')
        ax[0].set_ylabel(r'Covariance ($r_t$)')

        ymin = covariance_head.min()
        ymax = covariance_head.max()
        ax[0].axis([1, limitaxis+1, np.around(ymin-0.05,1), np.around(ymax+0.05,1)])

        # Panel 2: distribution. 'density' is the current name of the
        # normalisation keyword of hist (formerly 'normed').
        ax[1].hist(covariance, np.arange(-.1,1,0.02), color=colormap(1), alpha=0.9, linewidth=0, histtype='stepfilled', density=True)
        ax[1].set_xlabel('Covariance')
        ax[1].set_ylabel('Frequency')
        xmin = covariance.min()
        xmax = covariance.max()
        ax[1].axis([np.around(xmin-0.05,1), np.around(xmax+0.05,1), 0, np.ceil(ax[1].get_ylim()[-1])])

        tvc_benchmarker.square_axis(ax[1])

        # Panel 3: autocorrelation. The lag axis is derived from the result
        # rather than hard-coded, so it stays valid when lags != 10.
        n_lags = len(covariance_autocorrelation)
        ax[2].plot(np.arange(0, n_lags), covariance_autocorrelation, color=colormap(2), alpha=0.9, linewidth=2)
        ax[2].set_ylabel('Correlation (r)')
        ax[2].set_xlabel('Lag')
        ymin = covariance_autocorrelation.min()
        ymax = 1
        ax[2].axis([0, n_lags - 1, np.around(ymin-0.05,1), np.around(ymax+0.05,1)])

        tvc_benchmarker.square_axis(ax[2])
        fig.suptitle(param_title, fontsize=11)
        fig.tight_layout(rect=[0, 0, 1, 0.95])

        # 'dpi' is the resolution keyword of savefig; the previous 'r' keyword is not accepted.
        fig.savefig(os.path.join(fig_dir, fig_prefix + 'fluctuating-covariance' + param_sname + '.pdf'), dpi=600)
        # Release the figure now so they do not pile up across combinations.
        plt.close(fig)

    plt.close('all')

Example #7

0

Show file

File: plot.py Project: 62442katieb/tvc_benchmarker

def plot_betadfc_distribution(dfc, dat_dir, fig_dir = None, model_prefix=None, burn=1000, mi='alpha', cm='Set2'):
    """
    Plot the posterior distribution of 'beta' of the saved bayes model of
    every column in ``dfc``, one panel per column.

    One figure is saved per parameter combination returned by
    ``tvc_benchmarker.multiindex_preproc``.

    **Input**

    :dfc: dataframe with one column per DFC method.
    :dat_dir: directory passed to ``tvc_benchmarker.load_bayes_model``.
    :fig_dir: directory in which figures are saved (created if missing). Default './'.
    :model_prefix: optional prefix of the model and figure file names (a '_' is appended).
    :burn: number of samples discarded from the start of each trace.
    :mi: name, or list of names, of the index levels to loop over.
    :cm: colormap name passed to ``tvc_benchmarker.get_discrete_colormap``.

    **Returns**

    Nothing. Files named ``<model_prefix>beta-posterior<params>.pdf`` are written.
    """
    # Copy so that a caller's list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    # Without a prefix fall back to '' so the concatenations below still work.
    model_prefix = model_prefix + '_' if model_prefix else ''

    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    colormap = tvc_benchmarker.get_discrete_colormap(cm)
    methods = sorted(dfc.columns)

    for mi_params in mi_parameters:

        labelled = list(zip(mi, mi_params))
        param_sname = '_'.join(name + '-' + str(value) for name, value in labelled)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')

        param_title = ','.join(name + '=' + str(value) for name, value in labelled)
        param_title = param_title.replace(' ', '').replace(',', ', ')

        # squeeze=False keeps an indexable array of axes even for a single method.
        fig, axes = plt.subplots(len(methods), sharex=True, sharey=True,
                                 figsize=(5, len(methods)), squeeze=False)
        ax = axes[:, 0]

        beta_col = []
        for i, method in enumerate(methods):
            beta_dfc = tvc_benchmarker.load_bayes_model(dat_dir, model_prefix + 'method-' + method + param_sname)[0][burn:].get_values('beta')
            # Plot
            ax[i].hist(beta_dfc, np.arange(-1,1,0.001), histtype='stepfilled', color=colormap(i), density=True, alpha=0.4, linewidth=2, label=method)
            ax[i].set_yticklabels([])
            ax[i].set_ylabel(method)
            beta_col.append(beta_dfc)

        # The shared x range spans the posterior samples of all methods.
        beta_col = np.vstack(beta_col)
        xmin = beta_col.min()
        xmax = beta_col.max()
        ax[0].get_yaxis().set_major_locator(LinearLocator(numticks=4))
        ax[0].set_xlim([np.around(xmin-0.005,2), np.around(xmax+0.005,2)])

        ax[-1].set_xlabel('Posterior (' + r'$β$' + ')')

        fig.suptitle(param_title, fontsize=11)
        fig.tight_layout(rect=[0, 0, 1, 0.95])

        # 'dpi' is the resolution keyword of savefig; the previous 'r' keyword is not accepted.
        fig.savefig(os.path.join(fig_dir, model_prefix + 'beta-posterior' + param_sname + '.pdf'), dpi=600)
        # Release the figure now so they do not pile up across combinations.
        plt.close(fig)

    plt.close('all')

Example #8

0

Show file

File: plot.py Project: 62442katieb/tvc_benchmarker

def plot_timeseries(x,plot_autocorr='no',fig_dir=None,fig_prefix=None,cm='Set2',limitaxis=100,mi='alpha'):
    """
    Plot the two raw time series, optionally with their autocorrelation and
    joint distribution.

    One figure is saved per parameter combination returned by
    ``tvc_benchmarker.multiindex_preproc``.

    **Input**

    :x: dataframe containing the columns 'timeseries_1' and 'timeseries_2'.
    :plot_autocorr: 'no' plots only the time series; any other value adds the
        autocorrelation of each series and a kernel density plot of the two.
    :fig_dir: directory in which figures are saved (created if missing). Default './'.
    :fig_prefix: optional prefix of the saved file names (a '_' is appended).
    :cm: colormap name passed to ``tvc_benchmarker.get_discrete_colormap``.
    :limitaxis: number of samples shown in the time series panel.
    :mi: name, or list of names, of the index levels to loop over.

    **Returns**

    Nothing. Files named ``<fig_prefix>raw-timeseries<params>.pdf`` are written.
    """
    # Copy so that a caller's list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(x.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    colormap = tvc_benchmarker.get_discrete_colormap(cm)

    for mi_params in mi_parameters:

        labelled = list(zip(mi, mi_params))
        param_sname = '_'.join(name + '-' + str(value) for name, value in labelled)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')

        param_title = ','.join(name + '=' + str(value) for name, value in labelled)
        param_title = param_title.replace(' ', '').replace(',', ', ')

        # No multiindex parameters: select every row.
        if mi_params == ():
            mi_params = np.arange(0, len(x))

        ts1 = x['timeseries_1'][mi_params]
        ts2 = x['timeseries_2'][mi_params]
        ts1_head = ts1[:limitaxis]
        ts2_head = ts2[:limitaxis]
        # The x values follow the data actually available, so a series
        # shorter than limitaxis can still be drawn.
        time_axis = np.arange(1, len(ts1_head) + 1)

        if plot_autocorr == 'no':

            fig, ax = plt.subplots(1)
            ax.plot(time_axis, ts1_head, color=colormap(0), alpha=0.9, linewidth=2)
            ax.plot(time_axis, ts2_head, color=colormap(1), alpha=0.9, linewidth=2)
            ax.set_xlim(1, limitaxis)
            ax.set_ylabel('Signal Amplitude')
            ax.set_xlabel('Time')

        else:

            autocorrelation = np.array([tvc_benchmarker.autocorr(ts) for ts in (ts1, ts2)])
            n_lags = autocorrelation.shape[1]

            # One wide panel on top, three panels below.
            fig = plt.figure()
            ax = []
            ax.append(plt.subplot2grid((2,3),(0,0),colspan=3))
            for n in range(0,3):
                ax.append(plt.subplot2grid((2,3),(1,n)))

            # Plot 1: raw time series
            ax[0].plot(time_axis, ts1_head, color=colormap(0), alpha=0.9, linewidth=2)
            ax[0].plot(time_axis, ts2_head, color=colormap(1), alpha=.9, linewidth=2)
            ax[0].set_xlim(1, limitaxis)
            ax[0].set_ylabel('Signal Amplitude')
            ax[0].set_xlabel('Time')

            # Plot 2 and 3: autocorrelation of timeseries 1 and 2
            for p in range(1,3):
                ax[p].plot(np.arange(0, n_lags), autocorrelation[p-1,:], color=colormap(p-1), alpha=0.9, linewidth=2)
                ax[p].set_ylabel('Correlation (r)')
                ax[p].set_xlabel('Lag')
                ax[p].axis([0, n_lags-1, 0, 1])
                ax[p].set_yticks(np.arange(0,1.05,0.25))
                ax[p].set_xticks(np.arange(0, n_lags, 2))

            # Plot 4: correlation of timeseries 1 and 2. kdeplot draws on the
            # current axes, which is the panel created last.
            # NOTE(review): positional data arguments and 'shade' may not be
            # accepted by recent seaborn releases - confirm against the
            # seaborn version this project pins.
            cmap = sns.cubehelix_palette(start=1/3, light=1, as_cmap=True)
            ax[3] = sns.kdeplot(ts1, ts2, shade=True, cmap=cmap)
            ax[3].set_xlabel('Signal 1 amplitude')
            ax[3].set_ylabel('Signal 2 amplitude')

            for n in [1,2,3]:
                tvc_benchmarker.square_axis(ax[n])

        fig.suptitle(param_title, fontsize=11)
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        # 'dpi' is the resolution keyword of savefig; the previous 'r' keyword is not accepted.
        fig.savefig(os.path.join(fig_dir, fig_prefix + 'raw-timeseries' + param_sname + '.pdf'), dpi=600)
        # Release the figure now so they do not pile up across combinations.
        plt.close(fig)

    plt.close('all')

Example #9

0

Show file

File: dfc_evaluate.py Project: 62442katieb/tvc_benchmarker

def calc_waic(dfc,
              model_dir,
              save_dir,
              file_prefix=None,
              mi='alpha',
              burn=1000):
    """
    Calculates WAIC of the saved bayes model of every DFC method, prints a
    table per parameter combination and saves it as markdown.

    **Input**

    :dfc: dfc dataframe, one column per method
    :model_dir: where the bayesian models are saved
    :save_dir: where to save the tables
    :file_prefix: prefix of the model and table file names (a '_' is appended)
    :mi: Loop over multiple models of this multiindex
    :burn: how many from start of trace to discard.

    **Returns**

    :waic: (numpy array) one row per column of ``dfc``, holding the first
        three values returned by ``pm.stats.waic``. Only the array of the
        last parameter combination is returned; ``None`` if there is no
        parameter combination.
    """
    # Without a prefix fall back to '' so the concatenations below still work.
    file_prefix = file_prefix + '_' if file_prefix else ''

    # Copy so that a caller's list is never handed to external code.
    mi = [mi] if isinstance(mi, str) else list(mi)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(
        params, mi)

    waic = None
    for mi_params in mi_parameters:

        # File name suffix describing this parameter combination.
        param_sname = '_'.join(
            name + '-' + str(value) for name, value in zip(mi, mi_params))
        param_sname = '_' + param_sname.replace(' ', '')

        waic = np.zeros([len(dfc.columns), 3])
        for i, method in enumerate(dfc.columns):
            file_name = file_prefix + 'method-' + method + param_sname
            tm = tvc_benchmarker.load_bayes_model(model_dir, file_name)
            waic[i, :] = np.array(pm.stats.waic(tm[0][burn:], tm[1])[0:3])

        # Rank the methods by WAIC; delta is relative to the lowest one.
        odr = np.argsort(waic[:, 0])
        delta_waic = waic[:, 0] - waic[odr[0], 0]

        # Create table for tabulate (raw string: '\D' is not a valid escape).
        tablelst = [["Model", "WAIC", "WAIC SE", r"$\Delta$ WAIC"]]
        for i in odr:
            tablelst.append(
                [dfc.columns[i], waic[i, 0], waic[i, 1], delta_waic[i]])
        # Make markdown table and save
        mdtable = tabulate.tabulate(tablelst,
                                    headers="firstrow",
                                    tablefmt='simple')
        table_path = (save_dir + '/' + file_prefix + 'waictable' +
                      param_sname + '.md')
        # The context manager closes the file; no explicit close is needed.
        with open(table_path, 'w') as f:
            f.write(mdtable)
        print(mdtable)

    return waic

Example #10

0

Show file

def _derive_pair_connectivity(data, mi_parameters, dfc_params, pad_width=None, prepend_nan=False):
    """
    Run ``teneto.derive.derive`` on timeseries_1/timeseries_2 of ``data`` for
    every parameter combination and concatenate the [0, 1] edge estimates.

    :pad_width: if given, number of NaN added to both ends of each estimate.
    :prepend_nan: if True, one NaN is put in front of each estimate before padding.
    """
    if mi_parameters[0]:
        # One pair of time series per multiindex parameter combination.
        pairs = [(data['timeseries_1'][mi_params], data['timeseries_2'][mi_params])
                 for mi_params in mi_parameters]
    else:
        # No multiindex parameters: use the complete time series.
        pairs = [(data['timeseries_1'], data['timeseries_2'])]

    estimates = []
    for ts1, ts2 in pairs:
        connectivity = teneto.derive.derive(np.array([ts1, ts2]), dfc_params)[0, 1, :]
        if prepend_nan:
            connectivity = np.hstack([np.nan, connectivity])
        if pad_width is not None:
            connectivity = np.pad(connectivity, pad_width, mode='constant', constant_values=np.nan)
        estimates.append(connectivity)
    # Line up all appended arrays
    return np.concatenate(estimates)


def dfc_calc(data,methods=['SW','TSW','SD','JC','TD'],sw_window=63,taper_name='norm',taper_properties=[0,10],sd_distance='euclidean',mtd_window=7,mi='alpha',colind=None):
    """
    Calculate (or load precalculated) dynamic functional connectivity estimates.

    **Input**

    :data: dataframe with columns 'timeseries_1' and 'timeseries_2', or one of
        the strings "sim-1", "sim-2", "sim-3", "sim-4" to load the
        precalculated estimates shipped in the package's data/dfc directory.
    :methods: method name or list of method names, see below. 'MTD' is
        accepted as an alias of 'TD'.
    :mi: name, or list of names, of the index levels to loop over.
    :colind: number of leading csv columns used as index when loading
        precalculated data. Derived from ``data`` when not given.

    Required parameters for the various different methods:

    If method == 'SW'
        sw_window = [Integer]
            Length of sliding window

    If method == 'TSW'
        sw_window = [Integer]
            Length of sliding window
        taper_name = [string]
            Name of scipy.stats distribution used (see teneto.derive.derive for more information)
        taper_properties = [list]
            List of the different scipy.stats.[taper_name] properties. E.g. if taper_name = 'norm'; taper_properties = [0,10] will be the mean and standard deviation of the distribution.
    If method == 'SD'
        sd_distance = [string]
            Distance function used to calculate the similarity between time-points. Can be any of the distance functions in scipy.spatial.distance.
    If method == 'JC'
        There are no parameters.
    If method == 'TD' (or 'MTD')
        mtd_window = [Integer]
            Length of window

    **Returns**

    :df: dataframe with one column per requested method.
    """

    # Make methods variable a list if single string is given
    if isinstance(methods, str):
        methods = [methods]
    # The temporal derivative is stored as 'TD'; treat 'MTD' as the same request.
    methods = ['TD' if m == 'MTD' else m for m in methods]

    # If data is a string, load precalculated data
    if isinstance(data, str):

        if not colind:
            if data == 'sim-1':
                colind = 1
            elif data in ('sim-2', 'sim-3', 'sim-4'):
                colind = 2
            else:
                raise ValueError('unknown simulation. Input must be  "sim-1", "sim-2", "sim-3" or "sim-4"')
        df = pd.read_csv(tvc_benchmarker.__path__[0] + '/data/dfc/' + data + '_dfc.csv',index_col=np.arange(0,colind))
        # Keep only the requested methods, in the order they were requested
        requested_methods = [m for m in methods if m in df.columns]
        return df[requested_methods]

    # Otherwise calculate
    if isinstance(mi, str):
        mi = [mi]

    params = {m: np.unique(data.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    dfc = {}
    # Window based estimates are shorter than the input; this many NaN are
    # added to both ends of each sliding window estimate.
    sw_pad = int((sw_window - 1) / 2)

    # Sliding window
    if 'SW' in methods:
        dfc_params = {}
        dfc_params['windowsize'] = sw_window
        dfc_params['method'] = 'slidingwindow'
        dfc_params['dimord'] = 'node,time'
        dfc_params['postpro'] = 'fisher'
        dfc_params['report'] = 'no'
        dfc['SW'] = _derive_pair_connectivity(data, mi_parameters, dfc_params, pad_width=sw_pad)

    # Tapered sliding window
    if 'TSW' in methods:
        dfc_params = {}
        dfc_params['windowsize'] = sw_window
        dfc_params['distribution'] = taper_name
        dfc_params['distribution_params'] = taper_properties
        dfc_params['method'] = 'taperedslidingwindow'
        dfc_params['dimord'] = 'node,time'
        dfc_params['postpro'] = 'fisher'
        dfc_params['report'] = 'no'
        dfc['TSW'] = _derive_pair_connectivity(data, mi_parameters, dfc_params, pad_width=sw_pad)

    # Spatial distance
    if 'SD' in methods:
        dfc_params = {}
        # Honour the sd_distance argument (default 'euclidean').
        dfc_params['distance'] = sd_distance
        dfc_params['method'] = 'spatialdistance'
        dfc_params['dimord'] = 'node,time'
        dfc_params['postpro'] = 'fisher'
        dfc_params['report'] = 'no'
        dfc['SD'] = _derive_pair_connectivity(data, mi_parameters, dfc_params)

    # Jackknife
    if 'JC' in methods:
        dfc_params = {}
        dfc_params['method'] = 'jackknife'
        dfc_params['dimord'] = 'node,time'
        dfc_params['postpro'] = 'fisher'
        dfc_params['report'] = 'no'
        dfc['JC'] = _derive_pair_connectivity(data, mi_parameters, dfc_params)

    # Temporal derivative
    if 'TD' in methods:
        dfc_params = {}
        dfc_params['method'] = 'mtd'
        dfc_params['dimord'] = 'node,time'
        dfc_params['postpro'] = 'no'
        dfc_params['windowsize'] = mtd_window
        dfc_params['report'] = 'no'
        # One NaN is put in front of each estimate before the symmetric padding.
        dfc['TD'] = _derive_pair_connectivity(data, mi_parameters, dfc_params,
                                              pad_width=int((mtd_window - 1) / 2),
                                              prepend_nan=True)

    df = pd.DataFrame(data=dfc, index=data.index)
    return df

Example #11

0

Show file

def gen_data_sim2(params, mi='alpha'):
    """
    Generate two time series whose covariance fluctuates over time.

    *INPUT*

    Params is a dictionary which contains the following

    :n_samples: length of time series.
    :alpha: weight of the previous covariance value when drawing the next one.
    :mu: Mean of the time series, passed to the multivariate Gaussian draw. Must be a array/list of length 2 or 2xn_samples.
    :var: Variance of the time series (used on the diagonal of the covariance matrix).
    :covar_mu: Mean of the covariance of the time series.
    :covar_sigma: Scale passed to the normal draw of the covariance.
    :randomseed: seed given to ``np.random.seed``

    Additionally, if there is a multi_index variable, this should be specified differently

    :mi: multi_index. list of variable names which have multiple parameters. These parameters should be in a list. E.g. if mi='mu', then mu becomes a list surrounding its contents. e.g. mu=[[0,0],[1,1]]

    *LIMITATIONS*

    As the parameters stand, only 2 time series can be generated (some minor modifications are needed to generate more).
    Input `var` cannot vary between the time series.

    *RETURNS*

    :df: pandas dataframe with timeseries_1, timeseries_2, covariance_parameter. Table is multiindexed with the multi_index parameters and time.

    """
    # Random seed
    np.random.seed(params['randomseed'])

    # Resolve the multiindex parameters and how many values each of them has.
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(
        params, mi)

    # Pre allocate output; simulations are laid out back to back.
    x = np.zeros([2, params['n_samples']] + mi_num)
    fluct_cv = np.zeros([params['n_samples']] + mi_num)

    x = x.reshape([2, int(np.prod(x.shape) / 2)])
    fluct_cv = fluct_cv.flatten()

    for sim_it, mi_params in enumerate(mi_parameters):

        # Parameters of this simulation: defaults overridden by the current combination.
        d = dict(params)
        for name, value in zip(mi, mi_params):
            d[name] = value

        n_samples = d['n_samples']
        offset = n_samples * sim_it

        # Repeat mu along the time axis unless it already has n_samples columns.
        mu = np.array(d['mu'], ndmin=2)
        if mu.shape[-1] != n_samples:
            mu = np.tile(mu.transpose(), n_samples)
        d['mu'] = mu

        # Repeat covar_mu along the time axis unless it already has n_samples entries.
        covar_mu = np.array(d['covar_mu'], ndmin=1)
        if covar_mu.shape[-1] != n_samples:
            covar_mu = np.tile(covar_mu.transpose(), n_samples)
        d['covar_mu'] = covar_mu

        for t in range(0, n_samples):
            current = offset + t
            covar = np.random.normal(covar_mu[t], d['covar_sigma'])
            # From the second time point on, the previous covariance carries over.
            if t != 0:
                covar = covar + d['alpha'] * fluct_cv[current - 1]
            covariance_matrix = [[d['var'], covar], [covar, d['var']]]
            x[:, current] = np.random.multivariate_normal(
                mu[:, t], covariance_matrix, 1)
            fluct_cv[current] = covar

    if (np.abs(fluct_cv) > 1).any():
        print(
            'TVC BENCHMARKER WARNING: some value(s) of r_t>1 or r_t<-1. Consider changing parameters.'
        )

    time_index = np.arange(0, d['n_samples'])
    multi_ind = pd.MultiIndex.from_product((mi_param_list) + [time_index],
                                           names=mi + ['time'])
    columns = {
        'timeseries_1': x[0, :],
        'timeseries_2': x[1, :],
        'covariance_parameter': fluct_cv
    }
    df = pd.DataFrame(data=columns, index=multi_ind)
    return df

Example #12

0

Show file

def gen_data_sim4(params, mi=None):
    """
    Simulate two time series whose covariance switches between states.

    For every simulation a piecewise-constant mean covariance is built by
    repeatedly drawing a covariance from `covar_range` and a duration from
    `state_length`. At each time point the actual covariance is sampled from
    a normal distribution around that mean (std `covar_sigma`) and one sample
    of both time series is drawn from a bivariate Gaussian with it.

    *INPUT*

    :params: dictionary of simulation parameters. Keys read by this function:

        :n_samples: length of each simulated time series.
        :mu: Mean of the time series. Array/list of length 2 or 2 x n_samples.
        :var: Variance of the time series (the same value is used for both series).
        :covar_range: list of possible mean covariances, one is drawn per state.
        :state_length: list of possible state durations (in samples), one is drawn per state.
        :covar_sigma: standard deviation of the covariance around the state's mean covariance.
        :randomseed: seed given to np.random.seed.

        `state_length_name` may be present in `params` but is not read here.

    :mi: names of the entries of `params` that vary between simulations.
        Passed to tvc_benchmarker.multiindex_preproc; for each simulation the
        named entries are replaced by that simulation's values.

    *LIMITATIONS*

    As the parameters stand, only 2 time series can be generated (some minor modifications are needed to generate more).
    Input `var` cannot vary between the time series.

    *RETURNS*

    :df: pandas dataframe with columns timeseries_1, timeseries_2,
        covariance_parameter (sampled covariance at each time point) and
        covariance_mean (mean covariance of the current state). The table is
        multiindexed with the parameters named in `mi` followed by time.

    *RAISES*

    :ValueError: if none of the candidate state lengths of a simulation is positive
        (the covariance time course could never be filled).

    """

    # Random seed
    np.random.seed(params['randomseed'])

    # Check multiindex and get number of each multiindex
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(
        params, mi)

    n_samples = params['n_samples']

    # Pre allocate output: one flat stretch of n_samples columns per simulation
    x = np.zeros([2, n_samples] + mi_num)
    x = x.reshape([2, x.size // 2])

    fluct_cv = np.zeros([n_samples * len(mi_parameters)])
    fluct_cv_state = np.zeros([n_samples * len(mi_parameters)])

    for sim_it, mi_params in enumerate(mi_parameters):

        # Parameters of this simulation: defaults overridden by the
        # multiindex values
        d = dict(params)
        d.update(zip(mi, mi_params))

        # extend mu through timeseries if it is (list of) integers
        d['mu'] = np.array(d['mu'], ndmin=2)
        if d['mu'].shape[-1] != n_samples:
            d['mu'] = np.tile(d['mu'].transpose(), n_samples)

        # Without a positive state length the loop below would never
        # terminate, so fail loudly instead of hanging.
        state_lengths = np.asarray(d['state_length'])
        if n_samples > 0 and state_lengths.size and np.max(state_lengths) <= 0:
            raise ValueError(
                'state_length must contain at least one positive value')

        # Chain randomly drawn states until the whole time series is covered.
        # NOTE: the order of the random draws is kept as is so that results
        # for a given randomseed stay reproducible.
        covar_mu = np.array([])
        while len(covar_mu) < n_samples:
            new_covariance = np.random.permutation(d['covar_range'])[0]
            covariance_length = np.random.permutation(d['state_length'])[0]
            covar_mu = np.hstack(
                [covar_mu,
                 np.tile(new_covariance, covariance_length)])

        # The last state usually overshoots; trim to the series length
        covar_mu = covar_mu[:n_samples]

        start = n_samples * sim_it
        fluct_cv_state[start:start + n_samples] = covar_mu
        for t in range(n_samples):
            covar = np.random.normal(covar_mu[t], d['covar_sigma'])
            x[:, start + t] = np.random.multivariate_normal(
                d['mu'][:, t], [[d['var'], covar], [covar, d['var']]], 1)
            fluct_cv[start + t] = covar

    # x, fluct_cv and fluct_cv_state are already flat along the
    # (simulation, time) axis, matching the row order of the product index.
    multi_ind = pd.MultiIndex.from_product(
        (mi_param_list + [np.arange(0, n_samples)]), names=mi + ['time'])
    df = pd.DataFrame(data={
        'timeseries_1': x[0, :],
        'timeseries_2': x[1, :],
        'covariance_parameter': fluct_cv,
        'covariance_mean': fluct_cv_state
    },
                      index=multi_ind)

    return df