def plot_method_correlation(dfc, cmap='RdBu_r', fig_dir=None, fig_prefix=None, mi=None):
    """
    Plot the pairwise Spearman correlation between DFC methods as a heat map.

    One PDF is written per multi-index parameter combination.

    **Input**

    :dfc: dataframe with one column per DFC method.
    :cmap: matplotlib colormap used for the heat map.
    :fig_dir: directory to save the figures in (created if missing). Default './'
    :fig_prefix: optional prefix of the saved file names.
    :mi: name (or list of names) of the index levels to loop over. Default: none.

    **Returns**

    Nothing. Figures are saved as
    ``<fig_dir>/<fig_prefix_>dfc-method-correlation<_params>.pdf``.
    """
    if mi is None:
        mi = []
    elif isinstance(mi, str):
        mi = [mi]
    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    methods = sorted(dfc.columns)
    n_methods = len(methods)
    tick_positions = np.arange(0.5, n_methods - 0.49, 1)

    for mi_params in mi_parameters:
        pairs = list(zip(mi, mi_params))
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in pairs)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')
        param_title = ','.join(p[0] + '=' + str(p[1]) for p in pairs)
        param_title = param_title.replace(' ', '').replace(',', ', ')
        # No multi-index level to loop over: select every row
        if mi_params == ():
            mi_params = np.arange(0, len(dfc))

        # Select each method once; plain arrays so that NaN masking is positional
        values = {m: np.asarray(dfc[m][mi_params].values, dtype=float) for m in methods}

        R = np.zeros([n_methods, n_methods])
        for i, m1 in enumerate(methods):
            for j, m2 in enumerate(methods):
                # Only correlate the time points where both methods have an estimate
                valid = ~np.isnan(values[m1]) & ~np.isnan(values[m2])
                R[i, j] = sps.spearmanr(values[m1][valid], values[m2][valid])[0]

        fig, ax = plt.subplots(1)
        pax = ax.pcolormesh(R, vmin=-1, vmax=1, cmap=cmap)
        tvc_benchmarker.square_axis(ax)
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(methods)
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(methods)
        # Inverted y axis so that the first method is in the top row
        ax.axis([0, n_methods, n_methods, 0])
        plt.suptitle(param_title, fontsize=11)
        plt.tight_layout(rect=[0, 0, 1, 0.95])
        fig.colorbar(pax)
        file_name = fig_prefix + 'dfc-method-correlation' + param_sname + '.pdf'
        # `dpi` is the resolution keyword of savefig (the original passed `r=600`)
        plt.savefig(os.path.join(fig_dir, file_name), dpi=600)
        plt.close('all')
def gen_data_sim1(params, mi=None):
    """
    Simulate pairs of autocorrelated time series with a fixed covariance.

    *INPUT*

    params is a dictionary which must contain the following:

    :n_samples: length of each time series.
    :alpha: autocorrelation applied to the previous time point.
    :mu: mean passed to the multivariate Gaussian the noise is drawn from.
    :sigma: covariance matrix passed to the multivariate Gaussian.
    :randomseed: seed for numpy's global random state.

    :mi: multi-index specification forwarded to
        ``tvc_benchmarker.multiindex_preproc``. Each resulting parameter
        combination is simulated separately.

    *LIMITATIONS*

    Only the first two rows of the sampled noise end up in the output.
    n_samples cannot be a multi-index parameter.

    *RETURNS*

    :df: pandas dataframe with columns timeseries_1 and timeseries_2, indexed
        by the multi-index parameters followed by 'time'.
    """
    np.random.seed(params['randomseed'])

    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    # Allocate one slot per sample and parameter combination, then flatten so
    # that the simulations sit back to back along the second axis
    x = np.zeros([2, params['n_samples']] + mi_num)
    x = x.reshape([2, int(np.prod(x.shape) / 2)])

    for sim_it, mi_params in enumerate(mi_parameters):
        # Parameters of this simulation: the defaults overridden by the
        # current multi-index values
        sim = dict(params)
        sim.update(zip(mi, mi_params))

        n_samples = sim['n_samples']
        offset = n_samples * sim_it

        noise = np.random.multivariate_normal(sim['mu'], sim['sigma'], n_samples).transpose()
        x[:, offset:offset + n_samples] = noise
        # First-order autoregressive process driven by the sampled noise
        for t in range(1, n_samples):
            x[:, offset + t] = sim['alpha'] * x[:, offset + t - 1] + noise[:, t]

    multi_ind = pd.MultiIndex.from_product(
        mi_param_list + [np.arange(0, n_samples)],
        names=mi + ['time'])
    columns = {'timeseries_1': x[0, :], 'timeseries_2': x[1, :]}
    return pd.DataFrame(data=columns, index=multi_ind)
def plot_dfc_timeseries(dfc, limitaxis=500, cm='Set2', fig_dir=None, fig_prefix=None, mi=None):
    """
    Plot the first time points of every DFC estimate, one subplot per method.

    One PDF is written per multi-index parameter combination.

    **Input**

    :dfc: dataframe with one column per DFC method.
    :limitaxis: number of time points to show.
    :cm: colormap name passed to tvc_benchmarker.get_discrete_colormap.
    :fig_dir: directory to save the figures in (created if missing). Default './'
    :fig_prefix: optional prefix of the saved file names.
    :mi: name (or list of names) of the index levels to loop over. Default: none.

    **Returns**

    Nothing. Figures are saved as
    ``<fig_dir>/<fig_prefix_>dfc-timeseries<_params>.pdf``.
    """
    if mi is None:
        mi = []
    elif isinstance(mi, str):
        mi = [mi]
    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)
    colormap = tvc_benchmarker.get_discrete_colormap(cm)

    methods = sorted(dfc.columns)

    for mi_params in mi_parameters:
        pairs = list(zip(mi, mi_params))
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in pairs)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')
        param_title = ','.join(p[0] + '=' + str(p[1]) for p in pairs)
        param_title = param_title.replace(' ', '').replace(',', ', ')
        # No multi-index level to loop over: select every row
        if mi_params == ():
            mi_params = np.arange(0, len(dfc))

        # squeeze=False keeps an indexable array even when there is one method
        fig, axes = plt.subplots(len(methods), 1, sharex=True, sharey=True,
                                 figsize=(5, len(methods) * 2), squeeze=False)
        axes = axes[:, 0]
        for i, dfc_method in enumerate(methods):
            axes[i].plot(dfc[dfc_method][mi_params][:limitaxis].values,
                         color=colormap(i), alpha=0.5, linewidth=2)
            axes[i].set_ylabel('DFC (' + dfc_method + ')')
            axes[i].get_yaxis().set_major_locator(LinearLocator(numticks=5))
            axes[i].set_xlim(1, limitaxis)
        axes[-1].set_xlabel('time')
        plt.suptitle(param_title, fontsize=11)
        plt.tight_layout(rect=[0, 0, 1, 0.95])
        file_name = fig_prefix + 'dfc-timeseries' + param_sname + '.pdf'
        # `dpi` is the resolution keyword of savefig (the original passed `r=600`)
        plt.savefig(os.path.join(fig_dir, file_name), dpi=600)
        plt.close('all')
def calc_new_method(x, new_method, params_new_method=None, mi='alpha'):
    """
    Apply a user supplied connectivity method to every simulated time-series pair.

    **Input**

    :x: dataframe with columns timeseries_1 and timeseries_2. The index must
        have a 'time' level; a single unnamed index is renamed to 'time'
        (this modifies the index names of the dataframe that was passed in).
    :new_method: callable. Called with a (2, time) array and the keyword
        arguments in params_new_method['params']. Should return either a 1-D
        estimate over time or a (node, node, time) array.
    :params_new_method: dictionary with optional keys 'name' and 'params'.
        If 'name' is missing or None it is set to new_method.__name__.
    :mi: unused. The multi-index levels are always taken from x.index.names.

    **Returns**

    :dfc_estimate: 1-D numpy array with the estimates of all parameter
        combinations concatenated. Estimates shorter than the time series are
        padded with NaN.
    """
    if params_new_method is None:
        params_new_method = {}
    if params_new_method.get('name') is None:
        params_new_method['name'] = new_method.__name__
    method_kwargs = params_new_method.get('params') or {}

    # Every index level except 'time' is a multi-index parameter
    mi = list(x.index.names)
    if len(x.index.names) > 1:
        mi.remove('time')
    elif len(x.index.names) == 1 and x.index.names == [None]:
        x.index.names = ['time']
    if x.index.names == ['time']:
        mi = []

    params = {m: np.unique(x.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    # Number of time points in a single simulation
    time_points = int(len(x.index.get_level_values('time')) / len(mi_parameters))

    dfc_estimate = []
    for mi_params in mi_parameters:
        # No multi-index level to loop over: select every row
        if mi_params == ():
            mi_params = np.arange(0, len(x))
        timeseries = np.array([x['timeseries_1'][mi_params], x['timeseries_2'][mi_params]])
        connectivity = np.array(new_method(timeseries, **method_kwargs))

        # Reduce a node,node,time output to the single edge BEFORE padding.
        # Padding first (as was done previously) pads every axis and makes
        # the [0, 1, :] slice return nothing but the NaN padding.
        if connectivity.ndim == 3:
            connectivity = connectivity[0, 1, :]

        # Windowed methods return fewer time points: pad symmetrically with NaN
        missing = time_points - len(connectivity)
        if missing > 0:
            connectivity = np.pad(connectivity, int(missing / 2), mode='constant',
                                  constant_values=np.nan)
        # An odd number of missing points leaves one more to add at the start
        if len(connectivity) == time_points - 1:
            connectivity = np.hstack([np.nan, connectivity])

        dfc_estimate.append(connectivity)

    return np.concatenate(dfc_estimate)
def model_dfc(x, dfc, dat_dir, model_prefix, bayes_model='bayes_model', mi='alpha', model_params=None):
    """
    General stats function that calls tvc_benchmarker.bayes_model and saves the output.

    A model is fitted for every DFC method and every multi-index parameter
    combination.

    **Input**

    :x: raw time series (DF). Must contain the column 'covariance_parameter'.
    :dfc: dynamic connectivity estimates (DF), one column per method.
    :dat_dir: place to save the stats data.
    :model_prefix: prefix name for the saved files.
    :bayes_model: unused. tvc_benchmarker.bayes_model is always called.
    :mi: name (or list of names) of the index levels to loop over.
    :model_params: dictionary of keyword arguments for bayes_model.
        None or empty means no additional arguments.

    **Returns**

    Nothing. The trace plot and the model are saved under the name
    ``<model_prefix>_method-<method>_<params>``.
    """
    # `None` previously became '' which cannot be unpacked with **
    if model_params is None:
        model_params = {}
    if isinstance(mi, str):
        mi = [mi]

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    for mi_params in mi_parameters:
        # The file name suffix only depends on the parameter combination
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in zip(mi, mi_params))
        param_sname = '_' + param_sname.replace(' ', '')

        for method in dfc.columns:
            plt.close('all')
            X = dfc[method][mi_params]
            # Align the covariance with the time points of the estimate
            Y = x['covariance_parameter'][mi_params][X.index]
            trace_and_model = tvc_benchmarker.bayes_model(X, Y, **model_params)

            # Save data
            file_name = model_prefix + '_' + 'method-' + method + param_sname
            tvc_benchmarker.trace_plot(dat_dir, file_name, trace_and_model[0])
            tvc_benchmarker.save_bayes_model(dat_dir, file_name, trace_and_model)
def plot_fluctuating_covariance(x, fig_dir=None, lags=10, limitaxis=500, cm='Set2', mi='alpha', fig_prefix=None):
    """
    Plot the time course, distribution and autocorrelation of the covariance parameter.

    One PDF with three panels is written per multi-index parameter combination.

    **Input**

    :x: dataframe containing the column 'covariance_parameter'.
    :fig_dir: directory to save the figures in (created if missing). Default './'
    :lags: number of lags passed to tvc_benchmarker.autocorr.
    :limitaxis: number of time points shown in the time-course panel.
    :cm: colormap name passed to tvc_benchmarker.get_discrete_colormap.
    :mi: name (or list of names) of the index levels to loop over.
    :fig_prefix: optional prefix of the saved file names.

    **Returns**

    Nothing. Figures are saved as
    ``<fig_dir>/<fig_prefix_>fluctuating-covariance<_params>.pdf``.
    """
    if isinstance(mi, str):
        mi = [mi]
    if not fig_dir:
        fig_dir = './'
    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(x.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)
    colormap = tvc_benchmarker.get_discrete_colormap(cm)

    for mi_params in mi_parameters:
        pairs = list(zip(mi, mi_params))
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in pairs)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')
        param_title = ','.join(p[0] + '=' + str(p[1]) for p in pairs)
        param_title = param_title.replace(' ', '').replace(',', ', ')
        # No multi-index level to loop over: select every row
        if mi_params == ():
            mi_params = np.arange(0, len(x))

        covariance = x['covariance_parameter'][mi_params]
        covariance_head = covariance.iloc[:limitaxis]
        covariance_autocorrelation = tvc_benchmarker.autocorr(covariance, lags=lags)

        # Create grid: wide panel on top, two square panels below
        plt.figure()
        ax = [
            plt.subplot2grid((2, 2), (0, 0), colspan=2),
            plt.subplot2grid((2, 2), (1, 0)),
            plt.subplot2grid((2, 2), (1, 1)),
        ]

        # Panel 1: time course. The x values follow the actual number of
        # points so that series shorter than limitaxis can still be plotted.
        ax[0].plot(np.arange(1, len(covariance_head) + 1), covariance_head,
                   color=colormap(0), alpha=0.5, linewidth=2)
        ax[0].set_xlabel('Time')
        ax[0].set_ylabel(r'Covariance ($r_t$)')
        ymin = covariance_head.min()
        ymax = covariance_head.max()
        ax[0].axis([1, limitaxis + 1, np.around(ymin - 0.05, 1), np.around(ymax + 0.05, 1)])

        # Panel 2: distribution. `density` replaces the removed `normed` keyword.
        ax[1].hist(covariance, np.arange(-.1, 1, 0.02), color=colormap(1), alpha=0.9,
                   linewidth=0, histtype='stepfilled', density=True)
        ax[1].set_xlabel('Covariance')
        ax[1].set_ylabel('Frequency')
        xmin = covariance.min()
        xmax = covariance.max()
        ax[1].axis([np.around(xmin - 0.05, 1), np.around(xmax + 0.05, 1),
                    0, np.ceil(ax[1].get_ylim()[-1])])
        tvc_benchmarker.square_axis(ax[1])

        # Panel 3: autocorrelation. The lag axis is derived from the returned
        # values instead of being fixed to the default lags=10.
        n_lags = len(covariance_autocorrelation)
        ax[2].plot(np.arange(0, n_lags), covariance_autocorrelation,
                   color=colormap(2), alpha=0.9, linewidth=2)
        ax[2].set_ylabel('Correlation (r)')
        ax[2].set_xlabel('Lag')
        ymin = covariance_autocorrelation.min()
        ymax = 1
        ax[2].axis([0, n_lags - 1, np.around(ymin - 0.05, 1), np.around(ymax + 0.05, 1)])
        tvc_benchmarker.square_axis(ax[2])

        plt.suptitle(param_title, fontsize=11)
        plt.tight_layout(rect=[0, 0, 1, 0.95])
        file_name = fig_prefix + 'fluctuating-covariance' + param_sname + '.pdf'
        # `dpi` is the resolution keyword of savefig (the original passed `r=600`)
        plt.savefig(os.path.join(fig_dir, file_name), dpi=600)
        plt.close('all')
def plot_betadfc_distribution(dfc, dat_dir, fig_dir=None, model_prefix=None, burn=1000, mi='alpha', cm='Set2'):
    """
    Plot the posterior distribution of 'beta' for every DFC method.

    The saved bayes models are loaded with tvc_benchmarker.load_bayes_model
    using the name ``<model_prefix_>method-<method><_params>``. One PDF is
    written per multi-index parameter combination.

    **Input**

    :dfc: dataframe with one column per DFC method.
    :dat_dir: directory where the bayes models are saved.
    :fig_dir: directory to save the figures in (created if missing). Default './'
    :model_prefix: optional prefix of the model (and figure) file names.
    :burn: how many samples from the start of the trace to discard.
    :mi: name (or list of names) of the index levels to loop over.
    :cm: colormap name passed to tvc_benchmarker.get_discrete_colormap.

    **Returns**

    Nothing. Figures are saved as
    ``<fig_dir>/<model_prefix_>beta-posterior<_params>.pdf``.
    """
    if isinstance(mi, str):
        mi = [mi]
    # An absent prefix must become '' so that it can be concatenated below
    model_prefix = model_prefix + '_' if model_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)
    colormap = tvc_benchmarker.get_discrete_colormap(cm)

    methods = sorted(dfc.columns)
    bins = np.arange(-1, 1, 0.001)

    for mi_params in mi_parameters:
        pairs = list(zip(mi, mi_params))
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in pairs)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')
        param_title = ','.join(p[0] + '=' + str(p[1]) for p in pairs)
        param_title = param_title.replace(' ', '').replace(',', ', ')

        # squeeze=False keeps an indexable array even when there is one method
        fig, axes = plt.subplots(len(methods), 1, sharex=True, sharey=True,
                                 figsize=(5, len(methods)), squeeze=False)
        axes = axes[:, 0]

        xmin, xmax = np.inf, -np.inf
        for i, method in enumerate(methods):
            model_name = model_prefix + 'method-' + method + param_sname
            trace = tvc_benchmarker.load_bayes_model(dat_dir, model_name)[0]
            beta_dfc = trace[burn:].get_values('beta')
            # Plot
            axes[i].hist(beta_dfc, bins, histtype='stepfilled', color=colormap(i),
                         density=True, alpha=0.4, linewidth=2, label=method)
            axes[i].set_yticklabels([])
            axes[i].set_ylabel(method)
            # Track the range over all methods for the shared x axis
            xmin = min(xmin, np.min(beta_dfc))
            xmax = max(xmax, np.max(beta_dfc))

        axes[0].get_yaxis().set_major_locator(LinearLocator(numticks=4))
        axes[0].set_xlim([np.around(xmin - 0.005, 2), np.around(xmax + 0.005, 2)])
        axes[-1].set_xlabel('Posterior (' + r'$β$' + ')')
        fig.suptitle(param_title, fontsize=11)
        fig.tight_layout(rect=[0, 0, 1, 0.95])
        file_name = model_prefix + 'beta-posterior' + param_sname + '.pdf'
        # `dpi` is the resolution keyword of savefig (the original passed `r=600`)
        plt.savefig(os.path.join(fig_dir, file_name), dpi=600)
        plt.close('all')
def _plot_raw_timeseries_pair(ax, x, mi_params, limitaxis, colormap):
    """Draw the first `limitaxis` points of timeseries_1 and timeseries_2 on `ax`."""
    for i, column in enumerate(['timeseries_1', 'timeseries_2']):
        head = x[column][mi_params].iloc[:limitaxis]
        # x values follow the actual number of points, so that series shorter
        # than limitaxis can still be plotted
        ax.plot(np.arange(1, len(head) + 1), head, color=colormap(i), alpha=0.9, linewidth=2)
    ax.set_xlim(1, limitaxis)
    ax.set_ylabel('Signal Amplitude')
    ax.set_xlabel('Time')


def plot_timeseries(x, plot_autocorr='no', fig_dir=None, fig_prefix=None, cm='Set2', limitaxis=100, mi='alpha'):
    """
    Plot the raw simulated time series, optionally with autocorrelation panels.

    One PDF is written per multi-index parameter combination.

    **Input**

    :x: dataframe with the columns timeseries_1 and timeseries_2.
    :plot_autocorr: 'no' plots only the raw time series. Any other value adds
        the autocorrelation of each time series and a density plot of
        time series 1 against time series 2.
    :fig_dir: directory to save the figures in (created if missing). Default './'
    :fig_prefix: optional prefix of the saved file names.
    :cm: colormap name passed to tvc_benchmarker.get_discrete_colormap.
    :limitaxis: number of time points shown in the raw time-series panel.
    :mi: name (or list of names) of the index levels to loop over.

    **Returns**

    Nothing. Figures are saved as
    ``<fig_dir>/<fig_prefix_>raw-timeseries<_params>.pdf``.
    """
    if isinstance(mi, str):
        mi = [mi]
    fig_prefix = fig_prefix + '_' if fig_prefix else ''
    if not fig_dir:
        fig_dir = './'
    os.makedirs(fig_dir, exist_ok=True)

    params = {m: np.unique(x.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)
    colormap = tvc_benchmarker.get_discrete_colormap(cm)

    for mi_params in mi_parameters:
        pairs = list(zip(mi, mi_params))
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in pairs)
        if param_sname:
            param_sname = '_' + param_sname.replace(' ', '')
        param_title = ','.join(p[0] + '=' + str(p[1]) for p in pairs)
        param_title = param_title.replace(' ', '').replace(',', ', ')
        # No multi-index level to loop over: select every row
        if mi_params == ():
            mi_params = np.arange(0, len(x))

        if plot_autocorr == 'no':
            fig, ax = plt.subplots(1)
            _plot_raw_timeseries_pair(ax, x, mi_params, limitaxis, colormap)
        else:
            autocorrelation = np.array([
                tvc_benchmarker.autocorr(x[ts][mi_params])
                for ts in ['timeseries_1', 'timeseries_2']
            ])
            n_lags = autocorrelation.shape[1]

            # Wide panel on top, three square panels below
            plt.figure()
            ax = [plt.subplot2grid((2, 3), (0, 0), colspan=3)]
            ax.extend(plt.subplot2grid((2, 3), (1, n)) for n in range(0, 3))

            # Plot 1: raw time series
            _plot_raw_timeseries_pair(ax[0], x, mi_params, limitaxis, colormap)

            # Plot 2 and 3: autocorrelation of timeseries 1 and 2
            for p in range(1, 3):
                ax[p].plot(np.arange(0, n_lags), autocorrelation[p - 1, :],
                           color=colormap(p - 1), alpha=0.9, linewidth=2)
                ax[p].set_ylabel('Correlation (r)')
                ax[p].set_xlabel('Lag')
                ax[p].axis([0, n_lags - 1, 0, 1])
                ax[p].set_yticks(np.arange(0, 1.05, 0.25))
                ax[p].set_xticks(np.arange(0, n_lags, 2))

            # Plot 4: correlation of timeseries 1 and 2.
            # The target axes are passed explicitly instead of relying on the
            # last created subplot being the current axes.
            cmap = sns.cubehelix_palette(start=1/3, light=1, as_cmap=True)
            ax[3] = sns.kdeplot(x['timeseries_1'][mi_params], x['timeseries_2'][mi_params],
                                shade=True, cmap=cmap, ax=ax[3])
            ax[3].set_xlabel('Signal 1 amplitude')
            ax[3].set_ylabel('Signal 2 amplitude')

            for n in [1, 2, 3]:
                tvc_benchmarker.square_axis(ax[n])

        plt.suptitle(param_title, fontsize=11)
        plt.tight_layout(rect=[0, 0, 1, 0.95])
        file_name = fig_prefix + 'raw-timeseries' + param_sname + '.pdf'
        # `dpi` is the resolution keyword of savefig (the original passed `r=600`)
        plt.savefig(os.path.join(fig_dir, file_name), dpi=600)
        plt.close('all')
def calc_waic(dfc, model_dir, save_dir, file_prefix=None, mi='alpha', burn=1000):
    """
    Calculates WAIC of the saved bayes models, saves and prints a table.

    One markdown table is written per multi-index parameter combination to
    ``<save_dir>/<file_prefix_>waictable_<params>.md``.

    **Input**

    :dfc: dfc dataframe, one column per method.
    :model_dir: where the bayesian models are saved.
    :save_dir: where to save the tables.
    :file_prefix: prefix of the model and table file names.
    :mi: Loop over multiple models of this multiindex.
    :burn: how many from start of trace to discard.

    **Returns**

    :waic: (numpy array) of shape (number of methods, 3) holding the first
        three values returned by pm.stats.waic for each method. Only the array
        of the LAST parameter combination is returned.
    """
    # An absent prefix must become '' so that it can be concatenated below
    file_prefix = file_prefix + '_' if file_prefix else ''
    if isinstance(mi, str):
        mi = [mi]

    params = {m: np.unique(dfc.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    for mi_params in mi_parameters:
        param_sname = '_'.join(p[0] + '-' + str(p[1]) for p in zip(mi, mi_params))
        param_sname = '_' + param_sname.replace(' ', '')

        waic = np.zeros([len(dfc.columns), 3])
        for i, method in enumerate(dfc.columns):
            file_name = file_prefix + 'method-' + method + param_sname
            tm = tvc_benchmarker.load_bayes_model(model_dir, file_name)
            waic[i, :] = np.array(pm.stats.waic(tm[0][burn:], tm[1])[0:3])

        # Rank the models by their WAIC and express each relative to the best
        odr = np.argsort(waic[:, 0])
        delta_waic = waic[:, 0] - waic[odr[0], 0]

        # Create table for tabulate (raw string: "\D" is not a valid escape)
        tablelst = [["Model", "WAIC", "WAIC SE", r"$\Delta$ WAIC"]]
        for i in odr:
            tablelst.append([dfc.columns[i], waic[i, 0], waic[i, 1], delta_waic[i]])

        # Make markdown table and save
        mdtable = tabulate.tabulate(tablelst, headers="firstrow", tablefmt='simple')
        table_path = save_dir + '/' + file_prefix + 'waictable' + param_sname + '.md'
        with open(table_path, 'w') as f:
            f.write(mdtable)
        print(mdtable)

    return waic
def _nan_pad(values, width):
    """Pad a 1-D estimate with `width` NaNs on both sides."""
    return np.pad(values, width, mode='constant', constant_values=np.nan)


def _derive_pairwise(data, mi_parameters, dfc_params, postprocess=None):
    """Derive the timeseries_1/timeseries_2 edge for every simulation and concatenate the results."""
    # An empty first parameter tuple means there is no multi-index to loop over
    selections = mi_parameters if mi_parameters[0] else [None]
    estimates = []
    for mi_params in selections:
        ts1 = data['timeseries_1']
        ts2 = data['timeseries_2']
        if mi_params is not None:
            ts1 = ts1[mi_params]
            ts2 = ts2[mi_params]
        connectivity = teneto.derive.derive(np.array([ts1, ts2]), dfc_params)[0, 1, :]
        if postprocess is not None:
            connectivity = postprocess(connectivity)
        estimates.append(connectivity)
    # Line up all appended arrays
    return np.concatenate(estimates)


def dfc_calc(data,methods=['SW','TSW','SD','JC','TD'],sw_window=63,taper_name='norm',taper_properties=[0,10],sd_distance='euclidean',mtd_window=7,mi='alpha',colind=None):
    """
    Calculate (or load precalculated) dynamic connectivity estimates.

    **Input**

    :data: either a dataframe with the columns timeseries_1 and timeseries_2,
        or the name of a simulation ("sim-1", "sim-2", "sim-3" or "sim-4")
        whose precalculated estimates are loaded from the tvc_benchmarker
        package directory.
    :methods: method name or list of method names. Parameters used by each:

        - 'SW' (sliding window): sw_window
        - 'TSW' (tapered sliding window): sw_window, taper_name, taper_properties
        - 'SD' (spatial distance): sd_distance
        - 'JC' (jackknife): no parameters
        - 'TD' (temporal derivative; 'MTD' is accepted as an alias): mtd_window

    :sw_window: [Integer] length of the sliding window.
    :taper_name: [string] name of the scipy.stats distribution used as taper
        (see teneto.derive.derive for more information).
    :taper_properties: [list] properties of scipy.stats.[taper_name]. E.g. if
        taper_name = 'norm', taper_properties = [0,10] are the mean and
        standard deviation of the distribution.
    :sd_distance: [string] distance function used to calculate the similarity
        between time points. Can be any of the distance functions in
        scipy.spatial.distance.
    :mtd_window: [Integer] length of the temporal derivative window.
    :mi: name (or list of names) of the index levels to loop over.
    :colind: number of index columns of the precalculated csv file. Inferred
        from the simulation name if not given.

    **Returns**

    :df: dataframe with one column per requested method. Windowed estimates
        are padded with NaN to the length of the time series.

    **Raises**

    ValueError if data is an unknown simulation name and colind is not given.
    """
    # Make methods variable a list if single string is given (for both branches)
    if isinstance(methods, str):
        methods = [methods]
    # The temporal derivative column is called 'TD'; 'MTD' is kept as an alias
    methods = ['TD' if m == 'MTD' else m for m in methods]

    # If data is a string, load precalculated data
    if isinstance(data, str):
        if not colind:
            if data == 'sim-1':
                colind = 1
            elif data in ('sim-2', 'sim-3', 'sim-4'):
                colind = 2
            else:
                raise ValueError('unknown simulation. Input must be "sim-1", "sim-2", "sim-3" or "sim-4"')
        df = pd.read_csv(tvc_benchmarker.__path__[0] + '/data/dfc/' + data + '_dfc.csv',
                         index_col=list(range(colind)))
        # Keep only the requested methods (in the column order of the file).
        # The selection was previously computed but never assigned.
        requested_methods = [c for c in df.columns if c in methods]
        return df[requested_methods]

    # Otherwise calculate
    if isinstance(mi, str):
        mi = [mi]
    params = {m: np.unique(data.index.get_level_values(m)) for m in mi}
    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    sw_pad = int((sw_window - 1) / 2)
    mtd_pad = int((mtd_window - 1) / 2)
    dfc = {}

    # Sliding window
    if 'SW' in methods:
        dfc_params = {
            'windowsize': sw_window,
            'method': 'slidingwindow',
            'dimord': 'node,time',
            'postpro': 'fisher',
            'report': 'no',
        }
        dfc['SW'] = _derive_pairwise(data, mi_parameters, dfc_params,
                                     lambda c: _nan_pad(c, sw_pad))

    # Tapered sliding window
    if 'TSW' in methods:
        dfc_params = {
            'windowsize': sw_window,
            'distribution': taper_name,
            # Copy so that the shared default list can never be modified
            'distribution_params': list(taper_properties),
            'method': 'taperedslidingwindow',
            'dimord': 'node,time',
            'postpro': 'fisher',
            'report': 'no',
        }
        dfc['TSW'] = _derive_pairwise(data, mi_parameters, dfc_params,
                                      lambda c: _nan_pad(c, sw_pad))

    # Spatial distance
    if 'SD' in methods:
        dfc_params = {
            # Previously hard-coded to 'euclidean', ignoring sd_distance
            'distance': sd_distance,
            'method': 'spatialdistance',
            'dimord': 'node,time',
            'postpro': 'fisher',
            'report': 'no',
        }
        dfc['SD'] = _derive_pairwise(data, mi_parameters, dfc_params)

    # Jackknife
    if 'JC' in methods:
        dfc_params = {
            'method': 'jackknife',
            'dimord': 'node,time',
            'postpro': 'fisher',
            'report': 'no',
        }
        dfc['JC'] = _derive_pairwise(data, mi_parameters, dfc_params)

    # Temporal derivative
    if 'TD' in methods:
        dfc_params = {
            'method': 'mtd',
            'dimord': 'node,time',
            'postpro': 'no',
            'windowsize': mtd_window,
            'report': 'no',
        }
        # The derivative drops the first time point: add it back before padding
        dfc['TD'] = _derive_pairwise(data, mi_parameters, dfc_params,
                                     lambda c: _nan_pad(np.hstack([np.nan, c]), mtd_pad))

    df = pd.DataFrame(data=dfc, index=data.index)
    return df
def gen_data_sim2(params, mi='alpha'):
    """
    Simulate two time series whose covariance fluctuates and is autocorrelated.

    *INPUT*

    params is a dictionary which contains the following:

    :n_samples: length of each time series.
    :alpha: weight of the previous covariance value that is added to the
        newly sampled covariance.
    :mu: mean of the time series. Array/list of length 2 (repeated for every
        time point) or of shape 2 x n_samples.
    :var: variance of the time series (the diagonal of the covariance matrix).
    :covar_mu: mean of the covariance. Single value (repeated for every time
        point) or array of length n_samples.
    :covar_sigma: spread of the normal distribution the covariance is sampled
        from (passed as the scale argument of np.random.normal).
    :randomseed: seed for numpy's global random state.

    :mi: multi-index specification forwarded to
        ``tvc_benchmarker.multiindex_preproc``. Parameters named here hold a
        list of values, e.g. if mi='mu' then mu=[[0,0],[1,1]].

    *LIMITATIONS*

    Only 2 time series are generated, and both share the same variance.

    *RETURNS*

    :df: pandas dataframe with timeseries_1, timeseries_2 and
        covariance_parameter, indexed by the multi-index parameters followed
        by 'time'.
    """
    # Random seed
    np.random.seed(params['randomseed'])

    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    # Flat output buffers; the simulations sit back to back
    x = np.zeros([2, params['n_samples']] + mi_num)
    x = x.reshape([2, int(np.prod(x.shape) / 2)])
    fluct_cv = np.zeros([params['n_samples']] + mi_num).flatten()

    for sim_it, mi_params in enumerate(mi_parameters):
        # Defaults overridden by the multi-index values of this simulation
        sim = dict(params)
        sim.update(zip(mi, mi_params))
        n_samples = sim['n_samples']
        offset = n_samples * sim_it

        # Repeat mu over time unless it already has one column per time point
        mu = np.array(sim['mu'], ndmin=2)
        if mu.shape[-1] != n_samples:
            mu = np.tile(mu.transpose(), n_samples)

        # Same for the mean of the covariance
        covar_mu = np.array(sim['covar_mu'], ndmin=1)
        if covar_mu.shape[-1] != n_samples:
            covar_mu = np.tile(covar_mu.transpose(), n_samples)

        for t in range(0, n_samples):
            idx = offset + t
            covar = np.random.normal(covar_mu[t], sim['covar_sigma'])
            # From the second time point on, carry over part of the previous covariance
            if t != 0:
                covar = covar + sim['alpha'] * fluct_cv[idx - 1]
            covariance_matrix = [[sim['var'], covar], [covar, sim['var']]]
            x[:, idx] = np.random.multivariate_normal(mu[:, t], covariance_matrix, 1)
            fluct_cv[idx] = covar

    if np.any(np.abs(fluct_cv) > 1):
        print(
            'TVC BENCHMARKER WARNING: some value(s) of r_t>1 or r_t<-1. Consider changing parameters.'
        )

    multi_ind = pd.MultiIndex.from_product(
        mi_param_list + [np.arange(0, n_samples)],
        names=mi + ['time'])
    columns = {
        'timeseries_1': x[0, :],
        'timeseries_2': x[1, :],
        'covariance_parameter': fluct_cv,
    }
    return pd.DataFrame(data=columns, index=multi_ind)
def gen_data_sim4(params, mi=None):
    """
    Simulate two time series whose covariance switches between discrete states.

    *INPUT*

    params is a dictionary which contains the following:

    :n_samples: length of each time series.
    :mu: mean of the time series. Array/list of length 2 (repeated for every
        time point) or of shape 2 x n_samples.
    :var: variance of the time series (the diagonal of the covariance matrix).
    :covar_range: list of possible mean covariances; one is drawn at random
        for every state.
    :state_length: list of possible state durations; one is drawn at random
        for every state.
    :covar_sigma: spread of the normal distribution the covariance is sampled
        from around the mean of the current state.
    :randomseed: seed for numpy's global random state.

    :mi: multi-index specification forwarded to
        ``tvc_benchmarker.multiindex_preproc``.

    *LIMITATIONS*

    Only 2 time series are generated, and both share the same variance.

    *RETURNS*

    :df: pandas dataframe with timeseries_1, timeseries_2,
        covariance_parameter (the sampled covariance) and covariance_mean
        (the mean covariance of the state), indexed by the multi-index
        parameters followed by 'time'.
    """
    # Random seed
    np.random.seed(params['randomseed'])

    mi, mi_num, mi_parameters, mi_param_list = tvc_benchmarker.multiindex_preproc(params, mi)

    # Flat output buffers; the simulations sit back to back
    x = np.zeros([2, params['n_samples']] + mi_num)
    x = x.reshape([2, int(np.prod(x.shape) / 2)])
    total_samples = params['n_samples'] * len(mi_parameters)
    fluct_cv = np.zeros([total_samples])
    fluct_cv_state = np.zeros([total_samples])

    for sim_it, mi_params in enumerate(mi_parameters):
        # Defaults overridden by the multi-index values of this simulation
        sim = dict(params)
        sim.update(zip(mi, mi_params))
        n_samples = sim['n_samples']
        offset = n_samples * sim_it

        # Repeat mu over time unless it already has one column per time point
        mu = np.array(sim['mu'], ndmin=2)
        if mu.shape[-1] != n_samples:
            mu = np.tile(mu.transpose(), n_samples)

        # Chain randomly drawn states until the whole time series is covered
        covar_mu = np.array([])
        while len(covar_mu) < n_samples:
            new_covariance = np.random.permutation(sim['covar_range'])[0]
            covariance_length = np.random.permutation(sim['state_length'])[0]
            state = np.tile(new_covariance, covariance_length)
            covar_mu = np.hstack([covar_mu, state])
        covar_mu = covar_mu[:n_samples]
        fluct_cv_state[offset:offset + n_samples] = covar_mu

        for t in range(0, n_samples):
            idx = offset + t
            covar = np.random.normal(covar_mu[t], sim['covar_sigma'])
            covariance_matrix = [[sim['var'], covar], [covar, sim['var']]]
            x[:, idx] = np.random.multivariate_normal(mu[:, t], covariance_matrix, 1)
            fluct_cv[idx] = covar

    # Reshape for pandas dataframe
    x = x.reshape([x.shape[0], np.prod(x.shape[1:])], order='F')
    fluct_cv = fluct_cv.reshape([np.prod(fluct_cv.shape)], order='F')
    fluct_cv_state = fluct_cv_state.reshape([np.prod(fluct_cv_state.shape)], order='F')

    multi_ind = pd.MultiIndex.from_product(
        mi_param_list + [np.arange(0, n_samples)],
        names=mi + ['time'])
    columns = {
        'timeseries_1': x[0, :],
        'timeseries_2': x[1, :],
        'covariance_parameter': fluct_cv,
        'covariance_mean': fluct_cv_state,
    }
    return pd.DataFrame(data=columns, index=multi_ind)