def ar_testerr_non_uniform(dataLst, settings):
    # Out-of-sample test error of an AR model fitted to trials of non-uniform length
    testFrac = settings.get('testfrac', 0.1)
    dataLst2D = _preprocess_ar_non_uniform(dataLst, settings)
    x, y = drop_nan_rows(splitter.split2D_non_unifrom(dataLst2D, settings['hist']))
    return _ar_2D_testerr(x, y, testFrac)
def plot_metric_bulk_1D(dataDB, ds, metricName, nameSuffix, prepFunc=None, xlim=None, ylim=None,
                        yscale=None, verbose=True, xFunc=None, haveTimeLabels=False):
    # 1. Extract all results for this test
    dfAll = ds.list_dsets_pd().fillna('None')

    dfAnalysis = pd_query(dfAll, {'metric': metricName, 'name': nameSuffix})

    # Move leading columns to the front for more informative printing/saving
    dfAnalysis = pd_move_cols_front(dfAnalysis, ['metric', 'name', 'mousename'])
    dfAnalysis = dfAnalysis.drop(['target_dim', 'datetime', 'shape'], axis=1)

    # Loop over all column combinations except mousename and dset
    colsExcl = list(set(dfAnalysis.columns) - {'mousename', 'dset'})

    for colVals, dfSub in dfAnalysis.groupby(colsExcl):
        fig, ax = plt.subplots(figsize=(4, 4))
        if verbose:
            print(list(colVals))

        for idxMouse, rowMouse in dfSub.sort_values(by='mousename').iterrows():
            print(list(rowMouse.values))

            dataThis = ds.get_data(rowMouse['dset'])
            assert dataThis.ndim == 1, 'Only using 1D data for this plot function'

            if prepFunc is not None:
                dataThis = prepFunc(dataThis)

            x = np.arange(len(dataThis)) if xFunc is None else np.array(xFunc(rowMouse['mousename'], len(dataThis)))
            x, dataThis = drop_nan_rows([x, dataThis])
            ax.plot(x, dataThis, label=rowMouse['mousename'])

        if yscale is not None:
            ax.set_yscale(yscale)

        if haveTimeLabels:
            dataDB.label_plot_timestamps(ax, linecolor='y', textcolor='k', shX=-0.5, shY=0.05)

        # Name the figure after the remaining metadata columns of the last row
        dataName = rowMouse.drop(['dset', 'mousename'])
        dataName = '_'.join([str(el) for el in dataName])

        prefixPath = 'pics/bulk/' + metricName + '/'
        make_path(prefixPath)

        ax.legend()
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.set_xlabel(nameSuffix)
        ax.set_ylabel(metricName)
        plt.savefig(prefixPath + dataName + '.png', dpi=200)
        plt.close()
def average_predictive_info(data, settings):
    x, y = drop_nan_rows(split3D(data, settings['max_lag']))

    nSample, nProcess = x.shape
    if nSample < 5 + 5 * nProcess:
        # If there are too few samples, there is no point in calculating anything
        return np.array(np.nan)
    else:
        return ee.mi(x, y) / nProcess
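# Illustrative sketch, not part of the analysis pipeline: how average_predictive_info might be
# called. The shape (trials, channels, timesteps) is an assumption, inferred from the 3D
# convention used in _preprocess_mar_inp below; `ee.mi` is assumed to be the mutual-information
# estimator imported by this module.
def _demo_average_predictive_info():
    dataDemo = np.random.normal(size=(20, 3, 50))  # 20 trials, 3 channels, 50 timesteps
    # Mutual information between lagged past and present, normalized by the number of channels
    return average_predictive_info(dataDemo, {'max_lag': 2})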
def _preprocess_mar_inp(data, inp, nHist):
    x, y = splitter.split3D(data, nHist)

    assert inp.ndim == 3, "Input matrix must be a 3D matrix"
    assert np.prod(inp.shape) != 0, "Input matrix is degenerate"
    nTr, nCh, nT = data.shape
    nTrInp, nChInp, nTInp = inp.shape
    assert nTr == nTrInp, "Input shape must be consistent with data shape"
    assert nT == nTInp, "Input shape must be consistent with data shape"

    # Convert input into the form (rps) -> (r*s, p)
    inpCanon = numpy_transpose_byorder(inp, 'rps', 'rsp')
    u = numpy_merge_dimensions(inpCanon[:, nHist:], 0, 2)

    # Drop any nan rows that are present in the data or input
    return drop_nan_rows([x, y, u])
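# Sketch of the (rps) -> (r*s, p) transform above in plain numpy, assuming
# numpy_transpose_byorder / numpy_merge_dimensions are thin wrappers around np.transpose and
# np.reshape (an assumption about those helpers, not a verified equivalence):
def _demo_merge_rps(inp, nHist):
    inpCanon = np.transpose(inp, (0, 2, 1))               # (trials, channels, time) -> (trials, time, channels)
    return inpCanon[:, nHist:].reshape(-1, inp.shape[1])  # -> (trials * (time - nHist), channels)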
def _preprocess_mar_inp_non_uniform(dataLst, inpLst, nHist):
    x, y = splitter.split3D_non_uniform(dataLst, nHist)

    assert len(dataLst) == len(inpLst), "Input must have same number of trials as data"
    for data, inp in zip(dataLst, inpLst):
        assert inp.ndim == 2, "Input must be a list of 2D matrices"
        assert inp.shape[1] == data.shape[1], "Input must have same number of timesteps as data"

    # Test that input has the same number of features for each trial
    nChInp = list_assert_get_uniform_shape(inpLst, axis=1)

    # Shape transform for u :: (rps) -> (r*s, p)
    u = [inp[:, nHist:].T for inp in inpLst]   # (rps) -> (rsp)
    u = np.concatenate(u, axis=0)              # (rsp) -> (r*s, p)

    # Drop any nan rows that are present in the data or input
    return drop_nan_rows([x, y, u])
def average_predictive_info_non_uniform(dataLst, settings):
    # Test that all trials have enough timesteps to estimate the requested lag
    nSampleMin = np.min(set_list_shapes(dataLst, axis=1))
    if nSampleMin <= settings['max_lag']:
        raise ValueError(f"lag {settings['max_lag']} cannot be estimated for {nSampleMin} timesteps")

    xLst = []
    yLst = []
    for dataTrial in dataLst:
        x, y = drop_nan_rows(split3D(dataTrial, settings['max_lag']))
        xLst += [x]
        yLst += [y]
    xArr = np.vstack(xLst)
    yArr = np.vstack(yLst)

    nSample, nProcess = xArr.shape
    if nSample < 4 * nProcess:
        # If there are too few samples, there is no point in calculating anything
        return np.array(np.nan)
    else:
        return ee.mi(xArr, yArr) / nProcess
def mar_testerr_non_uniform(dataLst, settings):
    # Out-of-sample test error of a MAR model fitted to trials of non-uniform length
    testFrac = settings.get('testfrac', 0.1)
    x, y = drop_nan_rows(splitter.split3D_non_uniform(dataLst, settings['hist']))
    return _mar3D_testerr(x, y, testFrac)
def mar1_coeff_non_uniform(dataLst, settings):
    # MAR(1) coefficients for trials of non-uniform length
    x, y = drop_nan_rows(splitter.split3D_non_uniform(dataLst, 1))
    return _mar3D_alpha(x, y)
def ar1_coeff_non_uniform(dataLst3D, settings):
    # AR(1) coefficient for trials of non-uniform length
    dataLst2D = _preprocess_ar_non_uniform(dataLst3D, settings)
    x, y = drop_nan_rows(splitter.split2D_non_unifrom(dataLst2D, 1))
    return _ar_2D_alpha(x, y)
def mar1_testerr(data, settings):
    # Out-of-sample test error of a MAR(1) model
    testFrac = settings.get('testfrac', 0.1)
    x, y = drop_nan_rows(splitter.split3D(data, 1))
    return _mar3D_testerr(x, y, testFrac)
def mar1_coeff(data, settings):
    # MAR(1) coefficients
    x, y = drop_nan_rows(splitter.split3D(data, 1))
    return _mar3D_alpha(x, y)
def ar1_testerr(data, settings):
    # Out-of-sample test error of an AR(1) model
    testFrac = settings.get('testfrac', 0.1)
    data2D = _preprocess_ar(data, settings)
    x, y = drop_nan_rows(splitter.split2D(data2D, 1))
    return _ar_2D_testerr(x, y, testFrac)
def ar1_coeff(data, settings):
    # AR(1) coefficient
    data2D = _preprocess_ar(data, settings)
    x, y = drop_nan_rows(splitter.split2D(data2D, 1))
    return _ar_2D_alpha(x, y)
def poly_fit_transform(x, y, ord):
    # Fit a polynomial of order `ord` to the non-NaN points, then evaluate it on the full x grid
    xEff, yEff = drop_nan_rows([x, y])
    coeff = np.polyfit(xEff, yEff, ord)   # Fit to data without NaNs
    p = np.poly1d(coeff)
    return p(x)                           # Evaluate for original data
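# Example usage (illustrative): fit a quadratic to data containing a NaN. The NaN row is ignored
# during fitting, but the polynomial is still evaluated at every point of the original x grid.
def _demo_poly_fit_transform():
    x = np.linspace(0, 1, 11)
    y = 3 * x ** 2 - x + 0.5
    y[4] = np.nan                        # this point is dropped before fitting
    return poly_fit_transform(x, y, 2)   # smooth quadratic evaluated at all 11 points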
def binary_classifier(data1, data2, classifier, method="kfold", k=10, balancing=False,
                      pcaThr=None, havePVal=False):
    # Convert data to labeled form
    labels = [-1, 1]
    x, y = label_binary_data(data1, data2, *labels)

    # Drop NaN values
    xNoNan, yNoNan = drop_nan_rows([x, y])

    if pcaThr is not None:
        xNoNan = dim_reduction(xNoNan, pcaThr)
        print('Reduced number of dimensions to', xNoNan.shape[1])

    # Map labels to binary variable
    nData = len(yNoNan)
    if nData == 0:
        print("Warning: dataset had zero non-nan rows")
        return {"acc_train": 0, "acc_test": 0, "acc_naive": 0, "p-value": 1}

    nA = np.sum(yNoNan == 1)   # Number of points with label 1
    nB = nData - nA            # Number of points with label -1

    if (nA < 2) or (nB < 2):
        print("Warning: unexpected number of labels", nA, nB, "; aborting classification")
        # Return the same degenerate result as the zero-row case so callers always receive a dict
        return {"acc_train": 0, "acc_test": 0, "acc_naive": 0, "p-value": 1}

    # Add extra dimension if X is 1D
    if xNoNan.ndim == 1:
        xNoNan = xNoNan[:, None]
        print('Warning: Got 1D data, had to add extra dimension')

    cmTrain = np.zeros((2, 2), dtype=int)
    cmTest = np.zeros((2, 2), dtype=int)
    cvfunc = select_cv_iterator(method, xNoNan, yNoNan, k)
    for xTrain, yTrain, xTest, yTest in cvfunc:
        if balancing:
            xTrainEff, yTrainEff = balance_oversample(xTrain, yTrain, labels)
        else:
            xTrainEff, yTrainEff = xTrain, yTrain

        clf = classifier.fit(xTrainEff, yTrainEff)   # e.g. LogisticRegression(max_iter=1000)
        cmTrain += confusion_matrix(clf.predict(xTrain), yTrain, labels)
        cmTest += confusion_matrix(clf.predict(xTest), yTest, labels)

    # Weighted accuracy on the accumulated train/test confusion matrices
    accTrain = weighted_accuracy(cmTrain)
    accTest = weighted_accuracy(cmTest)

    rez = {"accTrain": accTrain, "accTest": accTest}
    if havePVal:
        rez = {**rez, **test_classifier_significance(nA, nB, cmTest)}

    return rez
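# Usage sketch (illustrative): classify two synthetic Gaussian clouds. This assumes data1 and
# data2 are (samples, features) arrays and that `classifier` is any sklearn-style estimator
# exposing .fit/.predict; LogisticRegression is chosen here only because it is the estimator
# mentioned in the comment above.
def _demo_binary_classifier():
    from sklearn.linear_model import LogisticRegression
    data1 = np.random.normal(0.0, 1.0, size=(100, 5))   # class -1
    data2 = np.random.normal(1.0, 1.0, size=(100, 5))   # class +1
    return binary_classifier(data1, data2, LogisticRegression(max_iter=1000),
                             method='kfold', k=5, havePVal=False)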
def scatter_metric_bulk(ds, metricName, nameSuffix, prepFunc=None, xlim=None, ylim=None,
                        yscale=None, verbose=True, xFunc=None, haveRegression=False):
    # 1. Extract all results for this test
    dfAll = ds.list_dsets_pd().fillna('None')

    dfAnalysis = pd_query(dfAll, {'metric': metricName, 'name': nameSuffix})

    # Move leading columns to the front for more informative printing/saving
    dfAnalysis = pd_move_cols_front(dfAnalysis, ['metric', 'name', 'mousename'])
    dfAnalysis = dfAnalysis.drop(['target_dim', 'datetime', 'shape'], axis=1)

    if 'performance' in dfAnalysis.columns:
        dfAnalysis = dfAnalysis[dfAnalysis['performance'] == 'None'].drop(['performance'], axis=1)

    # Loop over all column combinations except mousename and dset
    colsExcl = list(set(dfAnalysis.columns) - {'mousename', 'dset'})

    for colVals, dfSub in dfAnalysis.groupby(colsExcl):
        fig, ax = plt.subplots()
        if verbose:
            print(list(colVals))

        xLst = []
        yLst = []
        for idxMouse, rowMouse in dfSub.sort_values(by='mousename').iterrows():
            print(list(rowMouse.values))

            dataThis = ds.get_data(rowMouse['dset'])
            if prepFunc is not None:
                dataThis = prepFunc(dataThis)

            x = np.arange(len(dataThis)) if xFunc is None else np.array(xFunc(rowMouse['mousename'], len(dataThis)))
            x, dataThis = drop_nan_rows([x, dataThis])

            ax.plot(x, dataThis, '.', label=rowMouse['mousename'])
            xLst += [x]
            yLst += [dataThis]

        if yscale is not None:
            plt.yscale(yscale)

        # Name the figure after the remaining metadata columns of the last row
        dataName = rowMouse.drop(['dset', 'mousename'])
        dataName = '_'.join([str(el) for el in dataName])

        ax.legend()
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

        if haveRegression:
            sns.regplot(ax=ax, x=np.hstack(xLst), y=np.hstack(yLst), scatter=False)

        prefixPath = 'pics/bulk/' + metricName + '/'
        make_path(prefixPath)
        fig.savefig(prefixPath + dataName + '.png')
        plt.close()