def ar_testerr_non_uniform(dataLst, settings):
    testFrac = settings.get('testfrac', 0.1)

    dataLst2D = _preprocess_ar_non_uniform(dataLst, settings)
    x, y = drop_nan_rows(splitter.split2D_non_uniform(dataLst2D, settings['hist']))
    return _ar_2D_testerr(x, y, testFrac)
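
Every snippet on this page funnels its regressors through drop_nan_rows before fitting or estimation. The helper itself is not shown here; a minimal sketch consistent with how it is called (a list or tuple of arrays sharing their first dimension in, the same arrays with jointly-NaN rows removed out) could look like the following. The _sketch suffix marks it as a hypothetical reimplementation, not the library's code:

import numpy as np

def drop_nan_rows_sketch(arrays):
    # Hypothetical stand-in: remove every row index at which any of the arrays has a NaN
    arrays = [np.asarray(arr) for arr in arrays]
    stacked = [arr[:, None] if arr.ndim == 1 else arr for arr in arrays]
    nanMask = np.any(np.isnan(np.hstack(stacked)), axis=1)
    return [arr[~nanMask] for arr in arrays]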
Example #2
def plot_metric_bulk_1D(dataDB, ds, metricName, nameSuffix, prepFunc=None, xlim=None, ylim=None, yscale=None,
                        verbose=True, xFunc=None, haveTimeLabels=False):  # , dropCols=None
    # 1. Extract all results for this test
    dfAll = ds.list_dsets_pd().fillna('None')
    # if dropCols is not None:
    #     dfAll = dfAll.drop(dropCols, axis=1)

    dfAnalysis = pd_query(dfAll, {'metric' : metricName, "name" : nameSuffix})
    dfAnalysis = pd_move_cols_front(dfAnalysis, ['metric', 'name', 'mousename'])  # Move key columns to the front for more informative printing/saving
    dfAnalysis = dfAnalysis.drop(['target_dim', 'datetime', 'shape'], axis=1)

    # Group over all columns except mousename and dset
    groupCols = list(set(dfAnalysis.columns) - {'mousename', 'dset'})

    for colVals, dfSub in dfAnalysis.groupby(groupCols):
        fig, ax = plt.subplots(figsize=(4, 4))

        if verbose:
            print(list(colVals))

        for idxMouse, rowMouse in dfSub.sort_values(by='mousename').iterrows():
            if verbose:
                print(list(rowMouse.values))

            dataThis = ds.get_data(rowMouse['dset'])
            assert dataThis.ndim == 1, 'Only using 1D data for this plot function'

            if prepFunc is not None:
                dataThis = prepFunc(dataThis)

            #                     if datatype == 'raw':
            #                         nTrialThis = dataDB.get_ntrial_bytype({'mousename' : row['mousename']}, trialType=trialType, performance=performance)
            #                         dataThis *= np.sqrt(48*nTrialThis)
            #                         print('--', row['mousename'], nTrialThis)

            x = np.arange(len(dataThis)) if xFunc is None else np.array(xFunc(rowMouse['mousename'], len(dataThis)))
            x, dataThis = drop_nan_rows([x, dataThis])

            ax.plot(x, dataThis, label=rowMouse['mousename'])

        if yscale is not None:
            ax.set_yscale(yscale)

        if haveTimeLabels:
            dataDB.label_plot_timestamps(ax, linecolor='y', textcolor='k', shX=-0.5, shY=0.05)

        # All rows in dfSub share the grouped values, so the last rowMouse suffices for naming
        dataName = rowMouse.drop(['dset', 'mousename'])
        dataName = '_'.join([str(el) for el in dataName])

        prefixPath = 'pics/bulk/' + metricName + '/'
        make_path(prefixPath)

        ax.legend()
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        ax.set_xlabel(nameSuffix)
        ax.set_ylabel(metricName)
        fig.savefig(prefixPath + dataName + '.png', dpi=200)
        plt.close()
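
The xFunc hook above receives the mouse name and the number of data points and must return matching x-coordinates. A hypothetical callback converting sample indices to seconds, assuming a 20 Hz sampling rate (the rate is not specified anywhere in this snippet):

FPS = 20.0  # assumed sampling rate, purely illustrative

def xfunc_seconds(mousename, nPoints):
    # Hypothetical xFunc: ignore the mouse name, map sample indices to seconds
    return np.arange(nPoints) / FPS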
def average_predictive_info(data, settings):
    x, y = drop_nan_rows(split3D(data, settings['max_lag']))

    nSample, nProcess = x.shape
    if nSample < 5 + 5 * nProcess:
        # If there are too few samples, there is no point to calculate anything
        return np.array(np.nan)
    else:
        return ee.mi(x, y) / nProcess
def _preprocess_mar_inp(data, inp, nHist):
    x, y = splitter.split3D(data, nHist)

    assert inp.ndim == 3, "Input matrix must be a 3D matrix"
    assert np.prod(inp.shape) != 0, "Input matrix is degenerate"
    nTr, nCh, nT = data.shape
    nTrInp, nChInp, nTInp = inp.shape
    assert nTr == nTrInp, "Input shape must be consistent with data shape"
    assert nT == nTInp, "Input shape must be consistent with data shape"

    # Convert input into the form (rps) -> (r*s, p)
    inpCanon = numpy_transpose_byorder(inp, 'rps', 'rsp')
    u = numpy_merge_dimensions(inpCanon[:, nHist:], 0, 2)

    # Drop any nan rows that are present in the data or input
    return drop_nan_rows([x, y, u])
def _preprocess_mar_inp_non_uniform(dataLst, inpLst, nHist):
    x, y = splitter.split3D_non_uniform(dataLst, nHist)

    assert len(dataLst) == len(inpLst), "Input must have same number of trials as data"
    for data, inp in zip(dataLst, inpLst):
        assert inp.ndim == 2, "Input must be a list of 2D matrices"
        assert inp.shape[1] == data.shape[1], "Input must have same number of timesteps as data"

    # Test that input has the same number of features for each trial
    nChInp = list_assert_get_uniform_shape(inpLst, axis=1)

    # Shape transform for the input u: per trial (p, s) -> (s, p), then stack trials to (r*s, p)
    u = [inp[:, nHist:].T for inp in inpLst]
    u = np.concatenate(u, axis=0)

    # Drop any nan rows that are present in the data or input
    return drop_nan_rows([x, y, u])
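
A toy shape check of the stacking above, with two hypothetical trials of 3 input channels and unequal lengths:

inpLst = [np.zeros((3, 10)), np.zeros((3, 12))]  # (channels p, timesteps s) per trial
nHist = 2
u = np.concatenate([inp[:, nHist:].T for inp in inpLst], axis=0)
assert u.shape == (8 + 10, 3)  # (r*s, p): history-trimmed timesteps summed over trials, by channels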
def average_predictive_info_non_uniform(dataLst, settings):
    # Test that all trials have sufficient timesteps for lag estimation
    nSampleMin = np.min(set_list_shapes(dataLst, axis=1))
    if nSampleMin <= settings['max_lag']:
        raise ValueError(f"Lag {settings['max_lag']} cannot be estimated for trials with only {nSampleMin} timesteps")

    xLst = []
    yLst = []
    for dataTrial in dataLst:
        x, y = drop_nan_rows(split3D(dataTrial, settings['max_lag']))
        xLst += [x]
        yLst += [y]
    xArr = np.vstack(xLst)
    yArr = np.vstack(yLst)

    nSample, nProcess = xArr.shape
    if nSample < 4 * nProcess:
        # If there are too few samples, there is no point to calculate anything
        return np.array(np.nan)
    else:
        return ee.mi(xArr, yArr) / nProcess
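
Hypothetical usage with variable-length trials; any trial with at most max_lag timesteps trips the guard at the top, and too few pooled samples yields NaN rather than an unreliable estimate:

dataLst = [np.random.randn(3, 40), np.random.randn(3, 55)]  # 2 trials, 3 channels, unequal lengths
api = average_predictive_info_non_uniform(dataLst, {'max_lag': 2})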
def mar_testerr_non_uniform(dataLst, settings):
    testFrac = settings.get('testfrac', 0.1)

    x, y = drop_nan_rows(
        splitter.split3D_non_uniform(dataLst, settings['hist']))
    return _mar3D_testerr(x, y, testFrac)
def mar1_coeff_non_uniform(dataLst, settings):
    x, y = drop_nan_rows(splitter.split3D_non_uniform(dataLst, 1))
    return _mar3D_alpha(x, y)
def ar1_coeff_non_uniform(dataLst3D, settings):
    dataLst2D = _preprocess_ar_non_uniform(dataLst3D, settings)
    x, y = drop_nan_rows(splitter.split2D_non_uniform(dataLst2D, 1))
    return _ar_2D_alpha(x, y)
def mar1_testerr(data, settings):
    testFrac = settings.get('testfrac', 0.1)
    x, y = drop_nan_rows(splitter.split3D(data, 1))
    return _mar3D_testerr(x, y, testFrac)
def mar1_coeff(data, settings):
    x, y = drop_nan_rows(splitter.split3D(data, 1))
    return _mar3D_alpha(x, y)
def ar1_testerr(data, settings):
    testFrac = settings.get('testfrac', 0.1)
    data2D = _preprocess_ar(data, settings)
    x, y = drop_nan_rows(splitter.split2D(data2D, 1))
    return _ar_2D_testerr(x, y, testFrac)
def ar1_coeff(data, settings):
    data2D = _preprocess_ar(data, settings)
    x, y = drop_nan_rows(splitter.split2D(data2D, 1))
    return _ar_2D_alpha(x, y)
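
For orientation, the AR(1) coefficient these wrappers extract solves y_t ≈ alpha * y_{t-1} by least squares. _ar_2D_alpha is not shown on this page; an equivalent one-liner under that assumption (the _sketch suffix marks it as hypothetical):

def _ar_2D_alpha_sketch(x, y):
    # Hypothetical stand-in for _ar_2D_alpha: scalar OLS fit of y ≈ alpha * x
    x, y = np.ravel(x), np.ravel(y)
    return np.dot(x, y) / np.dot(x, x)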
Example #14
def poly_fit_transform(x, y, order):
    xEff, yEff = drop_nan_rows([x, y])
    coeff = np.polyfit(xEff, yEff, order)  # Fit to data without NaNs
    p = np.poly1d(coeff)
    return p(x)  # Evaluate for original data
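
Example usage (values illustrative): NaN gaps are ignored during the fit but still receive a fitted value on output:

x = np.linspace(0, 1, 11)
y = x**2
y[3] = np.nan                          # gap in the observations
ySmooth = poly_fit_transform(x, y, 2)  # position 3 now holds the fitted parabola value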
def binary_classifier(data1,
                      data2,
                      classifier,
                      method="kfold",
                      k=10,
                      balancing=False,
                      pcaThr=None,
                      havePVal=False):
    # Convert data to labeled form
    labels = [-1, 1]
    x, y = label_binary_data(data1, data2, *labels)

    # Drop NAN values
    xNoNan, yNoNan = drop_nan_rows([x, y])

    if pcaThr is not None:
        xNoNan = dim_reduction(xNoNan, pcaThr)
        print('Reduced number of dimensions to', xNoNan.shape[1])

    # Abort early if there are no usable samples or one class is underrepresented
    nData = len(yNoNan)
    if nData == 0:
        print("Warning: dataset had zero non-nan rows")
        return {"accTrain": 0, "accTest": 0, "accNaive": 0, "p-value": 1}

    nA = np.sum(yNoNan == 1)  # Number of points with label 1
    nB = nData - nA  # Number of points with label -1

    if (nA < 2) or (nB < 2):
        print("Warning: unexpected number of labels", nA, nB, "; aborting classification")
        return {"accTrain": 0, "accTest": 0}

    # Add extra dimension if X is 1D
    if xNoNan.ndim == 1:
        xNoNan = xNoNan[:, None]
        print('Warning: Got 1D data, had to add extra dimension')

    cmTrain = np.zeros((2, 2), dtype=int)
    cmTest = np.zeros((2, 2), dtype=int)

    cvfunc = select_cv_iterator(method, xNoNan, yNoNan, k)
    for xTrain, yTrain, xTest, yTest in cvfunc:
        if balancing:
            xTrainEff, yTrainEff = balance_oversample(xTrain, yTrain, labels)
        else:
            xTrainEff, yTrainEff = xTrain, yTrain

        clf = classifier.fit(xTrainEff, yTrainEff)  # e.g. LogisticRegression(max_iter=1000)

        cmTrain += confusion_matrix(clf.predict(xTrain), yTrain, labels=labels)
        cmTest += confusion_matrix(clf.predict(xTest), yTest, labels=labels)

    # print('cmTrain\n', cmTrain)
    # print('cmTest\n', cmTest)

    # Accuracy
    accTrain = weighted_accuracy(cmTrain)
    accTest = weighted_accuracy(cmTest)
    rez = {"accTrain": accTrain, "accTest": accTest}
    if havePVal:
        rez = {**rez, **test_classifier_significance(nA, nB, cmTest)}
        # rez = {**rez, **test_classifier_significance(nA, nB, len(yTest), accTest)}

    return rez
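
A hypothetical call, using the scikit-learn classifier hinted at in the commented line inside the CV loop; the data shapes and the class shift are illustrative:

from sklearn.linear_model import LogisticRegression

data1 = np.random.randn(50, 4)        # class -1: 50 samples, 4 features
data2 = np.random.randn(60, 4) + 0.5  # class +1: slightly shifted copy
rez = binary_classifier(data1, data2, LogisticRegression(max_iter=1000), method="kfold", k=5)
print(rez['accTrain'], rez['accTest'])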
Example #16
def scatter_metric_bulk(ds, metricName, nameSuffix, prepFunc=None, xlim=None, ylim=None, yscale=None,
                        verbose=True, xFunc=None, haveRegression=False):  # , dropCols=None
    # 1. Extract all results for this test
    dfAll = ds.list_dsets_pd().fillna('None')
    # if dropCols is not None:
    #     dfAll = dfAll.drop(dropCols, axis=1)

    dfAnalysis = pd_query(dfAll, {'metric' : metricName, "name" : nameSuffix})
    dfAnalysis = pd_move_cols_front(dfAnalysis, ['metric', 'name', 'mousename'])  # Move key columns to the front for more informative printing/saving
    dfAnalysis = dfAnalysis.drop(['target_dim', 'datetime', 'shape'], axis=1)

    if 'performance' in dfAnalysis.columns:
        dfAnalysis = dfAnalysis[dfAnalysis['performance'] == 'None'].drop(['performance'], axis=1)

    # Group over all columns except mousename and dset
    groupCols = list(set(dfAnalysis.columns) - {'mousename', 'dset'})

    for colVals, dfSub in dfAnalysis.groupby(groupCols):
        fig, ax = plt.subplots()

        if verbose:
            print(list(colVals))

        xLst = []
        yLst = []
        for idxMouse, rowMouse in dfSub.sort_values(by='mousename').iterrows():
            if verbose:
                print(list(rowMouse.values))

            dataThis = ds.get_data(rowMouse['dset'])

            if prepFunc is not None:
                dataThis = prepFunc(dataThis)

            #                     if datatype == 'raw':
            #                         nTrialThis = dataDB.get_ntrial_bytype({'mousename' : row['mousename']}, trialType=trialType, performance=performance)
            #                         dataThis *= np.sqrt(48*nTrialThis)
            #                         print('--', row['mousename'], nTrialThis)

            x = np.arange(len(dataThis)) if xFunc is None else np.array(xFunc(rowMouse['mousename'], len(dataThis)))
            x, dataThis = drop_nan_rows([x, dataThis])

            ax.plot(x, dataThis, '.', label=rowMouse['mousename'])
            xLst += [x]
            yLst += [dataThis]

        if yscale is not None:
            ax.set_yscale(yscale)

        # All rows in dfSub share the grouped values, so the last rowMouse suffices for naming
        dataName = rowMouse.drop(['dset', 'mousename'])
        dataName = '_'.join([str(el) for el in dataName])

        ax.legend()
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)

        if haveRegression:
            sns.regplot(ax=ax, x=np.hstack(xLst), y=np.hstack(yLst), scatter=False)

        prefixPath = 'pics/bulk/' + metricName + '/'
        make_path(prefixPath)

        fig.savefig(prefixPath + dataName + '.png')
        plt.close()
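
A hypothetical call: one scatter panel per parameter combination, with a single pooled regression line over all mice ('mean' and 'channel' are placeholder metric/suffix names):

scatter_metric_bulk(ds, 'mean', 'channel', haveRegression=True)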