Example #1
def test_corr():
    "Test stats.corr"
    ds = datasets.get_uts()
    y = ds.eval("uts.x[:,:3]")
    x = ds.eval('Y.x')
    n_cases = len(y)
    df = n_cases - 2

    corr = stats.corr(y, x)
    p = stats.rtest_p(corr, df)
    for i in range(len(corr)):
        r_sp, p_sp = scipy.stats.pearsonr(y[:, i], x)
        assert corr[i] == pytest.approx(r_sp)
        assert p[i] == pytest.approx(p_sp)

    # NaN
    with warnings.catch_warnings():  # divide by 0
        warnings.simplefilter("ignore")
        assert stats.corr(np.arange(10), np.zeros(10)) == 0

    # perm
    y_perm = np.empty_like(y)
    for perm in permute_order(n_cases, 2):
        y_perm[perm] = y
        stats.corr(y, x, corr, perm)
        for i in range(len(corr)):
            r_sp, _ = scipy.stats.pearsonr(y_perm[:, i], x)
            assert corr[i] == pytest.approx(r_sp)
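For reference, a minimal sketch of what this test exercises, assuming stats.corr computes a column-wise Pearson r and stats.rtest_p converts r into a two-tailed p-value via the t distribution (the names mirror the test, not the eelbrain internals):
import numpy as np
import scipy.stats

def corr_columns(y, x):
    # Pearson r between each column of y (shape (n, k)) and the vector x (shape (n,))
    yc = y - y.mean(axis=0)
    xc = x - x.mean()
    return yc.T.dot(xc) / (np.linalg.norm(yc, axis=0) * np.linalg.norm(xc))

def rtest_p(r, df):
    # two-tailed p-value for a Pearson r with df = n - 2
    t = r * np.sqrt(df / (1 - r ** 2))
    return 2 * scipy.stats.t.sf(np.abs(t), df)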
Example #2
def test_corr():
    "Test stats.corr"
    ds = datasets.get_uts()
    y = ds.eval("uts.x[:,:3]")
    x = ds.eval('Y.x')
    n_cases = len(y)
    df = n_cases - 2

    corr = stats.corr(y, x)
    p = stats.rtest_p(corr, df)
    for i in xrange(len(corr)):
        r_sp, p_sp = scipy.stats.pearsonr(y[:, i], x)
        assert_almost_equal(corr[i], r_sp)
        assert_almost_equal(p[i], p_sp)

    # NaN
    r = stats.corr(np.arange(10), np.zeros(10))
    eq_(r, 0)

    # perm
    y_perm = np.empty_like(y)
    for perm in permute_order(n_cases, 2):
        y_perm[perm] = y
        stats.corr(y, x, corr, perm)
        for i in xrange(len(corr)):
            r_sp, _ = scipy.stats.pearsonr(y_perm[:, i], x)
            assert_almost_equal(corr[i], r_sp)
Example #3
def test_corr():
    "Test stats.corr"
    ds = datasets.get_uts()
    y = ds.eval("uts.x[:,:3]")
    x = ds.eval('Y.x')
    n_cases = len(y)
    df = n_cases - 2

    corr = stats.corr(y, x)
    p = stats.rtest_p(corr, df)
    for i in range(len(corr)):
        r_sp, p_sp = scipy.stats.pearsonr(y[:, i], x)
        assert_almost_equal(corr[i], r_sp)
        assert_almost_equal(p[i], p_sp)

    # NaN
    with warnings.catch_warnings():  # divide by 0
        warnings.simplefilter("ignore")
        eq_(stats.corr(np.arange(10), np.zeros(10)), 0)

    # perm
    y_perm = np.empty_like(y)
    for perm in permute_order(n_cases, 2):
        y_perm[perm] = y
        stats.corr(y, x, corr, perm)
        for i in range(len(corr)):
            r_sp, _ = scipy.stats.pearsonr(y_perm[:, i], x)
            assert_almost_equal(corr[i], r_sp)
Example #4
 def _corr_all_orig(self, pref):
     df = []
     for dim in self.myexp.dims:
         dim_data = load(pref='dis', exp=self.myexp.exp, suffix=dim)[dim]
         if dim_data.ndim == 3:
             dim_data = np.mean(dim_data, axis=0)
         for depth, model_name in self.myexp.models:
             self.myexp.set_model(model_name)
             dis = self.myexp.dissimilarity()
             layer = dis.keys()[-1]
             dis = dis[layer]
             corr = stats.corr(dis, dim_data, sel='upper')
             if self.myexp.bootstrap:
                 print('bootstrapping stats...')
                 bf = stats.bootstrap_resample(
                     dis,
                     dim_data,
                     func=stats.corr,
                     ci=None,
                     seed=0,
                     sel='upper',
                     struct=self.dims[dim].ravel())
                 for i, b in enumerate(bf):
                     df.append([dim, depth, model_name, layer, corr, i, b])
             else:
                 df.append([dim, depth, model_name, layer, corr, 0, np.nan])
     df = pandas.DataFrame(df,
                           columns=[
                               'kind', 'depth', 'models', 'layer',
                               'correlation', 'iter', 'bootstrap'
                           ])
     self.save(df, pref=pref)
     return df
Example #5
def calc(names, data, item, r, d, model=RandomForestRegressor()):
    model = make_model(data, names, False, r, d, model)
    graph = util.sequence_dotbracket_to_graph(data[item][1], data[item][2])
    res = np.array(predict(model, graph))
    other = np.array(data[item][0])

    res, other = mask(res, other)
    value = corr(res, other)[0]
    #print '\t',len(data[item][1]),"\t", value
    return value
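The snippet above assumes a mask helper; a minimal sketch of one plausible version (hypothetical, the project's real helper may differ), which drops positions where either array is non-finite so corr sees clean, paired data:
import numpy as np

def mask(a, b):
    keep = np.isfinite(a) & np.isfinite(b)  # True only where both values are finite
    return a[keep], b[keep]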
Example #6
def correlation_all(trees):
    from scipy.stats import pearsonr as corr

    #We are assuming each data set is normally distributed
    tscores_all = np.array([])
    rscores_all = np.array([])

    for tree in trees:
        tscores, rscores = tree.scores()

        tscores_all = np.concatenate((tscores_all, tscores))
        rscores_all = np.concatenate((rscores_all, rscores))

    return corr(tscores_all, rscores_all)
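A minimal usage sketch; the Tree stub below is hypothetical, and the only assumption is that scores() returns two equal-length 1-D arrays:
import numpy as np

class Tree:
    # hypothetical stub standing in for the project's real tree objects
    def __init__(self, tscores, rscores):
        self._t, self._r = np.asarray(tscores), np.asarray(rscores)

    def scores(self):
        return self._t, self._r

trees = [Tree([1.0, 2.0, 3.0], [1.1, 1.9, 3.2]), Tree([4.0, 5.0], [3.8, 5.1])]
r, p = correlation_all(trees)  # pooled Pearson r and p-value over all trees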
Example #8
    def corr(self):
        dis = self.dissimilarity()
        df = []
        nname = models.NICE_NAMES[self.model_name].lower()
        for dim in self.dims:
            dim_data = load(pref='dis', exp=self.exp, suffix=dim)
            if dim_data is None:
                name = self.model_name
                self.set_model(dim)
                dim_data = self.dissimilarity()
                self.set_model(name)
                if dim_data is None:
                    raise Exception('dimension data %s cannot be obtained' %
                                    dim)

            dim_data = dim_data[dim]
            if dim_data.ndim == 3:
                dim_data = np.mean(dim_data, axis=0)
            struct = self.dims[dim] if self.exp in ['fonts', 'stefania'] else None
            if self.filter:
                dim_data = dim_data[self.sel][:, self.sel]
                struct = None
            for layer, data in dis.items():
                d = data[self.sel][:, self.sel] if self.filter else data
                corr = stats.corr(d, dim_data, sel='upper')
                if self.bootstrap:
                    print('bootstrapping stats...')
                    bf = stats.bootstrap_resample(d,
                                                  dim_data,
                                                  func=stats.corr,
                                                  ci=None,
                                                  seed=0,
                                                  sel='upper',
                                                  struct=struct)
                    for i, b in enumerate(bf):
                        df.append([dim, nname, layer, corr, i, b])
                else:
                    df.append([dim, nname, layer, corr, 0, np.nan])
        df = pandas.DataFrame(df,
                              columns=[
                                  'kind', 'models', 'layer', 'correlation',
                                  'iter', 'bootstrap'
                              ])
        self.save(df, pref='corr')
        if self.task == 'run':
            self.plot_single(df, 'corr')
        return df
Example #9
def get_corr(ts1, ts2, period):
    '''
        ts1 and ts2 are the two traces between which the correlation is computed;
        period is the length of the trace in days
    '''
    # resample the time-series to have equal length
    if len(ts1)==len(ts2):
        pass
    elif len(ts1)>len(ts2):
        ts1 = resample(ts1,len(ts2))
    else:
        ts2 = resample(ts2,len(ts1))

    # parameter for filter
    fs = float(len(ts1))/(period*24*3600)
    lowcut = 1.0/(6*3600) #6hrs
    highcut = 1.0/(20*60) #20mins
    out_ts1 = butter_bandpass_filter(ts1, lowcut, highcut, fs, order=3)
    out_ts2 = butter_bandpass_filter(ts2, lowcut, highcut, fs, order=3)

    #return the corr-coef
    return corr(out_ts1, out_ts2)[0]
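get_corr relies on a butter_bandpass_filter helper; a sketch using the standard SciPy recipe (Butterworth design plus zero-phase filtering) with the signature the call site implies — the project's actual version may differ:
from scipy.signal import butter, filtfilt

def butter_bandpass_filter(data, lowcut, highcut, fs, order=3):
    nyq = 0.5 * fs  # Nyquist frequency
    b, a = butter(order, [lowcut / nyq, highcut / nyq], btype='band')
    return filtfilt(b, a, data)  # forward-backward filtering: zero phase shift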
Example #10
def write_sig_data(round, clim_data, hindcast, ensemble):
    from scipy.stats import pearsonr as corr
    rho, p = corr(clim_data, hindcast)

    #_Write the round and model correlation
    line = '%i,%.2f,' % (round, rho)

    #_Get whole model hss and rpss, write to file.
    hss = HSS(clim_data, hindcast)
    rpss = RPSS(clim_data, ensemble)
    line = line + '%.2f,%.2f,' % (rpss['all'], hss['all'])

    #_Loop through driest/wettest 10/20/30 years and write information
    for n in [10, 20, 30]:
        hss = HSS(clim_data, hindcast, n, n )
        rpss = RPSS(clim_data, ensemble, n, n )
        line = line +   '%.2f,%.2f,%.2f,%.2f,' % \
                        (rpss['dry'], rpss['wet'], hss['dry'], hss['wet'])
    #_Remove the last comma and return to newline
    line = line[:-1] + '\n'

    return line
Example #11
    def corr(self):
        dis = self.dissimilarity()
        df = []
        nname = models.NICE_NAMES[self.model_name].lower()
        for dim in self.dims:
            dim_data = load(pref="dis", exp=self.exp, suffix=dim)
            if dim_data is None:
                name = self.model_name
                self.set_model(dim)
                dim_data = self.dissimilarity()
                self.set_model(name)
                if dim_data is None:
                    raise Exception("dimension data %s cannot be obtained" % dim)

            dim_data = dim_data[dim]
            if dim_data.ndim == 3:
                dim_data = np.mean(dim_data, axis=0)
            struct = self.dims[dim] if self.exp in ["fonts", "stefania"] else None
            if self.filter:
                dim_data = dim_data[self.sel][:, self.sel]
                struct = None
            for layer, data in dis.items():
                d = data[self.sel][:, self.sel] if self.filter else data
                corr = stats.corr(d, dim_data, sel="upper")
                if self.bootstrap:
                    print("bootstrapping stats...")
                    bf = stats.bootstrap_resample(
                        d, dim_data, func=stats.corr, ci=None, seed=0, sel="upper", struct=struct
                    )
                    for i, b in enumerate(bf):
                        df.append([dim, nname, layer, corr, i, b])
                else:
                    df.append([dim, nname, layer, corr, 0, np.nan])
        df = pandas.DataFrame(df, columns=["kind", "models", "layer", "correlation", "iter", "bootstrap"])
        self.save(df, pref="corr")
        if self.task == "run":
            self.plot_single(df, "corr")
        return df
Example #12
 def _corr_all_orig(self, pref):
     df = []
     for dim in self.myexp.dims:
         dim_data = load(pref="dis", exp=self.myexp.exp, suffix=dim)[dim]
         if dim_data.ndim == 3:
             dim_data = np.mean(dim_data, axis=0)
         for depth, model_name in self.myexp.models:
             self.myexp.set_model(model_name)
             dis = self.myexp.dissimilarity()
             layer = dis.keys()[-1]
             dis = dis[layer]
             corr = stats.corr(dis, dim_data, sel="upper")
             if self.myexp.bootstrap:
                 print("bootstrapping stats...")
                 bf = stats.bootstrap_resample(
                     dis, dim_data, func=stats.corr, ci=None, seed=0, sel="upper", struct=self.dims[dim].ravel()
                 )
                 for i, b in enumerate(bf):
                     df.append([dim, depth, model_name, layer, corr, i, b])
             else:
                 df.append([dim, depth, model_name, layer, corr, 0, np.nan])
     df = pandas.DataFrame(df, columns=["kind", "depth", "models", "layer", "correlation", "iter", "bootstrap"])
     self.save(df, pref=pref)
     return df
Example #13
def test_ml(stock='F',
            forecast_out=5,
            month=None,
            day=None,
            year=2019,
            plot=False,
            volume=False):
    # Assume input day is valid trading day
    # Want to separate 1 percent of the data to forecast
    # Today info
    if (month == None or day == None):
        today = datetime.datetime.now()
        month = today.month
        day = today.day

    end_date = dt(year, month, day)
    trading_days = get_trading_days([2017, 2018, 2019])

    end_idx = np.where(end_date == trading_days)[0][0]
    end = trading_days[end_idx - forecast_out]
    new_start = trading_days[end_idx - forecast_out]
    new_end = trading_days[end_idx]

    # For prediction
    start = datetime.datetime(2016, 4, 1)

    #df = web.DataReader(stock, 'yahoo', start, end)
    #print(df.index)
    df = read_data(stock, start, end)
    if (df.empty):
        #print("SHOULD BE EMPTY")
        return [0] * 10, "ERROR"

    df = df[df.index <= end]
    #print(df.tail(forecast_out))
    dfreg = df.loc[:, ['adjusted close', 'volume']]
    dfreg['HL_PCT'] = (df['high'] - df['low']) / df['adjusted close'] * 100.0
    dfreg['PCT_change'] = (df['adjusted close'] -
                           df['open']) / df['open'] * 100.0

    # For volume testing
    if (volume):
        dfreg['adjusted close'] = dfreg['volume']

    dfreg['EMA'] = get_ema(dfreg, forecast_out)
    if (dfreg['EMA'].empty):
        return [0] * 10, "ERROR"

    dfreg['old close'] = dfreg['adjusted close']
    dfreg['adjusted close'] = dfreg['EMA']

    # For validation
    #print("NEW START: \t{}".format(new_start))
    #print("NEW END: \t{}".format(new_end))
    #print("VALIDATION START: {} END: {}\n".format(new_start, new_end))
    #new_df = web.DataReader(stock, 'yahoo', new_start, new_end)
    new_df = read_data(stock, new_start, new_end)
    #print("TESTING VALIDATION DATA")
    if (new_df.empty):
        return [0] * 10, "ERROR"
    #print(new_end)
    new_df = new_df[new_df.index <= new_end]
    #print(new_df)
    #exit(1)
    new_dfreg = new_df.loc[:, ['adjusted close', 'volume']]
    new_dfreg['HL_PCT'] = (new_df['high'] -
                           new_df['low']) / new_df['adjusted close'] * 100.0
    new_dfreg['PCT_change'] = (new_df['adjusted close'] -
                               new_df['open']) / new_df['open'] * 100.0

    # Drop missing value
    dfreg.fillna(value=-99999, inplace=True)
    new_dfreg.fillna(value=-99999, inplace=True)

    # Separating the label here; we want to predict the adjusted close
    forecast_col = 'adjusted close'
    dfreg['label'] = dfreg[forecast_col].shift(-forecast_out)
    X = np.array(dfreg.drop(['label'], 1))

    # Scale X for linear regression
    X = preprocessing.scale(X)

    # Finally want late X and early X for model
    X_lately = X[-forecast_out:]
    X = X[:-forecast_out]

    # Separate label and identify it as y
    y = np.array(dfreg['label'])
    y = y[:-forecast_out]

    # Training and testing sets
    X_train = X[:len(X) - forecast_out]
    X_test = X[len(X) - forecast_out:]

    y_train = y[:len(y) - forecast_out]
    y_test = y[len(y) - forecast_out:]

    # LinReg
    clfreg = LinearRegression(n_jobs=-1)

    # QuadReg2
    clfpoly2 = make_pipeline(PolynomialFeatures(2), Ridge())

    # QuadReg3
    clfpoly3 = make_pipeline(PolynomialFeatures(3), Ridge())

    # QuadReg4
    clfpoly4 = make_pipeline(PolynomialFeatures(4), Ridge())

    # QuadReg5
    clfpoly5 = make_pipeline(PolynomialFeatures(5), Ridge())

    # KNN Regression
    clfknn = KNeighborsRegressor(n_neighbors=2)

    # Bayesian Ridge
    clfbayr = BayesianRidge()

    # Neural Network
    clfmlp = MLPRegressor(hidden_layer_sizes=(100, 100, 100),
                          learning_rate='adaptive',
                          solver='adam',
                          max_iter=5,
                          verbose=False)

    # Random Forest Regressor
    clfrfr = RFR(n_estimators=15)

    # Support Vector Regressor
    clfsvr = SVR(gamma='auto')

    threads = []
    models = [
        clfreg, clfpoly2, clfpoly3, clfpoly4, clfpoly5, clfknn, clfbayr,
        clfrfr, clfsvr
    ]
    fits = [''] * len(models)
    for i in range(len(models)):
        process = Thread(target=fitting,
                         args=[models[i], X_train, y_train, fits, i],
                         name=stock)
        process.start()
        threads.append(process)

    for process in threads:
        process.join()

    start = time.time()
    try:
        reg_forecast = fits[0].predict(X_lately)
        poly2_forecast = fits[1].predict(X_lately)
        poly3_forecast = fits[2].predict(X_lately)
        poly4_forecast = fits[3].predict(X_lately)
        poly5_forecast = fits[4].predict(X_lately)
        try:
            knn_forecast = fits[5].predict(X_lately)
        except ValueError:
            #print("KNN ERROR: {}".format(stock))
            #print("F*****g really: {}".format(stock))
            #print(X_lately)
            #print(X_lately.shape)
            knn_forecast = np.zeros(poly5_forecast.shape)
            #exit(1)
        bayr_forecast = fits[6].predict(X_lately)
        rfr_forecast = fits[7].predict(X_lately)
        svr_forecast = fits[8].predict(X_lately)
        mlp_forecast = fits[6].predict(X_lately)  # clfmlp is never fitted (not in models); Bayesian Ridge output stands in
    except AttributeError:
        #print("ISSUES WITH {}".format(stock))
        return [0] * 10, {}
        #print(fits)
        #print(threads)
        #print(X_train, y_train)
        #print(X, y)
        #print(stock)
        #print(dfreg)
        #exit(1)
    #mlp_forecast = clfmlp.predict(X_lately)

    # Set up dataframe
    dfreg['reg_forecast'] = np.nan
    dfreg['poly2_forecast'] = np.nan
    dfreg['poly3_forecast'] = np.nan
    dfreg['poly4_forecast'] = np.nan
    dfreg['poly5_forecast'] = np.nan
    dfreg['knn_forecast'] = np.nan
    dfreg['bayr_forecast'] = np.nan
    dfreg['mlp_forecast'] = np.nan
    dfreg['rfr_forecast'] = np.nan
    dfreg['svr_forecast'] = np.nan

    last_date = dfreg.iloc[-1].name
    last_unix = last_date
    next_unix = last_unix + datetime.timedelta(days=1)
    for i in zip(reg_forecast, poly2_forecast, poly3_forecast, poly4_forecast,
                 poly5_forecast, knn_forecast, bayr_forecast, mlp_forecast,
                 rfr_forecast, svr_forecast):
        next_date = next_unix
        next_unix += datetime.timedelta(days=1)
        dfreg.loc[next_date] = list(
            [np.nan for _ in range(len(dfreg.columns) - 10)] + list(i))

    #dfreg['mean_forecast'] = dfreg[['poly2_forecast', 'poly3_forecast']].mean(axis=1)
    #print(dfreg.tail(forecast_out+1))
    dfreg['mean_forecast'] = dfreg[[
        'reg_forecast',
        'poly2_forecast',
        'poly3_forecast',
        'knn_forecast',
        'bayr_forecast',  # mlp_forecast,
        'rfr_forecast',
        'svr_forecast'
    ]].mean(axis=1)

    as_list = dfreg.index.tolist()
    # I THINK THIS IS FIXED
    #print(as_list[-forecast_out-5:])
    #for asd in as_list[-forecast_out-1:]:
    #    print(asd)
    #print()
    #for asd in new_df.index.tolist():#[:forecast_out]:
    #    print(asd)
    as_list[-forecast_out:] = new_df.index.tolist()[1:]
    try:
        dfreg.index = as_list
    except:
        print("DATA MISALIGNMENT FOR: {}".format(stock))
        #print(new_df)
        #print(dfreg.tail(forecast_out+1))
        #exit(1)
        return [0] * 10, {}
    #for asd in as_list[-forecast_out-5:]:
    #    print(asd)
    dfreg[-forecast_out:].index = new_df.index.tolist()[:forecast_out]  # note: assigning to a slice's index is a no-op in pandas
    #print(dfreg.tail(forecast_out+1))
    #return [None]*10, None
    #exit(1)

    #
    # Trying to do all combinations
    #
    forecasts = [
        'reg_forecast', 'poly2_forecast', 'poly3_forecast', 'poly4_forecast',
        'poly5_forecast', 'knn_forecast', 'bayr_forecast', 'rfr_forecast',
        'svr_forecast'
    ]

    if (plot):
        dfreg['old close'].tail(20).plot(figsize=(20, 12), lw=2)
        dfreg['adjusted close'].tail(20).plot(figsize=(20, 12), lw=2)
        dfreg['reg_forecast'].tail(20).plot(lw=0.5)
        dfreg['poly2_forecast'].tail(20).plot(lw=0.5)
        dfreg['poly3_forecast'].tail(20).plot(lw=0.5)
        dfreg['poly4_forecast'].tail(20).plot(lw=0.5)
        dfreg['poly5_forecast'].tail(20).plot(lw=0.5)
        dfreg['knn_forecast'].tail(20).plot(lw=0.5)
        dfreg['bayr_forecast'].tail(20).plot(lw=0.5)
        dfreg['mean_forecast'].tail(20).plot(c='k')
        #dfreg['mlp_forecast'].tail(20).plot()
        dfreg['rfr_forecast'].tail(20).plot(lw=0.5)
        dfreg['svr_forecast'].tail(20).plot(lw=0.5)

    new_dfreg['Actual close'] = new_df['adjusted close']
    if (plot):
        new_dfreg['Actual close'].tail(20).plot(c='g', lw=2)
    fit = np.polyfit([i for i in range(forecast_out)],
                     dfreg['mean_forecast'].values[-forecast_out:],
                     deg=1)

    #print("CALCULATING CORRELATION BETWEEN METHOD AND ACTUAL")
    actual = new_dfreg['Actual close'].tail(forecast_out)

    highest_corr = 0
    best_comb = ''
    num_combs = 0
    correlations = []
    good_combinations = []
    #for j in range(1,9):
    #    for comb in combinations(forecasts, j):
    #        num_combs += 1
    #        comb_dat = dfreg[[*list(comb)]].mean(axis=1).tail(forecast_out)
    #        new_correlation = corr(comb_dat, actual)[0]
    #        correlations.append(new_correlation)
    #        if(new_correlation > 0.4):
    #            good_combinations.append(comb)

    #        if(new_correlation > highest_corr):
    #            highest_corr = new_correlation
    #            best_comb = comb
    for comb in all_combinations:
        num_combs += 1
        comb_dat = dfreg[[*list(comb)]].mean(axis=1).tail(forecast_out)
        new_correlation = corr(comb_dat, actual)[0]
        correlations.append(new_correlation)
        if (new_correlation > 0.4):
            good_combinations.append(comb)

        if (new_correlation > highest_corr):
            highest_corr = new_correlation
            best_comb = comb

    reg_dat = dfreg['reg_forecast'].tail(forecast_out)
    reg_corr = corr(reg_dat, actual)
    #print("Linear Regression: {}".format(reg_corr))

    poly2_dat = dfreg['poly2_forecast'].tail(forecast_out)
    poly2_corr = corr(poly2_dat, actual)
    #print("Poly2: {}".format(poly2_corr))

    poly3_dat = dfreg['poly3_forecast'].tail(forecast_out)
    poly3_corr = corr(poly3_dat, actual)
    #print("Poly3: {}".format(poly3_corr))

    poly4_dat = dfreg['poly4_forecast'].tail(forecast_out)
    poly4_corr = corr(poly4_dat, actual)
    #print("Poly3: {}".format(poly3_corr))

    poly5_dat = dfreg['poly5_forecast'].tail(forecast_out)
    poly5_corr = corr(poly5_dat, actual)
    #print("Poly3: {}".format(poly3_corr))

    knn_dat = dfreg['knn_forecast'].tail(forecast_out)
    knn_corr = corr(knn_dat, actual)
    #print("K Nearest Neighbors: {}".format(knn_corr))

    bayr_dat = dfreg['bayr_forecast'].tail(forecast_out)
    bayr_corr = corr(bayr_dat, actual)
    #print("Bayesian: {}".format(bayr_corr))

    rfr_dat = dfreg['rfr_forecast'].tail(forecast_out)
    rfr_corr = corr(rfr_dat, actual)
    #print("Random Forest: {}".format(rfr_corr))

    svr_dat = dfreg['svr_forecast'].tail(forecast_out)
    svr_corr = corr(svr_dat, actual)
    #print("Support Vector: {}".format(rfr_corr))

    mean_dat = dfreg['mean_forecast'].tail(forecast_out)
    mean_corr = corr(mean_dat, actual)

    if (plot):
        plt.legend(loc='best')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.title(stock)
        plt.savefig("./test_plots/{1}_{2}/{0}_{1}_{2}_{3}".format(
            stock, month, day, forecast_out))
        plt.close()

    return (reg_corr[0], poly2_corr[0], poly3_corr[0], poly4_corr[0], poly5_corr[0],\
           knn_corr[0], bayr_corr[0], rfr_corr[0], mean_corr[0], svr_corr[0]), good_combinations
Example #14
	def crossvalpcr(self, xval = True, debug = False):
		#Must set phase with bootcorr, and then use crossvalpcr, as it just uses the corr_grid attribute
		import numpy as np
		from numpy import array
		from scipy.stats import pearsonr as corr
		from scipy.stats import linregress
		from matplotlib import pyplot as plt
		from atmos_ocean_data import weightsst
		predictand = self.clim_data

		if self.corr_grid.mask.sum() >= len(self.sst.lat) * len(self.sst.lon) - 4:
			yhat = np.nan
			e = np.nan
			index = self.clim_data.index
			hindcast = pd.Series(data = yhat, index = index)
			error = pd.Series(data = e, index = index)
			self.correlation = np.nan
			self.hindcast = np.nan
			self.hindcast_error = np.nan
			self.flags['noSST'] = True
			return

		self.flags['noSST'] = False
		sstidx = self.corr_grid.mask == False
		n = len(predictand)
		yhat = np.zeros(n)
		e = np.zeros(n)
		idx = np.arange(n)

		params = []
		std_errs = []
		p_vals = []
		t_vals = []
		if not xval:
			rawSSTdata = weightsst(self.sst).data
			rawdata = rawSSTdata[:, sstidx]
			cvr = np.cov(rawdata.T)
			eigval, eigvec = np.linalg.eig(cvr)
			eigvalsort = np.argsort(eigval)[::-1]
			eigval = eigval[eigvalsort]
			eigval = np.real(eigval)
			ncomp = 1
			eof_1 = eigvec[:,:ncomp] #_fv stands for Feature Vector, in this case EOF-1
			eof_1 = np.real(eof_1)
			pc_1 = eof_1.T.dot(rawdata.T).squeeze()
			self.pc1 = pc_1
			return pc_1

		for i in idx:
			test = idx == i
			train = idx != i
			rawSSTdata = weightsst(self.sst).data[train]
			droppedSSTdata = weightsst(self.sst).data[test]
			rawdata = rawSSTdata[:, sstidx]#
			dropped_data = droppedSSTdata[:,sstidx].squeeze()

			#U, s, V = np.linalg.svd(rawdata)
			#pc_1 = V[0,:] #_Rows of V are principal components
			#eof_1 = U[:,0].squeeze() #_Columns are EOFS
			#EIGs = s**2 #_s is square root of eigenvalues

			cvr = np.cov(rawdata.T)
			eigval, eigvec = np.linalg.eig(cvr)
			eigvalsort = np.argsort(eigval)[::-1]
			eigval = eigval[eigvalsort]
			eigval = np.real(eigval)
			ncomp = 1
			eof_1 = eigvec[:,:ncomp] #_fv stands for Feature Vector, in this case EOF-1
			eof_1 = np.real(eof_1)
			pc_1 = eof_1.T.dot(rawdata.T).squeeze()

			slope, intercept, r_value, p_value, std_err = linregress(pc_1, predictand[train])
			predictor = dropped_data.dot(eof_1)
			yhat[i] = slope * predictor + intercept
			e[i] = predictand[i] - yhat[i]
			params.append(slope); std_errs.append(std_err); p_vals.append(p_value)
			t_vals.append(slope/std_err)

		r, p = corr(predictand, yhat)

		index = self.clim_data.index
		hindcast = pd.Series(data = yhat, index = index)
		error = pd.Series(data = e, index = index)
		self.hindcast = hindcast
		self.hindcast_error = error
		self.correlation = round(r, 2)
		self.reg_stats = {	'params' : array(params),
							'std_errs' : array(std_errs),
							't_vals' : array(t_vals),
							'p_vals' : array(p_vals)}

		return
Example #15
	def crossvalpcr(self, fig = None, ax = None, phase = 'allyears', onlySST = False, debug = False):
		#Must set phase with bootcorr, and then use crossvalpcr, as it just uses the corr_grid attribute
		import numpy as np
		from scipy.stats import pearsonr as corr
		from scipy.stats import linregress
		from matplotlib import pyplot as plt

		"""
		if fig == None:
			fig = plt.figure()
			ax = fig.add_subplot(111)
		"""
		#Set up predictand from climate data
		predictand = self.clim_data[phase]
		#predictand = (predictand - predictand.mean())/predictand.std()
		#self.predictand[phase] = predictand
		#Get an index of every significantly correlated gridpoint for the predictor fields
		if self.corr_grid['sst'][phase].mask.sum() >= 16019:
			print ('No sig SST or SLP')
			yhat = np.zeros(len(self.clim_data[phase]))
			e = np.zeros(len(self.clim_data[phase]))
			index = self.clim_data[phase].index
			hindcast = pd.Series(data = yhat, index = index)
			error = pd.Series(data = e, index = index)
			self.hindcast[phase] = hindcast
			self.hindcast_error[phase] = error
			self.nosigSST = True
			return
		else:
			self.nosigSST = False
			sstidx = self.corr_grid['sst'][phase].mask == False
		# if self.corr_grid['slp'][phase].mask.sum() == 2664:
		# 	onlySST = True
		if not onlySST:
			slpidx = self.corr_grid['slp'][phase].mask == False
		#Set up some empty variables
		n = len(predictand)
		yhat = np.zeros(n)
		e = np.zeros(n)
		idx = np.arange(n)
		for i in idx:
			test = idx == i
			train = idx != i
			rawSSTdata = self.sst[phase][train]
			rawSLPdata = self.slp[phase][train]
			droppedSSTdata = self.sst[phase][test]
			droppedSLPdata = self.slp[phase][test]
			if onlySST:
				rawdata = rawSSTdata[:, sstidx]#.T
				dropped_data = droppedSSTdata[:,sstidx].squeeze()
			else:
				rawdata = np.concatenate((rawSSTdata[:, sstidx], rawSLPdata[:, slpidx]), axis = 1) #.T
				dropped_data = np.concatenate((droppedSSTdata[:,sstidx], droppedSLPdata[:,slpidx]), axis = 1)#.T.squeeze()

			#U, s, V = np.linalg.svd(rawdata)
			#pc_1 = V[0,:] #_Rows of V are principal components
			#eof_1 = U[:,0].squeeze() #_Columns are EOFS
			#EIGs = s**2 #_s is square root of eigenvalues

			cvr = np.cov(rawdata.T)
			eigval, eigvec = np.linalg.eig(cvr)
			eigvalsort = np.argsort(eigval)[::-1]
			eigval = eigval[eigvalsort]
			eigval = np.real(eigval)
			ncomp = 1
			eof_1 = eigvec[:,:ncomp] #_fv stands for Feature Vector, in this case EOF-1
			eof_1 = np.real(eof_1)
			pc_1 = eof_1.T.dot(rawdata.T).squeeze()

			slope, intercept, r_value, p_value, std_err = linregress(pc_1, predictand[train])
			predictor = dropped_data.dot(eof_1)
			yhat[i] = slope * predictor + intercept
			e[i] = predictand[i] - yhat[i]
		c = corr(predictand, yhat)


		"""
		ax.scatter(predictand, yhat)
		ax.set_title('%s, r = %f' % (phase, round(c[0],2)))
		ax.axis([0,15,0,15])
		"""
		index = self.clim_data[phase].index
		hindcast = pd.Series(data = yhat, index = index)
		error = pd.Series(data = e, index = index)
		self.hindcast[phase] = hindcast
		self.hindcast_error[phase] = error

		return fig, ax
Example #16
	def bootcorr(self, n = 100, fig = None, ax = None, field = 'sst', \
		phase = 'allyears', corrconf = 0.9, bootconf = 0.9, cbloc = 'bottom',\
		quick = False, debug = False, monte = False):
		from numpy import meshgrid, zeros, ma, isnan, linspace
		import time
		from random import sample

		if field == 'sst':
			fieldData = self.sst[phase]
		if field == 'slp':
			fieldData = self.slp[phase]

		clim_data = self.clim_data[phase]

		corrlevel = 1 - corrconf

		corr_grid = vcorr(X = fieldData, y = clim_data)

		n_yrs = len(clim_data)

		p_value = sig_test(corr_grid, n_yrs)

		#Mask insignificant gridpoints
		corr_grid = ma.masked_array(corr_grid, ~(p_value < corrlevel))
		#Mask land
		corr_grid = ma.masked_array(corr_grid, isnan(corr_grid))
		#Mask northern/southern ocean
		corr_grid.mask[self.lat[field] > 60] = True
		corr_grid.mask[self.lat[field] < -30] = True

		###SAVE THE MASK TO FILTER THE BOOTSTRAP
		mask = corr_grid.mask

		if quick:
			self.corr_grid[field][phase] = corr_grid
			return
			###SET UP INDICES FOR FIELD DATA###

		###INITIALIZE A NEW CORR GRID####
		nlat = fieldData.shape[1]
		nlon = fieldData.shape[2]
		count = np.zeros((nlat,nlon))
		ntim = n
		dat = clim_data

		mask = corr_grid.mask
		if debug:
			print 'Starting %s' % phase


		for boot in xrange(ntim):
			if debug:
				print 'starting round %i' % boot

			###SHUFFLE THE YEARS AND CREATE THE BOOT DATA###
			idx = np.random.randint(0, len(dat) - 1, len(dat))
			bootdata = np.zeros((len(idx), nlat, nlon))
			bootdata[:] = fieldData[idx]
			bootvar = np.zeros((len(idx)))
			bootvar = dat[idx]

			corr_grid_boot = vcorr(X = bootdata, y = bootvar)

			n_yrs = len(bootvar)

			p_value = sig_test(corr_grid_boot, n_yrs)

			count[p_value <= corrlevel] += 1
			if debug:
				print 'Count max is %i' % count.max()
			# for lon, lat in zip(xx[~mask], yy[~mask]):
			# 		c, p = corr(bootdata[:,lat,lon], bootvar)
			# 		if p <= corrlevel:
			# 			count[lat,lon] += 1

		###GET THE ACTUAL CORRELATION AGAIN

		#_Mask insignificant values
		###CREATE MASKED ARRAY USING THE COUNT AND BOOTCONF ATTRIBUTES
		corr_grid = np.ma.masked_array(corr_grid, count < bootconf * ntim)

		self.corr_grid[field][phase] = corr_grid

		if monte:
			n_phase = len(clim_data)
			n_total = len(self.clim_data['allyears'])
			count = np.zeros((nlat, nlon))
			field = self.sst[phase]
			for t in xrange(100):
				#print 'Starting monte round %i' % t
				idx = sample(xrange(n_total), n_phase)
				var = self.clim_data['allyears'][idx]
				for lon, lat in zip(xx[~mask], yy[~mask]):
					r, p = corr(var, field[:,lat,lon])
					if p <= (1 - corrconf):
						#print 'Entering while loop for grid %i, %i' % (i, j)
						x = 0
						c2 = 0
						while x < 100:
							#print 'Monte round %i' % x
							idx2 = np.random.randint(0, n_phase-1, n_phase)
							x += 1
							r, p = corr(var[idx2], field[idx2,lat,lon])
							if p <= (1 - corrconf):
								c2 += 1
						if c2 >= 80:
							count[lat,lon] += 1
			print 'Count max is %.0f' % (count.max())
			self.monte_count[phase] = count.max()
			self.monte_grid[phase] = np.ma.masked_array(data = count, \
										mask = self.corr_grid['sst'][phase].mask)
		return
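bootcorr depends on two helpers, vcorr and sig_test; hedged sketches consistent with the shapes used above — vcorr correlates a (time, lat, lon) field pointwise against a (time,) series, and sig_test turns the r grid into two-tailed p-values — though the project's implementations may differ:
import numpy as np
from scipy.stats import t as t_dist

def vcorr(X, y):
    # Pearson r at each grid point between X (time, lat, lon) and y (time,)
    Xc = X - X.mean(axis=0)
    yc = y - y.mean()
    cov = (Xc * yc[:, None, None]).sum(axis=0)
    return cov / (np.sqrt((Xc ** 2).sum(axis=0)) * np.sqrt((yc ** 2).sum()))

def sig_test(r, n):
    # two-tailed p-value for a Pearson r grid computed from n samples
    t = r * np.sqrt((n - 2) / (1 - r ** 2))
    return 2 * t_dist.sf(np.abs(t), n - 2)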
Example #17
    def crossvalpcr(self, xval=True, debug=False):
        #Must set phase with bootcorr, and then use crossvalpcr, as it just uses the corr_grid attribute
        import numpy as np
        from numpy import array
        from scipy.stats import pearsonr as corr
        from scipy.stats import linregress
        from matplotlib import pyplot as plt
        from utils import weightsst
        predictand = self.clim_data

        if self.corr_grid.mask.sum() >= len(self.sst.lat) * len(self.sst.lon) - 4:
            yhat = np.nan
            e = np.nan
            #index = self.clim_data.index
            index = self.mei
            hindcast = pd.Series(data=yhat, index=index)
            error = pd.Series(data=e, index=index)
            self.correlation = np.nan
            self.hindcast = np.nan
            self.hindcast_error = np.nan
            self.flags['noSST'] = True
            return

        self.flags['noSST'] = False
        sstidx = self.corr_grid.mask == False
        n = len(predictand)
        yhat = np.zeros(n)
        e = np.zeros(n)
        idx = np.arange(n)

        params = []
        std_errs = []
        p_vals = []
        t_vals = []
        if not xval:
            rawSSTdata = weightsst(self.sst).data
            rawdata = rawSSTdata[:, sstidx]
            cvr = np.cov(rawdata.T)
            eigval, eigvec = np.linalg.eig(cvr)
            eigvalsort = np.argsort(eigval)[::-1]
            eigval = eigval[eigvalsort]
            eigval = np.real(eigval)
            ncomp = 1
            eof_1 = eigvec[:, :ncomp]  #_fv stands for Feature Vector, in this case EOF-1
            eof_1 = np.real(eof_1)
            pc_1 = eof_1.T.dot(rawdata.T).squeeze()
            slope, intercept, r, p, err = linregress(pc_1, predictand)
            yhat = slope * pc_1 + intercept
            self.pc1 = pc_1
            self.correlation = r
            self.hindcast = yhat
            return

        for i in idx:
            test = idx == i
            train = idx != i
            rawSSTdata = weightsst(self.sst).data[train]
            droppedSSTdata = weightsst(self.sst).data[test]
            rawdata = rawSSTdata[:, sstidx]  #
            dropped_data = droppedSSTdata[:, sstidx].squeeze()

            #U, s, V = np.linalg.svd(rawdata)
            #pc_1 = V[0,:] #_Rows of V are principal components
            #eof_1 = U[:,0].squeeze() #_Columns are EOFS
            #EIGs = s**2 #_s is square root of eigenvalues

            cvr = np.cov(rawdata.T)
            #print cvr.shape
            eigval, eigvec = np.linalg.eig(cvr)
            eigvalsort = np.argsort(eigval)[::-1]
            eigval = eigval[eigvalsort]
            eigval = np.real(eigval)
            ncomp = 1
            eof_1 = eigvec[:, :ncomp]  #_fv stands for Feature Vector, in this case EOF-1
            eof_1 = np.real(eof_1)
            pc_1 = eof_1.T.dot(rawdata.T).squeeze()

            slope, intercept, r_value, p_value, std_err = linregress(
                pc_1, predictand[train])
            predictor = dropped_data.dot(eof_1)
            yhat[i] = slope * predictor + intercept
            e[i] = predictand[i] - yhat[i]
            params.append(slope)
            std_errs.append(std_err)
            p_vals.append(p_value)
            t_vals.append(slope / std_err)

        r, p = corr(predictand, yhat)

        hindcast = yhat
        error = e
        self.hindcast = hindcast
        self.hindcast_error = error
        self.correlation = round(r, 2)
        self.reg_stats = {
            'params': array(params),
            'std_errs': array(std_errs),
            't_vals': array(t_vals),
            'p_vals': array(p_vals)
        }

        return
Example #18
 def ksi_func(pp, tt):
     ppx, ppy = pp.T
     rat_x = corr(ppx, tt.flatten())[0] ** 2
     rat_y = corr(ppy, tt.flatten())[0] ** 2
     return 1./ rat_x + 1./ rat_y
Example #19
plt.plot(range(1, ph + 1), tmp[::2], c='red', label='ask')
plt.plot(range(1, ph + 1), tmp[1::2], c='green', label='bid')
plt.yscale('log')
plt.xlabel('$h$')
plt.ylabel('Jarque-Bera statistic for $h$-step log returns')
for type in ['pdf', 'png']:
    plt.savefig(f'{path_save}/log_returns_normality.{type}',
                bbox_inches='tight',
                dpi=300)
plt.show()
# %% Correlation

tmp = res
cor_ask = np.array(
    [corr(res[:, i], res[:, 0])[0] for i in range(0, res.shape[1], 2)])
cor_bid = np.array(
    [corr(res[:, i], res[:, 1])[0] for i in range(1, res.shape[1], 2)])

cor_ask_bid = np.array(
    [corr(res[:, i], res[:, 1])[0] for i in range(0, res.shape[1], 2)])
cor_bid_ask = np.array(
    [corr(res[:, i], res[:, 0])[0] for i in range(1, res.shape[1], 2)])

plt.plot(range(0, ph), cor_ask, c='red', label='ask')
plt.plot(range(0, ph), cor_bid, c='green', label='bid')

plt.plot(range(0, ph), cor_ask_bid, c='red', label='ask2', linestyle='dashed')
plt.plot(range(0, ph),
         cor_bid_ask,
         c='green',
         label='bid2',
         linestyle='dashed')
Example #20
	def crossvalpcr(self, xval = True, debug = False):
		#Must set phase with bootcorr, and then use crossvalpcr, as it just uses the corr_grid attribute
		import numpy as np
		import statsmodels.api as sm
		from numpy import array, ndarray, hstack, zeros, vstack
		from scipy.stats import pearsonr as corr
		from scipy.stats import linregress
		from matplotlib import pyplot as plt
		from atmos_ocean_data import weightsst
		predictand = self.clim_data.copy()
		pcs = self.pcs.copy()
		n = len(predictand)
		yhat = np.zeros(n)
		e = np.zeros(n)

		params = []
		std_errs = []
		p_vals = []
		t_vals = []

		rnd = 0
		ncomps, nt = pcs.shape
		selection_index = range(ncomps)
		xval_idx = np.arange(nt)
		best_score = 0.01
		scores = [0]
		overall_index = []
		final_pcs = zeros((nt))
		while best_score >= scores[rnd]:
			score = zeros((len(selection_index)))
			# print 'Round %i' % rnd
			for ind, index in enumerate(selection_index):
				# print 'Checking pc-%i: ' % index
				# print overall_index + [ind]
				data = pcs[overall_index + [ind]]
				# if rnd ==3: import pdb; pdb.set_trace()

				for i in xval_idx:
					test = xval_idx == i
					train = xval_idx != i
					rawdata = data.T[train]
					dropped_data = data.T[test]
					X = sm.add_constant(data.T[train])
					y = predictand[train]
					olsmod = sm.OLS(y, X)
					olsres = olsmod.fit()
					intercept = array([[0]])
					yhat[i] = olsres.predict(hstack((intercept, dropped_data)))
					e[i] = predictand[i] - yhat[i]
					# params.append(slope); std_errs.append(std_err); p_vals.append(p_value)
					# t_vals.append(slope/std_err)

				r, p = corr(predictand, yhat)
				score[ind] = abs(r)
				# print 'score is %.2f' % abs(r)
			if max(score) > best_score:
				best_score = max(score)
				best_loc = np.where(score == max(score))[0][0]
				best_pc = selection_index[best_loc]
				overall_index.append(best_pc)
				# print overall_index
				# print pcs.shape
				selection_index = np.delete(selection_index, best_loc)
				# print selection_index
				final_pcs = vstack((final_pcs, pcs[best_loc]))
				scores.append(best_score)
			else:
				break
			rnd += 1
		self.overall_index = overall_index
		### NOW REBUILD BEST MODEL ###
		data = final_pcs[1:]
		#print 'Overall index is ', overall_index
		for i in xval_idx:
			test = xval_idx == i
			train = xval_idx != i
			rawdata = data.T[train]
			dropped_data = data.T[test]
			X = sm.add_constant(data.T[train])
			y = predictand[train]
			olsmod = sm.OLS(y, X)
			olsres = olsmod.fit()
			intercept = array([[0]])
			yhat[i] = olsres.predict(hstack((intercept, dropped_data)))
			e[i] = predictand[i] - yhat[i]
			# params.append(slope); std_errs.append(std_err); p_vals.append(p_value)
			# t_vals.append(slope/std_err)

		###Okay, best first predictor has been obtained... Now need to keep adding
		r, p = corr(predictand, yhat)

		index = self.clim_data.index
		hindcast = pd.Series(data = yhat, index = index)
		error = pd.Series(data = e, index = index)
		self.hindcast = hindcast
		self.hindcast_error = error
		self.correlation, _ = corr(self.clim_data, self.hindcast)
		# self.reg_stats = {	'params' : array(params),
		# 					'std_errs' : array(std_errs),
		# 					't_vals' : array(t_vals),
		# 					'p_vals' : array(p_vals)}

		return
Example #21
def main():

    #To plot yost v theoretical
    #    arrPitch = []
    #    arrRoll = []
    #    arrAx = []
    #    arrAy = []

    #    imu = int(input("Enter 0 for YOST IMU, 1 for MPU6050:\n"))
    #    fileName = input("Enter the acceleration reading file path and name:\n")

    experiments = [
        "../Data/YOST_stewart_0degPitch_10sPeriod_test_1.txt",
        "../Data/MPU6050_stewart_0degPitch_10sPeriod_test_1.txt",
        "../Data/YOST_stewart_0degPitch_20sPeriod_test_2.txt",
        "../Data/MPU6050_stewart_0degPitch_20Period_test_2.txt",
        "../Data/YOST_stewart_20degPitch_20sPeriod_test_3.txt",
        "../Data/MPU6050_stewart_20degPitch_20Period_test_3.txt"
    ]
    plot = True
    displacements = []
    sigWaveHeights = []

    for i in range(0, 6):
        arrAz = []
        totalTime = 0

        imu = i % 2  #YOST => 0, MPU6050 => 1
        with open(experiments[i]) as f:
            #Valid files
            #YOST_stewart_0degPitch_10sPeriod_test_1.txt
            #YOST_stewart_0degPitch_20sPeriod_test_2.txt
            #YOST_stewart_20degPitch_20sPeriod_test_3.txt
            #MPU6050_stewart_0degPitch_10sPeriod_test_1.txt
            #MPU6050_stewart_0degPitch_20Period_test_2.txt
            #MPU6050_stewart_20degPitch_20Period_test_3.txt

            #If YOST IMU (imu = 0)
            #Data format: "%int(Month)/%int(Day)/%int(Year),%int(Hours):%int(Minutes):%float(Seconds),
            # %float(OrientPitch),%float(OrientYaw),%float(OrientRoll),
            # %float(CorrectedGyroX),%float(CorrectedGyroY),%float(CorrectedGyroZ),
            # %float(CorrectedAccelX),%float(CorrectedAccelY),%float(CorrectedAccelZ),
            # %float(CorrectedMagX),%float(CorrectedMagY),%float(CorrectedMagZ)"
            if (imu == 0):
                f.readline()  # Read in first line - this is the format line

                #Get values from file
                startTime = 0
                endTime = 0
                for line in f:

                    row = line.split(',')

                    #Get start time
                    if (startTime == 0):
                        startTime = row[0].split(' ')[1]

                    #Get end time
                    endTime = row[0].split(' ')[1]

                    #Select relevant acceleration data - comment out if plotting yost v theoretical
                    row = row[7:10]

                    #Set upper bound of 0.5g Az
                    if (float(row[1]) > 0.5 * g):
                        row[1] = str(0.5 * -g)
                    arrAz.append(
                        float(row[1]) *
                        -g)  #comment out if plotting yost v theoretical

                    #This is also used to compare yost with the true signal
#                    arrAz.append(float(row[-5])*-g )
#                    arrAx.append(float(row[-6])*-g )
#                    arrAy.append(float(row[-4])*-g )
#                    arrPitch.append(float(row[1]))
#                    arrRoll.append( float(row[3]) )

#Calculate the sampling frequency
                startTime = startTime.split(':')
                endTime = endTime.split(':')
                totalTime = []
                totalTime.append(float(endTime[0]) - float(startTime[0]))
                totalTime.append(float(endTime[1]) - float(startTime[1]))
                totalTime.append(float(endTime[2]) - float(startTime[2]))

                totalTime = totalTime[0] * 60 * 60 + totalTime[1] * 60 + totalTime[2]

            #Else MPU6050 (imu = 1)
            #Data format: "int(timeSinceStart ms), float(accelAx mg), float(accelAy mg), float(accelAz g)"
            else:
                startTime = -1
                endTime = 0
                for line in f:
                    #Format is: int ms, float ax, float ay, float az
                    row = line.split(',')
                    if (startTime == -1):
                        startTime = float(row[0]) * 10**-3
                    endTime = float(row[0]) * 10**-3
                    #Set upper bound of 0.5g Az
                    if (float(row[3]) > 0.5 * g):
                        row[3] = str(0.5 * -g)  # clamp the Az channel (row[3]) that is appended below
                    #arrAx.append(float(row[1])*-g/1000 )
                    #arrAy.append(float(row[2])*-g/1000 )
                    arrAz.append(float(row[3]) * -g)

                totalTime = endTime - startTime

            fs = len(arrAz) / (totalTime)  #Sampling frequency
            fs = round(fs)  #Account for errors

            ##Debugging and graphing
            #print("Sampling rate = " + str(fs))
            #trueVerticalAcceleration(arrAx, arrAy, arrAz, arrPitch,arrRoll, fs)
            ##EndDebug

            #Condition signal:
            azFiltered = cond.condition(arrAz, fs, plot)

            #Calculate Wave height time series
            eta, times = heightTimeSeries(azFiltered, fs, plot, plot, plot)

            #Resample to allow comparison between the IMUs (both series must have the same number of samples)
            eta180, times = sig.resample(eta, 180, t=times)
            if (plot):
                plt.plot(times, eta180, label="Resampled heights")
                plt.legend(loc='lower right')
                plt.show()

            displacements.append(eta180)

            ht = significantWaveHeight(eta)
            hs = spectralSignificantWaveHeight(eta, fs)
            sigWaveHeights.append((ht, hs))


#    print(displacements)
    h = 0.045
    c = 0.155
    f = 0.1
    t = np.arange(0, 90, 0.5)
    s = h * np.sin(2 * np.pi * f * t)

    for j in range(0, 6):
        if (j % 2 == 0):
            print("YOST Significant Wave Height (Ht, Hs) for test " +
                  str(round(j * 2 / 5)) + ": Ht=" +
                  '{:6f}'.format(sigWaveHeights[j][0] * 1000) + "mm Hs=" +
                  '{:6f}'.format(sigWaveHeights[j][1] * 1000))
        else:
            print("MPU6050 Significant Wave Height(Ht, Hs) for test " +
                  str(round(j * 2 / 5)) + ": Ht=" +
                  '{:6f}'.format(sigWaveHeights[j][0] * 1000) + "mm Hs=" +
                  '{:6f}'.format(sigWaveHeights[j][1] * 1000))

    print("Theoretical Significant Wave Height: " +
          '{:6f}'.format(significantWaveHeight(s) * 1000) + "mm")

    for k in range(0, 6, 2):
        print("Pearson coerrelation coefficient between IMUs for test " +
              str(int(k / 2)) + " is: " + '{:6f}'.format(
                  abs(corr(displacements[k], displacements[k + 1])[0])))
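The wave-height helpers used above are not shown; hedged sketches based on the standard definitions (H_1/3 from the highest third of zero-upcrossing wave heights, and Hm0 = 4*sqrt(m0), i.e. about 4 times the standard deviation) — the project's implementations may differ:
import numpy as np

def significantWaveHeight(eta):
    # H_1/3: mean of the highest third of zero-upcrossing wave heights
    eta = np.asarray(eta) - np.mean(eta)
    up = np.where((eta[:-1] < 0) & (eta[1:] >= 0))[0]  # zero up-crossing indices
    if len(up) < 2:
        return 0.0
    heights = [eta[a:b].max() - eta[a:b].min() for a, b in zip(up[:-1], up[1:])]
    heights.sort(reverse=True)
    return float(np.mean(heights[:max(1, len(heights) // 3)]))

def spectralSignificantWaveHeight(eta, fs):
    # Hm0 = 4 * sqrt(m0), with m0 estimated as the variance of eta (fs unused in this simplification)
    return 4.0 * float(np.std(np.asarray(eta)))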
Example #22
            if(self.dimensionality(n+1)>=dimensionality):
                return(n+1)

if __name__ == "__main__":
    from scipy.stats import pearsonr as corr
    import matplotlib.pyplot as plt
    
    # Test dataset of random value
    m, n = 128, 10
    weightMatrix = np.random.randn(m, n)
    pcaObj = PCA(matrix=weightMatrix)
    
    # Check that we get back the same matrix we put in when we set the number
    # of selected components equal to a number of possible values (up to n)
    for i in [n,5,1]:
        filteredMatrix = pcaObj.filterMatrix(n=i)
        pcaObjFiltered = PCA(matrix=filteredMatrix)
        isMatrixSame   = np.allclose(weightMatrix,filteredMatrix)
        r = corr(weightMatrix.flatten(),filteredMatrix.flatten())
        
        print("\nIs PCA Filtered Matrix same (n={})?: {}".format(n,isMatrixSame))
        print("Original matrix: {}".format(  weightMatrix.flatten()))
        print("Filtered matrix: {}".format(filteredMatrix.flatten()))
        print("Pearson's Correlation: r = {:4.3f}".format(r[0]))
        print('Target Dimensionality at 50%: {}'.format(pcaObj.computeTargetPCs(0.5)))
        
        (fig,ax)=plt.subplots(nrows=2,ncols=1)
        ax[0].imshow(weightMatrix.T)
        ax[0].set_title('Original dimensionality: {:4.2f}'.format(pcaObj.dimensionality()))
        ax[1].imshow(filteredMatrix.T)
        ax[1].set_title('Filtered dimensionality: {:4.2f}'.format(pcaObjFiltered.dimensionality()))