コード例 #1
0
ファイル: prepro_tseries.py プロジェクト: Ruhul964/dsa2
def test_prepro_all():
    """Smoke-test the transform, interact, mapper and extract helpers.

    Loads the sample frame and runs each helper once. Nothing is asserted
    on the results, so the test fails only when one of the calls raises.
    """
    df = test_get_sampledata()
    df.head()  # return value is discarded

    def fresh_frame():
        # Some helpers change the frame they are given, which makes the
        # calls that follow crash; those helpers get a throwaway deep copy.
        return copy.deepcopy(df)

    # ---- Transformation ----
    result = transform.robust_scaler(df, drop=["Close_1"])
    result = transform.standard_scaler(df, drop=["Close"])
    result = transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    # Still disabled: transform.windsorization(df, "Close", para, strategy='both')
    result = transform.operations(df, ["Close"])
    result = transform.triple_exponential_smoothing(
        df, ["Close"], 12, 0.2, 0.2, 0.2, 0
    )
    # naive_dec modifies its frame argument, hence the copy.
    result = transform.naive_dec(fresh_frame(), ["Close", "Open"])
    result = transform.bkb(df, ["Close"])
    result = transform.butter_lowpass_filter(df, ["Close"], 4)
    for name in (
        "instantaneous_phases",
        "kalman_feat",
        "perd_feat",
        "fft_feat",
    ):
        result = getattr(transform, name)(df, ["Close"])
    result = transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    result = transform.saw(df, ["Close", "Open"])
    result = transform.modify(df, ["Close"])
    result = transform.multiple_rolling(df, columns=["Close"])
    result = transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    result = transform.prophet_feat(
        df.reset_index(), ["Close", "Open"], "Date", "D"
    )

    # ---- Interaction ----
    # Every interact helper works on its own deep copy because the frame
    # passed in is modified by the helper.
    result = interact.lowess(
        fresh_frame(), ["Open", "Volume"], df["Close"], f=0.25, iter=3
    )
    result = interact.autoregression(fresh_frame())
    result = interact.muldiv(fresh_frame(), ["Close", "Open"])
    result = interact.decision_tree_disc(fresh_frame(), ["Close"])
    result = interact.quantile_normalize(fresh_frame(), drop=["Close"])
    result = interact.tech(fresh_frame())
    result = interact.genetic_feat(fresh_frame())

    # ---- Mapping ----
    result = mapper.pca_feature(
        df, variance_or_components=0.80, drop_cols=["Close_1"]
    )
    result = mapper.cross_lag(df)
    # A chi-square test assumes frequency-like data, and a frequency cannot be
    # negative (https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test).
    # For testing purposes the frame is min-max scaled into [0, 1] first:
    # (df - df.min()) / (df.max() - df.min()).
    unit_scaled = (df - df.min()) / (df.max() - df.min())
    result = mapper.a_chi(unit_scaled)
    result = mapper.encoder_dataset(df, ["Close_1"], 15)
    for name in ("lle_feat", "feature_agg", "neigh_feat"):
        result = getattr(mapper, name)(df, ["Close_1"], 4)

    # ---- Extraction ----
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    # Still disabled: extract.var_index(df["Close"].values, var_index_param)
    for name in (
        "mean_abs_change",
        "mean_second_derivative_central",
        "variance_larger_than_standard_deviation",
        "symmetry_looking",
        "has_duplicate_max",
        "partial_autocorrelation",
        "augmented_dickey_fuller",
        "gskew",
        "stetson_mean",
        "length",
        "count_above_mean",
        "longest_strike_below_mean",
        "wozniak",
        "last_location_of_maximum",
        "fft_coefficient",
        "ar_coefficient",
        "index_mass_quantile",
        "number_cwt_peaks",
        "spkt_welch_density",
        "linear_trend_timewise",
        "c3",
        "binned_entropy",
    ):
        getattr(extract, name)(df["Close"])
    extract.svd_entropy(df["Close"].values)
    for name in (
        "hjorth_complexity",
        "max_langevin_fixed_point",
        "percent_amplitude",
        "cad_prob",
        "zero_crossing_derivative",
        "detrended_fluctuation_analysis",
        "fisher_information",
        "higuchi_fractal_dimension",
        "petrosian_fractal_dimension",
        "hurst_exponent",
        "largest_lyauponov_exponent",
        "whelch_method",
        "find_freq",
        "flux_perc",
        "range_cum_s",
    ):
        getattr(extract, name)(df["Close"])
    # Going by the extraction example at
    # https://github.com/firmai/deltapy#extraction, the second argument of
    # structure_func appears to be a dict of companion value arrays.
    struct_param = {"Volume": df["Volume"].values, "Open": df["Open"].values}
    extract.structure_func(df["Close"], struct_param)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])
コード例 #2
0
def test_prepro_all():
    """Smoke-test the deltapy transform, interact, mapper and extract helpers.

    Runs each helper once on the frame returned by ``data_copy()``. Nothing
    is asserted on the results; the test fails only when a call raises.

    Helpers that are reported to modify the frame they receive are handed a
    deep copy so that the shared ``df`` stays intact for the calls after them.
    """
    import copy

    from deltapy import transform, interact, mapper, extract
    df = data_copy()
    df.head()  # return value is discarded

    df_out = transform.robust_scaler(df, drop=["Close_1"])
    df_out = transform.standard_scaler(df, drop=["Close"])
    df_out = transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    # Still disabled: transform.windsorization(df, "Close", para, strategy='both')
    df_out = transform.operations(df, ["Close"])
    df_out = transform.triple_exponential_smoothing(df, ["Close"], 12, .2, .2,
                                                    .2, 0)
    # naive_dec reportedly changes the frame it is given, which makes later
    # calls crash; hand it a throwaway copy instead of the shared frame.
    df_out = transform.naive_dec(copy.deepcopy(df), ["Close", "Open"])
    df_out = transform.bkb(df, ["Close"])
    df_out = transform.butter_lowpass_filter(df, ["Close"], 4)
    df_out = transform.instantaneous_phases(df, ["Close"])
    df_out = transform.kalman_feat(df, ["Close"])
    df_out = transform.perd_feat(df, ["Close"])
    df_out = transform.fft_feat(df, ["Close"])
    df_out = transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    df_out = transform.saw(df, ["Close", "Open"])
    df_out = transform.modify(df, ["Close"])
    df_out = transform.multiple_rolling(df, columns=["Close"])
    df_out = transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    df_out = transform.prophet_feat(df.reset_index(), ["Close", "Open"],
                                    "Date", "D")

    # **Interaction**
    # The interact helpers reportedly modify their frame argument as well, so
    # each one works on its own deep copy.
    df_out = interact.lowess(copy.deepcopy(df), ["Open", "Volume"],
                             df["Close"],
                             f=0.25,
                             iter=3)
    df_out = interact.autoregression(copy.deepcopy(df))
    df_out = interact.muldiv(copy.deepcopy(df), ["Close", "Open"])
    df_out = interact.decision_tree_disc(copy.deepcopy(df), ["Close"])
    df_out = interact.quantile_normalize(copy.deepcopy(df), drop=["Close"])
    df_out = interact.tech(copy.deepcopy(df))
    df_out = interact.genetic_feat(copy.deepcopy(df))

    # **Mapping**
    df_out = mapper.pca_feature(df,
                                variance_or_components=0.80,
                                drop_cols=["Close_1"])
    df_out = mapper.cross_lag(df)
    # Chi-square based methods assume non-negative, frequency-like inputs, so
    # the frame is min-max scaled into [0, 1] before being passed to a_chi.
    # NOTE(review): assumes every column is numeric and non-constant - confirm.
    df_out = mapper.a_chi((df - df.min()) / (df.max() - df.min()))
    df_out = mapper.encoder_dataset(df, ["Close_1"], 15)
    df_out = mapper.lle_feat(df, ["Close_1"], 4)
    df_out = mapper.feature_agg(df, ["Close_1"], 4)
    df_out = mapper.neigh_feat(df, ["Close_1"], 4)

    # **Extraction**
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    extract.mean_abs_change(df["Close"])
    extract.mean_second_derivative_central(df["Close"])
    extract.variance_larger_than_standard_deviation(df["Close"])
    # Still disabled: extract.var_index(df["Close"].values, var_index_param)
    extract.symmetry_looking(df["Close"])
    extract.has_duplicate_max(df["Close"])
    extract.partial_autocorrelation(df["Close"])
    extract.augmented_dickey_fuller(df["Close"])
    extract.gskew(df["Close"])
    extract.stetson_mean(df["Close"])
    extract.length(df["Close"])
    extract.count_above_mean(df["Close"])
    extract.longest_strike_below_mean(df["Close"])
    extract.wozniak(df["Close"])
    extract.last_location_of_maximum(df["Close"])
    extract.fft_coefficient(df["Close"])
    extract.ar_coefficient(df["Close"])
    extract.index_mass_quantile(df["Close"])
    extract.number_cwt_peaks(df["Close"])
    extract.spkt_welch_density(df["Close"])
    extract.linear_trend_timewise(df["Close"])
    extract.c3(df["Close"])
    extract.binned_entropy(df["Close"])
    extract.svd_entropy(df["Close"].values)
    extract.hjorth_complexity(df["Close"])
    extract.max_langevin_fixed_point(df["Close"])
    extract.percent_amplitude(df["Close"])
    extract.cad_prob(df["Close"])
    extract.zero_crossing_derivative(df["Close"])
    extract.detrended_fluctuation_analysis(df["Close"])
    extract.fisher_information(df["Close"])
    extract.higuchi_fractal_dimension(df["Close"])
    extract.petrosian_fractal_dimension(df["Close"])
    extract.hurst_exponent(df["Close"])
    extract.largest_lyauponov_exponent(df["Close"])
    extract.whelch_method(df["Close"])
    extract.find_freq(df["Close"])
    extract.flux_perc(df["Close"])
    extract.range_cum_s(df["Close"])
    # The extraction example in the deltapy README
    # (https://github.com/firmai/deltapy#extraction) passes a dict of companion
    # value arrays as the second argument; supply it explicitly rather than
    # relying on a default.
    struct_param = {"Volume": df["Volume"].values, "Open": df["Open"].values}
    extract.structure_func(df["Close"], struct_param)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])