def test_prepro_all():
    """Smoke-test the transform, interact, mapper and extract helpers.

    Each helper is invoked once on the frame returned by
    ``test_get_sampledata()``. Nothing is asserted on the results: the test
    passes as long as no call raises.
    """
    df = test_get_sampledata()
    df.head()

    def fresh_frame():
        # Some helpers modify the frame they receive, which makes the calls
        # that follow crash; those helpers get their own deep copy.
        return copy.deepcopy(df)

    # **Transformation**
    out = transform.robust_scaler(df, drop=["Close_1"])
    out = transform.standard_scaler(df, drop=["Close"])
    out = transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    # out = transform.windsorization(df, "Close", para, strategy='both')
    out = transform.operations(df, ["Close"])
    out = transform.triple_exponential_smoothing(
        df, ["Close"], 12, 0.2, 0.2, 0.2, 0
    )
    out = transform.naive_dec(fresh_frame(), ["Close", "Open"])
    out = transform.bkb(df, ["Close"])
    out = transform.butter_lowpass_filter(df, ["Close"], 4)
    for helper_name in (
        "instantaneous_phases",
        "kalman_feat",
        "perd_feat",
        "fft_feat",
    ):
        out = getattr(transform, helper_name)(df, ["Close"])
    out = transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    out = transform.saw(df, ["Close", "Open"])
    out = transform.modify(df, ["Close"])
    out = transform.multiple_rolling(df, columns=["Close"])
    out = transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    out = transform.prophet_feat(
        df.reset_index(), ["Close", "Open"], "Date", "D"
    )

    # **Interaction**
    out = interact.lowess(
        fresh_frame(), ["Open", "Volume"], df["Close"], f=0.25, iter=3
    )
    out = interact.autoregression(fresh_frame())
    out = interact.muldiv(fresh_frame(), ["Close", "Open"])
    out = interact.decision_tree_disc(fresh_frame(), ["Close"])
    out = interact.quantile_normalize(fresh_frame(), drop=["Close"])
    out = interact.tech(fresh_frame())
    out = interact.genetic_feat(fresh_frame())

    # **Mapping**
    out = mapper.pca_feature(
        df, variance_or_components=0.8, drop_cols=["Close_1"]
    )
    out = mapper.cross_lag(df)
    # Pearson's chi-squared test
    # (https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test) assumes
    # frequency data, and a frequency cannot be negative. The data set is not
    # well known to us, so for testing purposes it is min-max normalised to
    # [0, 1]: (df - df.min()) / (df.max() - df.min()).
    column_min = df.min()
    normalised = (df - column_min) / (df.max() - column_min)
    out = mapper.a_chi(normalised)
    out = mapper.encoder_dataset(df, ["Close_1"], 15)
    for helper_name in ("lle_feat", "feature_agg", "neigh_feat"):
        out = getattr(mapper, helper_name)(df, ["Close_1"], 4)

    # **Extraction**
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    # extract.var_index(df["Close"].values, var_index_param) is left out.
    for helper_name in (
        "mean_abs_change",
        "mean_second_derivative_central",
        "variance_larger_than_standard_deviation",
        "symmetry_looking",
        "has_duplicate_max",
        "partial_autocorrelation",
        "augmented_dickey_fuller",
        "gskew",
        "stetson_mean",
        "length",
        "count_above_mean",
        "longest_strike_below_mean",
        "wozniak",
        "last_location_of_maximum",
        "fft_coefficient",
        "ar_coefficient",
        "index_mass_quantile",
        "number_cwt_peaks",
        "spkt_welch_density",
        "linear_trend_timewise",
        "c3",
        "binned_entropy",
    ):
        getattr(extract, helper_name)(df["Close"])
    # svd_entropy is the one helper here that is fed the raw value array.
    extract.svd_entropy(df["Close"].values)
    for helper_name in (
        "hjorth_complexity",
        "max_langevin_fixed_point",
        "percent_amplitude",
        "cad_prob",
        "zero_crossing_derivative",
        "detrended_fluctuation_analysis",
        "fisher_information",
        "higuchi_fractal_dimension",
        "petrosian_fractal_dimension",
        "hurst_exponent",
        "largest_lyauponov_exponent",
        "whelch_method",
        "find_freq",
        "flux_perc",
        "range_cum_s",
    ):
        getattr(extract, helper_name)(df["Close"])
    # Going by the extraction example at
    # https://github.com/firmai/deltapy#extraction, the second argument of
    # structure_func must be a dict of the "Volume" and "Open" value arrays.
    structure_inputs = {
        "Volume": df["Volume"].values,
        "Open": df["Open"].values,
    }
    extract.structure_func(df["Close"], structure_inputs)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])
def test_prepro_all():
    """Smoke-test the deltapy transform, interact, mapper and extract helpers.

    Each helper is called once on the frame returned by ``data_copy()``.
    Return values are not inspected: the test passes when no call raises.
    """
    import copy

    from deltapy import transform, interact, mapper, extract

    df = data_copy()

    # **Transformation**
    transform.robust_scaler(df, drop=["Close_1"])
    transform.standard_scaler(df, drop=["Close"])
    transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    # transform.windsorization(df, "Close", para, strategy='both')
    #   -> skipped: `para` is not defined in this test.
    transform.operations(df, ["Close"])
    transform.triple_exponential_smoothing(df, ["Close"], 12, 0.2, 0.2, 0.2, 0)
    # naive_dec modifies the frame it is given, which makes the helpers called
    # afterwards crash; hand it a deep copy so `df` stays intact.
    transform.naive_dec(copy.deepcopy(df), ["Close", "Open"])
    transform.bkb(df, ["Close"])
    transform.butter_lowpass_filter(df, ["Close"], 4)
    transform.instantaneous_phases(df, ["Close"])
    transform.kalman_feat(df, ["Close"])
    transform.perd_feat(df, ["Close"])
    transform.fft_feat(df, ["Close"])
    transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    transform.saw(df, ["Close", "Open"])
    transform.modify(df, ["Close"])
    transform.multiple_rolling(df, columns=["Close"])
    transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    transform.prophet_feat(df.reset_index(), ["Close", "Open"], "Date", "D")

    # **Interaction**
    # The interact helpers modify their input frame as well, so every call
    # works on its own deep copy.
    interact.lowess(
        copy.deepcopy(df), ["Open", "Volume"], df["Close"], f=0.25, iter=3
    )
    interact.autoregression(copy.deepcopy(df))
    interact.muldiv(copy.deepcopy(df), ["Close", "Open"])
    interact.decision_tree_disc(copy.deepcopy(df), ["Close"])
    interact.quantile_normalize(copy.deepcopy(df), drop=["Close"])
    interact.tech(copy.deepcopy(df))
    interact.genetic_feat(copy.deepcopy(df))

    # **Mapping**
    mapper.pca_feature(df, variance_or_components=0.80, drop_cols=["Close_1"])
    mapper.cross_lag(df)
    # The chi-squared test works on frequencies, which cannot be negative, so
    # the frame is min-max scaled to [0, 1] before it is passed to a_chi.
    # NOTE(review): a constant column would divide by zero here - confirm the
    # sample data has none.
    column_min = df.min()
    mapper.a_chi((df - column_min) / (df.max() - column_min))
    mapper.encoder_dataset(df, ["Close_1"], 15)
    mapper.lle_feat(df, ["Close_1"], 4)
    mapper.feature_agg(df, ["Close_1"], 4)
    mapper.neigh_feat(df, ["Close_1"], 4)

    # **Extraction**
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    extract.mean_abs_change(df["Close"])
    extract.mean_second_derivative_central(df["Close"])
    extract.variance_larger_than_standard_deviation(df["Close"])
    # extract.var_index(df["Close"].values, var_index_param)
    #   -> skipped: `var_index_param` is not defined in this test.
    extract.symmetry_looking(df["Close"])
    extract.has_duplicate_max(df["Close"])
    extract.partial_autocorrelation(df["Close"])
    extract.augmented_dickey_fuller(df["Close"])
    extract.gskew(df["Close"])
    extract.stetson_mean(df["Close"])
    extract.length(df["Close"])
    extract.count_above_mean(df["Close"])
    extract.longest_strike_below_mean(df["Close"])
    extract.wozniak(df["Close"])
    extract.last_location_of_maximum(df["Close"])
    extract.fft_coefficient(df["Close"])
    extract.ar_coefficient(df["Close"])
    extract.index_mass_quantile(df["Close"])
    extract.number_cwt_peaks(df["Close"])
    extract.spkt_welch_density(df["Close"])
    extract.linear_trend_timewise(df["Close"])
    extract.c3(df["Close"])
    extract.binned_entropy(df["Close"])
    extract.svd_entropy(df["Close"].values)
    extract.hjorth_complexity(df["Close"])
    extract.max_langevin_fixed_point(df["Close"])
    extract.percent_amplitude(df["Close"])
    extract.cad_prob(df["Close"])
    extract.zero_crossing_derivative(df["Close"])
    extract.detrended_fluctuation_analysis(df["Close"])
    extract.fisher_information(df["Close"])
    extract.higuchi_fractal_dimension(df["Close"])
    extract.petrosian_fractal_dimension(df["Close"])
    extract.hurst_exponent(df["Close"])
    extract.largest_lyauponov_exponent(df["Close"])
    extract.whelch_method(df["Close"])
    extract.find_freq(df["Close"])
    extract.flux_perc(df["Close"])
    extract.range_cum_s(df["Close"])
    # structure_func needs a second argument: following the extraction example
    # in the DeltaPy README, a dict holding the "Volume" and "Open" arrays.
    struct_param = {"Volume": df["Volume"].values, "Open": df["Open"].values}
    extract.structure_func(df["Close"], struct_param)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])