コード例 #1
0
ファイル: transform.py プロジェクト: udemirezen/datagene
def vect_extract(arr_in):
    """Run a fixed list of ``extract`` features on ``arr_in``.

    Each feature is looked up by name on ``extract``, called with ``arr_in``
    and, where the table below lists an index path, narrowed by applying
    those subscripts in order. The result is stored under the feature's name.

    Args:
        arr_in: The input handed unchanged to every ``extract`` function.

    Returns:
        A ``(frame, features)`` tuple: ``features`` is the name -> value
        dict, and ``frame`` is ``pd.DataFrame.from_dict(features,
        orient="index", columns=["values"])``.
    """
    # (feature name, subscripts applied to the raw result, in order).
    # Currently disabled features: svd_entropy, fisher_information,
    # largest_lyauponov_exponent, flux_perc.
    feature_table = (
        ("abs_energy", ()),
        ("mean_abs_change", ()),
        ("mean_second_derivative_central", ()),
        ("partial_autocorrelation", (0, 1)),
        ("augmented_dickey_fuller", (0, 1)),
        ("gskew", ()),
        ("stetson_mean", ()),
        ("count_above_mean", ()),
        ("longest_strike_below_mean", ()),
        ("wozniak", (0, 1)),
        ("fft_coefficient", (0, 1)),
        ("ar_coefficient", (0, 1)),
        ("index_mass_quantile", (0, 1)),
        ("number_cwt_peaks", (0, 1)),
        ("spkt_welch_density", ()),
        ("c3", ()),
        ("binned_entropy", ()),
        ("hjorth_complexity", ()),
        ("max_langevin_fixed_point", ()),
        ("percent_amplitude", (0, 1)),
        ("cad_prob", (1, 1)),
        ("zero_crossing_derivative", (0, 1)),
        ("detrended_fluctuation_analysis", ()),
        ("higuchi_fractal_dimension", (0, 1)),
        ("hurst_exponent", ()),
        ("whelch_method", (0, 1)),
        ("find_freq", (0, 1)),
        ("range_cum_s", ('Rcs',)),
        ("kurtosis", ()),
        ("stetson_k", ()),
    )

    features = {}
    for feature_name, index_path in feature_table:
        value = getattr(extract, feature_name)(arr_in)
        for subscript in index_path:
            value = value[subscript]
        features[feature_name] = value

    frame = pd.DataFrame.from_dict(features,
                                   orient="index",
                                   columns=["values"])
    return frame, features
コード例 #2
0
ファイル: prepro_tseries.py プロジェクト: Ruhul964/dsa2
def test_prepro_all():
    """Smoke-test the transform, interact, mapper and extract helpers.

    Every helper is called once on the frame returned by
    ``test_get_sampledata``. Results are discarded and nothing is asserted,
    so the test fails only if one of the calls raises.
    """
    frame = test_get_sampledata()
    frame.head()

    def run_on_close(*feature_names):
        # Call each named ``extract`` function, in the order given, on a
        # fresh ``frame["Close"]`` lookup.
        for feature_name in feature_names:
            getattr(extract, feature_name)(frame["Close"])

    # **Transformation**
    result = transform.robust_scaler(frame, drop=["Close_1"])
    result = transform.standard_scaler(frame, drop=["Close"])
    result = transform.fast_fracdiff(frame, ["Close", "Open"], 0.5)
    # Disabled: transform.windsorization(df, "Close", para, strategy='both')
    result = transform.operations(frame, ["Close"])
    result = transform.triple_exponential_smoothing(
        frame, ["Close"], 12, .2, .2, .2, 0)
    # naive_dec changes the frame it is given, which makes the following
    # calls crash; passing a deep copy avoids that.
    result = transform.naive_dec(copy.deepcopy(frame), ["Close", "Open"])
    result = transform.bkb(frame, ["Close"])
    result = transform.butter_lowpass_filter(frame, ["Close"], 4)
    result = transform.instantaneous_phases(frame, ["Close"])
    result = transform.kalman_feat(frame, ["Close"])
    result = transform.perd_feat(frame, ["Close"])
    result = transform.fft_feat(frame, ["Close"])
    result = transform.harmonicradar_cw(frame, ["Close"], 0.3, 0.2)
    result = transform.saw(frame, ["Close", "Open"])
    result = transform.modify(frame, ["Close"])
    result = transform.multiple_rolling(frame, columns=["Close"])
    result = transform.multiple_lags(frame, start=1, end=3, columns=["Close"])
    result = transform.prophet_feat(
        frame.reset_index(), ["Close", "Open"], "Date", "D")

    # **Interaction**
    # These helpers also change the frame they are given, so each one gets
    # its own deep copy.
    result = interact.lowess(
        copy.deepcopy(frame), ["Open", "Volume"], frame["Close"],
        f=0.25, iter=3)
    result = interact.autoregression(copy.deepcopy(frame))
    result = interact.muldiv(copy.deepcopy(frame), ["Close", "Open"])
    result = interact.decision_tree_disc(copy.deepcopy(frame), ["Close"])
    result = interact.quantile_normalize(copy.deepcopy(frame), drop=["Close"])
    result = interact.tech(copy.deepcopy(frame))
    result = interact.genetic_feat(copy.deepcopy(frame))

    # **Mapping**
    result = mapper.pca_feature(
        frame, variance_or_components=0.80, drop_cols=["Close_1"])
    result = mapper.cross_lag(frame)
    # Per https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test the
    # chi-square test assumes a distribution of frequencies, and a frequency
    # cannot be negative. The data could either be shifted so that min > 0 or
    # normalised to [0, 1]; since this is only a test, min-max normalisation
    # is used: (df - df.min()) / (df.max() - df.min()).
    rescaled = (frame - frame.min()) / (frame.max() - frame.min())
    result = mapper.a_chi(rescaled)
    result = mapper.encoder_dataset(frame, ["Close_1"], 15)
    result = mapper.lle_feat(frame, ["Close_1"], 4)
    result = mapper.feature_agg(frame, ["Close_1"], 4)
    result = mapper.neigh_feat(frame, ["Close_1"], 4)

    # **Extraction**
    run_on_close("abs_energy")
    extract.cid_ce(frame["Close"], True)
    run_on_close(
        "mean_abs_change",
        "mean_second_derivative_central",
        "variance_larger_than_standard_deviation",
        # Disabled: extract.var_index(df["Close"].values, var_index_param)
        "symmetry_looking",
        "has_duplicate_max",
        "partial_autocorrelation",
        "augmented_dickey_fuller",
        "gskew",
        "stetson_mean",
        "length",
        "count_above_mean",
        "longest_strike_below_mean",
        "wozniak",
        "last_location_of_maximum",
        "fft_coefficient",
        "ar_coefficient",
        "index_mass_quantile",
        "number_cwt_peaks",
        "spkt_welch_density",
        "linear_trend_timewise",
        "c3",
        "binned_entropy",
    )
    # svd_entropy is the one call that is given the underlying ``.values``.
    extract.svd_entropy(frame["Close"].values)
    run_on_close(
        "hjorth_complexity",
        "max_langevin_fixed_point",
        "percent_amplitude",
        "cad_prob",
        "zero_crossing_derivative",
        "detrended_fluctuation_analysis",
        "fisher_information",
        "higuchi_fractal_dimension",
        "petrosian_fractal_dimension",
        "hurst_exponent",
        "largest_lyauponov_exponent",
        "whelch_method",
        "find_freq",
        "flux_perc",
        "range_cum_s",
    )
    # From the https://github.com/firmai/deltapy#extraction example, it seems
    # the second argument of structure_func must be a dict of this form:
    # {"Volume": df["Volume"].values, "Open": df["Open"].values}
    structure_inputs = {
        "Volume": frame["Volume"].values,
        "Open": frame["Open"].values,
    }
    extract.structure_func(frame["Close"], structure_inputs)
    run_on_close("kurtosis", "stetson_k")
コード例 #3
0
def test_prepro_all():
    """Smoke-test the deltapy transform, interact, mapper and extract APIs.

    Every helper is called once on the frame returned by ``data_copy``.
    Results are discarded and nothing is asserted, so the test fails only if
    one of the calls raises.
    """
    from deltapy import transform, interact, mapper, extract
    df = data_copy()
    df.head()

    # **Transformation**
    df_out = transform.robust_scaler(df, drop=["Close_1"])
    df_out = transform.standard_scaler(df, drop=["Close"])
    df_out = transform.fast_fracdiff(df, ["Close", "Open"], 0.5)
    #df_out = transform.windsorization(df,"Close",para,strategy='both')
    df_out = transform.operations(df, ["Close"])
    df_out = transform.triple_exponential_smoothing(df, ["Close"], 12, .2, .2,
                                                    .2, 0)
    # naive_dec modifies the frame it is given, which breaks the calls that
    # follow; hand it a copy so the shared ``df`` stays untouched.
    df_out = transform.naive_dec(df.copy(), ["Close", "Open"])
    df_out = transform.bkb(df, ["Close"])
    df_out = transform.butter_lowpass_filter(df, ["Close"], 4)
    df_out = transform.instantaneous_phases(df, ["Close"])
    df_out = transform.kalman_feat(df, ["Close"])
    df_out = transform.perd_feat(df, ["Close"])
    df_out = transform.fft_feat(df, ["Close"])
    df_out = transform.harmonicradar_cw(df, ["Close"], 0.3, 0.2)
    df_out = transform.saw(df, ["Close", "Open"])
    df_out = transform.modify(df, ["Close"])
    df_out = transform.multiple_rolling(df, columns=["Close"])
    df_out = transform.multiple_lags(df, start=1, end=3, columns=["Close"])
    df_out = transform.prophet_feat(df.reset_index(), ["Close", "Open"],
                                    "Date", "D")

    # **Interaction**
    # The interact helpers also modify their input frame, so each one gets
    # its own copy of ``df``.
    df_out = interact.lowess(df.copy(), ["Open", "Volume"],
                             df["Close"],
                             f=0.25,
                             iter=3)
    df_out = interact.autoregression(df.copy())
    df_out = interact.muldiv(df.copy(), ["Close", "Open"])
    df_out = interact.decision_tree_disc(df.copy(), ["Close"])
    df_out = interact.quantile_normalize(df.copy(), drop=["Close"])
    df_out = interact.tech(df.copy())
    df_out = interact.genetic_feat(df.copy())

    # **Mapping**
    df_out = mapper.pca_feature(df,
                                variance_or_components=0.80,
                                drop_cols=["Close_1"])
    df_out = mapper.cross_lag(df)
    # NOTE(review): a_chi may reject negative values -- confirm that
    # data_copy() only yields non-negative columns, or rescale first.
    df_out = mapper.a_chi(df)
    df_out = mapper.encoder_dataset(df, ["Close_1"], 15)
    df_out = mapper.lle_feat(df, ["Close_1"], 4)
    df_out = mapper.feature_agg(df, ["Close_1"], 4)
    df_out = mapper.neigh_feat(df, ["Close_1"], 4)

    # **Extraction**
    extract.abs_energy(df["Close"])
    extract.cid_ce(df["Close"], True)
    extract.mean_abs_change(df["Close"])
    extract.mean_second_derivative_central(df["Close"])
    extract.variance_larger_than_standard_deviation(df["Close"])
    # extract.var_index(df["Close"].values,var_index_param)
    extract.symmetry_looking(df["Close"])
    extract.has_duplicate_max(df["Close"])
    extract.partial_autocorrelation(df["Close"])
    extract.augmented_dickey_fuller(df["Close"])
    extract.gskew(df["Close"])
    extract.stetson_mean(df["Close"])
    extract.length(df["Close"])
    extract.count_above_mean(df["Close"])
    extract.longest_strike_below_mean(df["Close"])
    extract.wozniak(df["Close"])
    extract.last_location_of_maximum(df["Close"])
    extract.fft_coefficient(df["Close"])
    extract.ar_coefficient(df["Close"])
    extract.index_mass_quantile(df["Close"])
    extract.number_cwt_peaks(df["Close"])
    extract.spkt_welch_density(df["Close"])
    extract.linear_trend_timewise(df["Close"])
    extract.c3(df["Close"])
    extract.binned_entropy(df["Close"])
    extract.svd_entropy(df["Close"].values)
    extract.hjorth_complexity(df["Close"])
    extract.max_langevin_fixed_point(df["Close"])
    extract.percent_amplitude(df["Close"])
    extract.cad_prob(df["Close"])
    extract.zero_crossing_derivative(df["Close"])
    extract.detrended_fluctuation_analysis(df["Close"])
    extract.fisher_information(df["Close"])
    extract.higuchi_fractal_dimension(df["Close"])
    extract.petrosian_fractal_dimension(df["Close"])
    extract.hurst_exponent(df["Close"])
    extract.largest_lyauponov_exponent(df["Close"])
    extract.whelch_method(df["Close"])
    extract.find_freq(df["Close"])
    extract.flux_perc(df["Close"])
    extract.range_cum_s(df["Close"])
    # structure_func takes the companion series as a second argument; the
    # deltapy README extraction example builds it from Volume and Open.
    struct_param = {"Volume": df["Volume"].values, "Open": df["Open"].values}
    extract.structure_func(df["Close"], struct_param)
    extract.kurtosis(df["Close"])
    extract.stetson_k(df["Close"])