Example #1
    def fft_coefficient(self, x, param=None):
        """
        As in tsfresh `fft_coefficient <https://github.com/blue-yonder/tsfresh/blob/master/tsfresh/feature_extraction/\
        feature_calculators.py#L852>`_ \
        Calculates the Fourier coefficients of the one-dimensional discrete Fourier transform for real input, \
        computed with the fast Fourier transform (FFT) algorithm:


        .. math::

            A_k =  \\sum_{m=0}^{n-1} a_m \\exp \\left \\{ -2 \\pi i \\frac{m k}{n} \\right \\}, \\qquad k = 0, \\ldots , n-1.


        The resulting coefficients are complex. This feature calculator can return the real part (attr=="real"), \
        the imaginary part (attr=="imag"), the absolute value (attr=="abs") and the angle in degrees (attr=="angle").

        :param x: the time series to calculate the feature of
        :type x: pandas.Series
        :param param: contains dictionaries {"coeff": k, "attr": s} with k an int and k >= 0, and s a str in \
        ["real", "imag", "abs", "angle"]
        :type param: list
        :return: the different feature values
        :rtype: pandas.Series
        """
        if param is None:
            param = [{'attr': 'abs', 'coeff': 44}, {'attr': 'abs', 'coeff': 63}, {'attr': 'abs', 'coeff': 0},
                     {'attr': 'real', 'coeff': 0}, {'attr': 'real', 'coeff': 23}]
        _fft_coef = feature_calculators.fft_coefficient(x, param)
        logging.debug("fft coefficient by tsfresh calculated")
        return list(_fft_coef)
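For orientation, here is a minimal, self-contained sketch of how the underlying tsfresh call behaves; the toy signal is made up for illustration, and each returned item is a (feature_name, value) pair:

import numpy as np
import pandas as pd
from tsfresh.feature_extraction import feature_calculators

# toy signal with exactly 8 full cycles in 128 samples, so the magnitude
# of coefficient 8 dominates the spectrum
x = pd.Series(np.sin(2 * np.pi * 8 * np.arange(128) / 128))
res = list(feature_calculators.fft_coefficient(
    x, [{"coeff": 8, "attr": "abs"}, {"coeff": 8, "attr": "angle"}]))
for name, value in res:
    print(name, value)  # one (feature_name, value) pair per requested config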
Example #2
def compute_fft_features_block(xc, seg_id, X):
    """Write banded FFT magnitude/phase statistics for segment seg_id into row seg_id of X."""
    # remove the DC offset, low-pass filter, then FFT and keep the first MAX_FREQ_IDX bins
    xcdm = xc - np.mean(xc)
    b, a = des_bw_filter_lp(cutoff=18000)
    xcz = sg.lfilter(b, a, xcdm)
    zc = np.fft.fft(xcz)
    zc = zc[:MAX_FREQ_IDX]

    # FFT stats
    realFFT = np.real(zc)
    imagFFT = np.imag(zc)

    magFFT = np.abs(zc)
    phzFFT = np.angle(zc)
    # defensive clean-up: replace any non-finite phase values
    phzFFT[phzFFT == -np.inf] = -np.pi / 2.0
    phzFFT[phzFFT == np.inf] = np.pi / 2.0
    phzFFT = np.nan_to_num(phzFFT)

    for freq in range(0, MAX_FREQ_IDX, FREQ_STEP):
        X.loc[seg_id, 'FFT_Mag_01q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_STEP], 0.01)
        X.loc[seg_id, 'FFT_Mag_10q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_STEP], 0.1)
        X.loc[seg_id, 'FFT_Mag_90q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_STEP], 0.9)
        X.loc[seg_id, 'FFT_Mag_99q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_STEP], 0.99)
        X.loc[seg_id, 'FFT_Mag_mean%d' % freq] = np.mean(magFFT[freq: freq + FREQ_STEP])
        X.loc[seg_id, 'FFT_Mag_std%d' % freq] = np.std(magFFT[freq: freq + FREQ_STEP])
        X.loc[seg_id, 'FFT_Mag_max%d' % freq] = np.max(magFFT[freq: freq + FREQ_STEP])
        X.loc[seg_id, 'FFT_Phz_mean%d' % freq] = np.mean(phzFFT[freq: freq + FREQ_STEP])
        X.loc[seg_id, 'FFT_Phz_std%d' % freq] = np.std(phzFFT[freq: freq + FREQ_STEP])

    X.loc[seg_id, 'FFT_Rmean'] = realFFT.mean()
    X.loc[seg_id, 'FFT_Rstd'] = realFFT.std()
    X.loc[seg_id, 'FFT_Rmax'] = realFFT.max()
    X.loc[seg_id, 'FFT_Rmin'] = realFFT.min()
    X.loc[seg_id, 'FFT_Imean'] = imagFFT.mean()
    X.loc[seg_id, 'FFT_Istd'] = imagFFT.std()
    X.loc[seg_id, 'FFT_Imax'] = imagFFT.max()
    X.loc[seg_id, 'FFT_Imin'] = imagFFT.min()

    X.loc[seg_id, 'FFT_Rmean_first_6000'] = realFFT[:6000].mean()
    X.loc[seg_id, 'FFT_Rstd_first_6000'] = realFFT[:6000].std()
    X.loc[seg_id, 'FFT_Rmax_first_6000'] = realFFT[:6000].max()
    X.loc[seg_id, 'FFT_Rmin_first_6000'] = realFFT[:6000].min()
    X.loc[seg_id, 'FFT_Rmean_first_18000'] = realFFT[:18000].mean()
    X.loc[seg_id, 'FFT_Rstd_first_18000'] = realFFT[:18000].std()
    X.loc[seg_id, 'FFT_Rmax_first_18000'] = realFFT[:18000].max()
    X.loc[seg_id, 'FFT_Rmin_first_18000'] = realFFT[:18000].min()

    X.loc[seg_id, 'FFT_Rmean_last_5000'] = realFFT[-5000:].mean()
    X.loc[seg_id, 'FFT_Rstd_last_5000'] = realFFT[-5000:].std()
    X.loc[seg_id, 'FFT_Rmax_last_5000'] = realFFT[-5000:].max()
    X.loc[seg_id, 'FFT_Rmin_last_5000'] = realFFT[-5000:].min()

    X.loc[seg_id, 'FFT_Rmean_last_15000'] = realFFT[-15000:].mean()
    X.loc[seg_id, 'FFT_Rstd_last_15000'] = realFFT[-15000:].std()
    X.loc[seg_id, 'FFT_Rmax_last_15000'] = realFFT[-15000:].max()
    X.loc[seg_id, 'FFT_Rmin_last_15000'] = realFFT[-15000:].min()

    for coeff, attr in product([1, 2, 3, 4, 5], ['real', 'imag', 'angle']):
        X.loc[seg_id, f'fft_{coeff}_{attr}'] = list(feature_calculators.fft_coefficient(xc, [{'coeff': coeff, 'attr': attr}]))[0][1]
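Example #2 leans on module-level pieces that are not shown: the constants MAX_FREQ_IDX and FREQ_STEP, the filter designer des_bw_filter_lp, and sg (scipy.signal). The following is only a plausible sketch of those assumptions, not the original definitions:

import numpy as np
from scipy import signal as sg

MAX_FREQ_IDX = 20000  # assumed: number of FFT bins kept per segment
FREQ_STEP = 2500      # assumed: width of each aggregated frequency band

def des_bw_filter_lp(cutoff=18000, fs=4000000.0, order=4):
    # assumed design: digital Butterworth low-pass with the cutoff
    # normalized by the Nyquist frequency
    nyq = 0.5 * fs
    b, a = sg.butter(order, cutoff / nyq, btype='low')
    return b, a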
Example #3
    def fft(self):
        # FFT coefficient magnitudes (real part of coefficients 11..15):
        # one row per series in self.data, one column per coefficient
        feats = [[abs(list(tcal.fft_coefficient(self.data[i, :],
                                                [{"coeff": c, "attr": "real"}]))[0][1])
                  for i in range(len(self.data))]
                 for c in range(11, 16)]
        return np.array(feats).T
Example #4
def fft_ft(dt):
    """Return the first ten FFT coefficients of dt in all four representations as a dict."""
    from tsfresh.feature_extraction.feature_calculators import fft_coefficient

    params = [{'coeff': i, 'attr': j}
              for i in range(10)
              for j in ['real', 'imag', 'abs', 'angle']]

    ft = fft_coefficient(dt, params)
    return dict(ft)
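A quick illustrative call (the input series here is made up); the keys are the names tsfresh generates for each (coeff, attr) pair:

import numpy as np
import pandas as pd

feats = fft_ft(pd.Series(np.random.randn(256)))
print(len(feats))  # 40 entries: 10 coefficients x 4 attributes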
Example #5
def fft(chunk, coeff, attr):
    """Fourier coefficients of the one-dimensional discrete Fourier Transform for real input."""
    return list(fft_coefficient(chunk, [{"coeff": coeff, "attr": attr}]))[0][1]
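An illustrative call with a synthetic chunk; the toy signal completes roughly ten cycles, so the magnitude near coefficient 10 is large:

import numpy as np

chunk = np.sin(np.linspace(0, 20 * np.pi, 1000))
print(fft(chunk, coeff=10, attr="abs"))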
Example #6
 def function(x):
     # `self` is captured from the enclosing method's scope
     param = [{'coeff': self.coeff, 'attr': self.attr}]
     return list(fft_coefficient(x, param=param))[0][1]
Example #7
    def features(self, x, prefix):
        feature_dict = dict()

        # create features here
        # numpy
        feature_dict[prefix + '_' + 'mean'] = np.mean(x)
        feature_dict[prefix + '_' + 'max'] = np.max(x)
        feature_dict[prefix + '_' + 'min'] = np.min(x)
        feature_dict[prefix + '_' + 'std'] = np.std(x)
        feature_dict[prefix + '_' + 'var'] = np.var(x)
        feature_dict[prefix + '_' + 'ptp'] = np.ptp(x)
        feature_dict[prefix + '_' + 'percentile_10'] = np.percentile(x, 10)
        feature_dict[prefix + '_' + 'percentile_20'] = np.percentile(x, 20)
        feature_dict[prefix + '_' + 'percentile_30'] = np.percentile(x, 30)
        feature_dict[prefix + '_' + 'percentile_40'] = np.percentile(x, 40)
        feature_dict[prefix + '_' + 'percentile_50'] = np.percentile(x, 50)
        feature_dict[prefix + '_' + 'percentile_60'] = np.percentile(x, 60)
        feature_dict[prefix + '_' + 'percentile_70'] = np.percentile(x, 70)
        feature_dict[prefix + '_' + 'percentile_80'] = np.percentile(x, 80)
        feature_dict[prefix + '_' + 'percentile_90'] = np.percentile(x, 90)

        # scipy
        feature_dict[prefix + '_' + 'skew'] = sp.stats.skew(x)
        feature_dict[prefix + '_' + 'kurtosis'] = sp.stats.kurtosis(x)
        feature_dict[prefix + '_' + 'kstat_1'] = sp.stats.kstat(x, 1)
        feature_dict[prefix + '_' + 'kstat_2'] = sp.stats.kstat(x, 2)
        feature_dict[prefix + '_' + 'kstat_3'] = sp.stats.kstat(x, 3)
        feature_dict[prefix + '_' + 'kstat_4'] = sp.stats.kstat(x, 4)
        feature_dict[prefix + '_' + 'moment_1'] = sp.stats.moment(x, 1)
        feature_dict[prefix + '_' + 'moment_2'] = sp.stats.moment(x, 2)
        feature_dict[prefix + '_' + 'moment_3'] = sp.stats.moment(x, 3)
        feature_dict[prefix + '_' + 'moment_4'] = sp.stats.moment(x, 4)

        # tsfresh
        feature_dict[prefix + '_' +
                     'abs_energy'] = feature_calculators.abs_energy(x)
        feature_dict[
            prefix + '_' +
            'abs_sum_of_changes'] = feature_calculators.absolute_sum_of_changes(
                x)
        feature_dict[
            prefix + '_' +
            'count_above_mean'] = feature_calculators.count_above_mean(x)
        feature_dict[
            prefix + '_' +
            'count_below_mean'] = feature_calculators.count_below_mean(x)
        feature_dict[prefix + '_' +
                     'mean_abs_change'] = feature_calculators.mean_abs_change(
                         x)
        feature_dict[prefix + '_' +
                     'mean_change'] = feature_calculators.mean_change(x)
        feature_dict[
            prefix + '_' +
            'var_larger_than_std_dev'] = feature_calculators.variance_larger_than_standard_deviation(
                x)
        feature_dict[prefix + '_' +
                     'range_minf_m4000'] = feature_calculators.range_count(
                         x, -np.inf, -4000)
        feature_dict[prefix + '_' +
                     'range_m4000_m3000'] = feature_calculators.range_count(
                         x, -4000, -3000)
        feature_dict[prefix + '_' +
                     'range_m3000_m2000'] = feature_calculators.range_count(
                         x, -3000, -2000)
        feature_dict[prefix + '_' +
                     'range_m2000_m1000'] = feature_calculators.range_count(
                         x, -2000, -1000)
        feature_dict[prefix + '_' +
                     'range_m1000_0'] = feature_calculators.range_count(
                         x, -1000, 0)
        feature_dict[prefix + '_' +
                     'range_0_p1000'] = feature_calculators.range_count(
                         x, 0, 1000)
        feature_dict[prefix + '_' +
                     'range_p1000_p2000'] = feature_calculators.range_count(
                         x, 1000, 2000)
        feature_dict[prefix + '_' +
                     'range_p2000_p3000'] = feature_calculators.range_count(
                         x, 2000, 3000)
        feature_dict[prefix + '_' +
                     'range_p3000_p4000'] = feature_calculators.range_count(
                         x, 3000, 4000)
        feature_dict[prefix + '_' +
                     'range_p4000_pinf'] = feature_calculators.range_count(
                         x, 4000, np.inf)

        feature_dict[
            prefix + '_' +
            'ratio_unique_values'] = feature_calculators.ratio_value_number_to_time_series_length(
                x)
        feature_dict[
            prefix + '_' +
            'first_loc_min'] = feature_calculators.first_location_of_minimum(x)
        feature_dict[
            prefix + '_' +
            'first_loc_max'] = feature_calculators.first_location_of_maximum(x)
        feature_dict[
            prefix + '_' +
            'last_loc_min'] = feature_calculators.last_location_of_minimum(x)
        feature_dict[
            prefix + '_' +
            'last_loc_max'] = feature_calculators.last_location_of_maximum(x)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_10'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 10)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_100'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 100)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_1000'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 1000)
        feature_dict[
            prefix + '_' +
            'autocorrelation_1'] = feature_calculators.autocorrelation(x, 1)
        feature_dict[
            prefix + '_' +
            'autocorrelation_2'] = feature_calculators.autocorrelation(x, 2)
        feature_dict[
            prefix + '_' +
            'autocorrelation_3'] = feature_calculators.autocorrelation(x, 3)
        feature_dict[
            prefix + '_' +
            'autocorrelation_4'] = feature_calculators.autocorrelation(x, 4)
        feature_dict[
            prefix + '_' +
            'autocorrelation_5'] = feature_calculators.autocorrelation(x, 5)
        feature_dict[
            prefix + '_' +
            'autocorrelation_6'] = feature_calculators.autocorrelation(x, 6)
        feature_dict[
            prefix + '_' +
            'autocorrelation_7'] = feature_calculators.autocorrelation(x, 7)
        feature_dict[
            prefix + '_' +
            'autocorrelation_8'] = feature_calculators.autocorrelation(x, 8)
        feature_dict[
            prefix + '_' +
            'autocorrelation_9'] = feature_calculators.autocorrelation(x, 9)
        feature_dict[
            prefix + '_' +
            'autocorrelation_10'] = feature_calculators.autocorrelation(x, 10)
        feature_dict[
            prefix + '_' +
            'autocorrelation_50'] = feature_calculators.autocorrelation(x, 50)
        feature_dict[
            prefix + '_' +
            'autocorrelation_100'] = feature_calculators.autocorrelation(
                x, 100)
        feature_dict[
            prefix + '_' +
            'autocorrelation_1000'] = feature_calculators.autocorrelation(
                x, 1000)
        feature_dict[prefix + '_' + 'c3_1'] = feature_calculators.c3(x, 1)
        feature_dict[prefix + '_' + 'c3_2'] = feature_calculators.c3(x, 2)
        feature_dict[prefix + '_' + 'c3_3'] = feature_calculators.c3(x, 3)
        feature_dict[prefix + '_' + 'c3_4'] = feature_calculators.c3(x, 4)
        feature_dict[prefix + '_' + 'c3_5'] = feature_calculators.c3(x, 5)
        feature_dict[prefix + '_' + 'c3_10'] = feature_calculators.c3(x, 10)
        feature_dict[prefix + '_' + 'c3_100'] = feature_calculators.c3(x, 100)
        for c in range(1, 34):
            feature_dict[prefix + '_' + 'fft_{0}_real'.format(c)] = list(
                feature_calculators.fft_coefficient(x, [{
                    'coeff': c,
                    'attr': 'real'
                }]))[0][1]
            feature_dict[prefix + '_' + 'fft_{0}_imag'.format(c)] = list(
                feature_calculators.fft_coefficient(x, [{
                    'coeff': c,
                    'attr': 'imag'
                }]))[0][1]
            feature_dict[prefix + '_' + 'fft_{0}_ang'.format(c)] = list(
                feature_calculators.fft_coefficient(x, [{
                    'coeff': c,
                    'attr': 'angle'
                }]))[0][1]
        feature_dict[
            prefix + '_' +
            'long_strk_above_mean'] = feature_calculators.longest_strike_above_mean(
                x)
        feature_dict[
            prefix + '_' +
            'long_strk_below_mean'] = feature_calculators.longest_strike_below_mean(
                x)
        feature_dict[prefix + '_' + 'cid_ce_0'] = feature_calculators.cid_ce(
            x, 0)
        feature_dict[prefix + '_' + 'cid_ce_1'] = feature_calculators.cid_ce(
            x, 1)
        feature_dict[prefix + '_' +
                     'binned_entropy_5'] = feature_calculators.binned_entropy(
                         x, 5)
        feature_dict[prefix + '_' +
                     'binned_entropy_10'] = feature_calculators.binned_entropy(
                         x, 10)
        feature_dict[prefix + '_' +
                     'binned_entropy_20'] = feature_calculators.binned_entropy(
                         x, 20)
        feature_dict[prefix + '_' +
                     'binned_entropy_50'] = feature_calculators.binned_entropy(
                         x, 50)
        feature_dict[prefix + '_' +
                     'binned_entropy_80'] = feature_calculators.binned_entropy(
                         x, 80)
        feature_dict[
            prefix + '_' +
            'binned_entropy_100'] = feature_calculators.binned_entropy(x, 100)

        feature_dict[prefix + '_' +
                     'num_crossing_0'] = feature_calculators.number_crossing_m(
                         x, 0)
        feature_dict[prefix + '_' +
                     'num_peaks_1'] = feature_calculators.number_peaks(x, 1)
        feature_dict[prefix + '_' +
                     'num_peaks_3'] = feature_calculators.number_peaks(x, 3)
        feature_dict[prefix + '_' +
                     'num_peaks_5'] = feature_calculators.number_peaks(x, 5)
        feature_dict[prefix + '_' +
                     'num_peaks_10'] = feature_calculators.number_peaks(x, 10)
        feature_dict[prefix + '_' +
                     'num_peaks_50'] = feature_calculators.number_peaks(x, 50)
        feature_dict[prefix + '_' +
                     'num_peaks_100'] = feature_calculators.number_peaks(
                         x, 100)
        feature_dict[prefix + '_' +
                     'num_peaks_500'] = feature_calculators.number_peaks(
                         x, 500)

        feature_dict[prefix + '_' + 'spkt_welch_density_1'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 1
            }]))[0][1]
        feature_dict[prefix + '_' + 'spkt_welch_density_2'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 2
            }]))[0][1]
        feature_dict[prefix + '_' + 'spkt_welch_density_5'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 5
            }]))[0][1]
        feature_dict[prefix + '_' + 'spkt_welch_density_8'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 8
            }]))[0][1]
        feature_dict[prefix + '_' + 'spkt_welch_density_10'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 10
            }]))[0][1]
        feature_dict[prefix + '_' + 'spkt_welch_density_50'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 50
            }]))[0][1]
        feature_dict[prefix + '_' + 'spkt_welch_density_100'] = list(
            feature_calculators.spkt_welch_density(x, [{
                'coeff': 100
            }]))[0][1]

        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_1'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 1)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_2'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 2)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_3'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 3)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_4'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 4)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_10'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 10)
        feature_dict[
            prefix + '_' +
            'time_rev_asym_stat_100'] = feature_calculators.time_reversal_asymmetry_statistic(
                x, 100)

        for r in range(20):
            feature_dict[prefix + '_' + 'symmetry_looking_' +
                         str(r)] = feature_calculators.symmetry_looking(
                             x, [{
                                 'r': r * 0.05
                             }])[0][1]

        for r in range(1, 20):
            feature_dict[
                prefix + '_' + 'large_standard_deviation_' +
                str(r)] = feature_calculators.large_standard_deviation(
                    x, r * 0.05)

        for r in range(1, 10):
            feature_dict[prefix + '_' + 'quantile_' +
                         str(r)] = feature_calculators.quantile(x, r * 0.1)

        for r in ['mean', 'median', 'var']:
            feature_dict[prefix + '_' + 'agg_autocorr_' +
                         r] = feature_calculators.agg_autocorrelation(
                             x, [{
                                 'f_agg': r,
                                 'maxlag': 40
                             }])[0][-1]

        #for r in range(1, 6):
        #    feature_dict[prefix+'_'+'number_cwt_peaks_'+str(r)] = feature_calculators.number_cwt_peaks(x, r)

        for r in range(1, 10):
            # q must be a quantile in (0, 1]
            feature_dict[prefix + '_' + 'index_mass_quantile_' +
                         str(r)] = feature_calculators.index_mass_quantile(
                             x, [{
                                 'q': r * 0.1
                             }])[0][1]

        #for ql in [0., .2, .4, .6, .8]:
        #    for qh in [.2, .4, .6, .8, 1.]:
        #        if ql < qh:
        #            for b in [False, True]:
        #                for f in ["mean", "var"]:
        #                    feature_dict[prefix+'_'+'change_quantiles_'+str(ql)+'_'+str(qh)+'_'+str(b)+'_'+str(f)] = feature_calculators.change_quantiles(x, ql, qh, b, f)

        #for r in [.1, .3, .5, .7, .9]:
        #    feature_dict[prefix+'_'+'approximate_entropy_'+str(r)] = feature_calculators.approximate_entropy(x, 2, r)

        feature_dict[
            prefix + '_' +
            'max_langevin_fixed_point'] = feature_calculators.max_langevin_fixed_point(
                x, 3, 30)

        for r in ['pvalue', 'rvalue', 'intercept', 'slope', 'stderr']:
            feature_dict[prefix + '_' + 'linear_trend_' +
                         str(r)] = feature_calculators.linear_trend(
                             x, [{
                                 'attr': r
                             }])[0][1]

        for r in ['pvalue', 'teststat', 'usedlag']:
            feature_dict[prefix + '_' + 'augmented_dickey_fuller_' +
                         r] = feature_calculators.augmented_dickey_fuller(
                             x, [{
                                 'attr': r
                             }])[0][1]

        for r in [0.5, 1, 1.5, 2, 2.5, 3, 5, 6, 7, 10]:
            feature_dict[prefix + '_' + 'ratio_beyond_r_sigma_' +
                         str(r)] = feature_calculators.ratio_beyond_r_sigma(
                             x, r)

        #for attr in ["pvalue", "rvalue", "intercept", "slope", "stderr"]:
        #    feature_dict[prefix+'_'+'linear_trend_timewise_'+attr] = feature_calculators.linear_trend_timewise(x, [{'attr': attr}])[0][1]
        #for attr in ["rvalue", "intercept", "slope", "stderr"]:
        #    for i in [5, 10, 50]:
        #        for f in ["max", "min", "mean", "var"]:
        #            feature_dict[prefix+'_'+'agg_linear_trend_'+attr+'_'+str(i)+'_'+f] = feature_calculators.agg_linear_trend(x, [{'attr': attr, 'chunk_len': i, 'f_agg': f}])[0][-1]
        #for width in [2, 5, 10, 20]:
        #    for coeff in range(15):
        #        for w in [2, 5, 10, 20]:
        #            feature_dict[prefix+'_'+'cwt_coefficients_'+str(width)+'_'+str(coeff)+'_'+str(w)] = list(feature_calculators.cwt_coefficients(x, [{'widths': width, 'coeff': coeff, 'w': w}]))[0][1]
        #for r in range(10):
        #    feature_dict[prefix+'_'+'partial_autocorr_'+str(r)] = feature_calculators.partial_autocorrelation(x, [{'lag': r}])[0][1]
        # "ar_coefficient": [{"coeff": coeff, "k": k} for coeff in range(5) for k in [10]],
        # "fft_coefficient": [{"coeff": k, "attr": a} for a, k in product(["real", "imag", "abs", "angle"], range(100))],
        # "fft_aggregated": [{"aggtype": s} for s in ["centroid", "variance", "skew", "kurtosis"]],
        # "value_count": [{"value": value} for value in [0, 1, -1]],
        # "range_count": [{"min": -1, "max": 1}, {"min": 1e12, "max": 0}, {"min": 0, "max": 1e12}],
        # "friedrich_coefficients": (lambda m: [{"coeff": coeff, "m": m, "r": 30} for coeff in range(m + 1)])(3),
        #  "energy_ratio_by_chunks": [{"num_segments": 10, "segment_focus": i} for i in range(10)],
        return feature_dict
Example #8
 def function(x):
     param = [{"coeff": self.coeff, "attr": self.attr}]
     return list(fft_coefficient(x, param=param))[0][1]
Example #9
import glob
import pandas as pd
import numpy as np
from tsfresh.feature_extraction.feature_calculators import fft_coefficient
from scipy.fftpack import fft, rfft, irfft

path = './data/data_odiginal/test'
all_files = glob.glob(path + '/*.xls')
test = pd.DataFrame(np.nan, index=range(75000),
                    columns=['id', 'time', '1st', '2nd', '3rd', '4th'])
index = 0
for file in all_files:
    temp = pd.read_excel(file, header=None)
    test.iloc[(index * 7500):((index + 1) * 7500), 2:6] = temp.loc[:7499, :].values
    test.iloc[(index * 7500):((index + 1) * 7500), 0] = index
    test.iloc[(index * 7500):((index + 1) * 7500), 1] = range(1, 7501)
    index += 1

# param must be a list of dictionaries; a bare dict fails inside tsfresh
fft_coefficient(test[test.id == 0]['2nd'], param=[{'coeff': 76, 'attr': 'imag'}])
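Note that fft_coefficient returns an iterator of (name, value) pairs even for a single configuration; to get the bare number, wrap it as the other examples do:

value = list(fft_coefficient(test[test.id == 0]['2nd'],
                             param=[{'coeff': 76, 'attr': 'imag'}]))[0][1]
print(value)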
Example #10
    def features(self, x, y, seg_id):
        feature_dict = dict()
        feature_dict['target'] = y
        feature_dict['seg_id'] = seg_id

        # create features here
        # numpy
        feature_dict['mean'] = np.mean(x)
        feature_dict['max'] = np.max(x)
        feature_dict['min'] = np.min(x)
        feature_dict['std'] = np.std(x)
        feature_dict['var'] = np.var(x)
        feature_dict['ptp'] = np.ptp(x)
        feature_dict['percentile_10'] = np.percentile(x, 10)
        feature_dict['percentile_20'] = np.percentile(x, 20)
        feature_dict['percentile_30'] = np.percentile(x, 30)
        feature_dict['percentile_40'] = np.percentile(x, 40)
        feature_dict['percentile_50'] = np.percentile(x, 50)
        feature_dict['percentile_60'] = np.percentile(x, 60)
        feature_dict['percentile_70'] = np.percentile(x, 70)
        feature_dict['percentile_80'] = np.percentile(x, 80)
        feature_dict['percentile_90'] = np.percentile(x, 90)

        # scipy
        feature_dict['skew'] = sp.stats.skew(x)
        feature_dict['kurtosis'] = sp.stats.kurtosis(x)
        feature_dict['kstat_1'] = sp.stats.kstat(x, 1)
        feature_dict['kstat_2'] = sp.stats.kstat(x, 2)
        feature_dict['kstat_3'] = sp.stats.kstat(x, 3)
        feature_dict['kstat_4'] = sp.stats.kstat(x, 4)
        feature_dict['moment_1'] = sp.stats.moment(x, 1)
        feature_dict['moment_2'] = sp.stats.moment(x, 2)
        feature_dict['moment_3'] = sp.stats.moment(x, 3)
        feature_dict['moment_4'] = sp.stats.moment(x, 4)
        
        feature_dict['abs_energy'] = feature_calculators.abs_energy(x)
        feature_dict['abs_sum_of_changes'] = feature_calculators.absolute_sum_of_changes(x)
        feature_dict['count_above_mean'] = feature_calculators.count_above_mean(x)
        feature_dict['count_below_mean'] = feature_calculators.count_below_mean(x)
        feature_dict['mean_abs_change'] = feature_calculators.mean_abs_change(x)
        feature_dict['mean_change'] = feature_calculators.mean_change(x)
        feature_dict['var_larger_than_std_dev'] = feature_calculators.variance_larger_than_standard_deviation(x)
        feature_dict['range_minf_m4000'] = feature_calculators.range_count(x, -np.inf, -4000)
        feature_dict['range_m4000_m3000'] = feature_calculators.range_count(x, -4000, -3000)
        feature_dict['range_m3000_m2000'] = feature_calculators.range_count(x, -3000, -2000)
        feature_dict['range_m2000_m1000'] = feature_calculators.range_count(x, -2000, -1000)
        feature_dict['range_m1000_0'] = feature_calculators.range_count(x, -1000, 0)
        feature_dict['range_0_p1000'] = feature_calculators.range_count(x, 0, 1000)
        feature_dict['range_p1000_p2000'] = feature_calculators.range_count(x, 1000, 2000)
        feature_dict['range_p2000_p3000'] = feature_calculators.range_count(x, 2000, 3000)
        feature_dict['range_p3000_p4000'] = feature_calculators.range_count(x, 3000, 4000)
        feature_dict['range_p4000_pinf'] = feature_calculators.range_count(x, 4000, np.inf)

        feature_dict['ratio_unique_values'] = feature_calculators.ratio_value_number_to_time_series_length(x)
        feature_dict['first_loc_min'] = feature_calculators.first_location_of_minimum(x)
        feature_dict['first_loc_max'] = feature_calculators.first_location_of_maximum(x)
        feature_dict['last_loc_min'] = feature_calculators.last_location_of_minimum(x)
        feature_dict['last_loc_max'] = feature_calculators.last_location_of_maximum(x)
        feature_dict['time_rev_asym_stat_10'] = feature_calculators.time_reversal_asymmetry_statistic(x, 10)
        feature_dict['time_rev_asym_stat_100'] = feature_calculators.time_reversal_asymmetry_statistic(x, 100)
        feature_dict['time_rev_asym_stat_1000'] = feature_calculators.time_reversal_asymmetry_statistic(x, 1000)
        feature_dict['autocorrelation_5'] = feature_calculators.autocorrelation(x, 5)
        feature_dict['autocorrelation_10'] = feature_calculators.autocorrelation(x, 10)
        feature_dict['autocorrelation_50'] = feature_calculators.autocorrelation(x, 50)
        feature_dict['autocorrelation_100'] = feature_calculators.autocorrelation(x, 100)
        feature_dict['autocorrelation_1000'] = feature_calculators.autocorrelation(x, 1000)
        feature_dict['c3_5'] = feature_calculators.c3(x, 5)
        feature_dict['c3_10'] = feature_calculators.c3(x, 10)
        feature_dict['c3_100'] = feature_calculators.c3(x, 100)
        feature_dict['fft_1_real'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 1, 'attr': 'real'}]))[0][1]
        feature_dict['fft_1_imag'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 1, 'attr': 'imag'}]))[0][1]
        feature_dict['fft_1_ang'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 1, 'attr': 'angle'}]))[0][1]
        feature_dict['fft_2_real'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 2, 'attr': 'real'}]))[0][1]
        feature_dict['fft_2_imag'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 2, 'attr': 'imag'}]))[0][1]
        feature_dict['fft_2_ang'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 2, 'attr': 'angle'}]))[0][1]
        feature_dict['fft_3_real'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 3, 'attr': 'real'}]))[0][1]
        feature_dict['fft_3_imag'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 3, 'attr': 'imag'}]))[0][1]
        feature_dict['fft_3_ang'] = list(feature_calculators.fft_coefficient(x, [{'coeff': 3, 'attr': 'angle'}]))[0][1]
        feature_dict['long_strk_above_mean'] = feature_calculators.longest_strike_above_mean(x)
        feature_dict['long_strk_below_mean'] = feature_calculators.longest_strike_below_mean(x)
        feature_dict['cid_ce_0'] = feature_calculators.cid_ce(x, 0)
        feature_dict['cid_ce_1'] = feature_calculators.cid_ce(x, 1)
        feature_dict['binned_entropy_5'] = feature_calculators.binned_entropy(x, 5)
        feature_dict['binned_entropy_10'] = feature_calculators.binned_entropy(x, 10)
        feature_dict['binned_entropy_20'] = feature_calculators.binned_entropy(x, 20)
        feature_dict['binned_entropy_50'] = feature_calculators.binned_entropy(x, 50)
        feature_dict['binned_entropy_80'] = feature_calculators.binned_entropy(x, 80)
        feature_dict['binned_entropy_100'] = feature_calculators.binned_entropy(x, 100)

        feature_dict['num_crossing_0'] = feature_calculators.number_crossing_m(x, 0)
        feature_dict['num_peaks_10'] = feature_calculators.number_peaks(x, 10)
        feature_dict['num_peaks_50'] = feature_calculators.number_peaks(x, 50)
        feature_dict['num_peaks_100'] = feature_calculators.number_peaks(x, 100)
        feature_dict['num_peaks_500'] = feature_calculators.number_peaks(x, 500)

        feature_dict['spkt_welch_density_1'] = list(feature_calculators.spkt_welch_density(x, [{'coeff': 1}]))[0][1]
        feature_dict['spkt_welch_density_10'] = list(feature_calculators.spkt_welch_density(x, [{'coeff': 10}]))[0][1]
        feature_dict['spkt_welch_density_50'] = list(feature_calculators.spkt_welch_density(x, [{'coeff': 50}]))[0][1]
        feature_dict['spkt_welch_density_100'] = list(feature_calculators.spkt_welch_density(x, [{'coeff': 100}]))[0][1]

        feature_dict['time_rev_asym_stat_1'] = feature_calculators.time_reversal_asymmetry_statistic(x, 1)
        feature_dict['time_rev_asym_stat_10'] = feature_calculators.time_reversal_asymmetry_statistic(x, 10)
        feature_dict['time_rev_asym_stat_100'] = feature_calculators.time_reversal_asymmetry_statistic(x, 100)        

        return feature_dict
Example #11
    def features(self, x, y, seg_id, denoise=False):
        if denoise:
            x_hp = high_pass_filter(x, low_cutoff=10000, sample_rate=4000000)
            x = denoise_signal(x_hp, wavelet='haar', level=1)

        feature_dict = dict()
        feature_dict['target'] = y
        feature_dict['seg_id'] = seg_id

        # create features here

        # lists with parameters to iterate over them
        percentiles = [
            1, 5, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 95, 99
        ]
        hann_windows = [50, 150, 1500, 15000]
        spans = [300, 3000, 30000, 50000]
        windows = [10, 50, 100, 500, 1000, 10000]
        borders = list(range(-4000, 4001, 1000))
        peaks = [10, 20, 50, 100]
        coefs = [1, 5, 10, 50, 100]
        lags = [10, 100, 1000, 10000]
        autocorr_lags = [5, 10, 50, 100, 500, 1000, 5000, 10000]

        # basic stats
        feature_dict['mean'] = x.mean()
        feature_dict['std'] = x.std()
        feature_dict['max'] = x.max()
        feature_dict['min'] = x.min()

        # mean change and basic stats on absolute values
        feature_dict['mean_change_abs'] = np.mean(np.diff(x))
        feature_dict['abs_max'] = np.abs(x).max()
        feature_dict['abs_mean'] = np.abs(x).mean()
        feature_dict['abs_std'] = np.abs(x).std()

        # geometric and harmonic means
        feature_dict['hmean'] = stats.hmean(np.abs(x[np.nonzero(x)[0]]))
        feature_dict['gmean'] = stats.gmean(np.abs(x[np.nonzero(x)[0]]))

        # k-statistic and moments
        for i in range(1, 5):
            feature_dict['kstat_{}'.format(i)] = stats.kstat(x, i)
            feature_dict['moment_{}'.format(i)] = stats.moment(x, i)

        for i in [1, 2]:
            feature_dict['kstatvar_{}'.format(i)] = stats.kstatvar(x, i)

        # aggregations on various slices of data
        for agg_type, slice_length, direction in product(
            ['std', 'min', 'max', 'mean'], [1000, 10000, 50000],
            ['first', 'last']):
            if direction == 'first':
                feature_dict['{}_{}_{}'.format(
                    agg_type, direction,
                    slice_length)] = x[:slice_length].agg(agg_type)
            elif direction == 'last':
                feature_dict['{}_{}_{}'.format(
                    agg_type, direction,
                    slice_length)] = x[-slice_length:].agg(agg_type)

        feature_dict['max_to_min'] = x.max() / np.abs(x.min())
        feature_dict['max_to_min_diff'] = x.max() - np.abs(x.min())
        feature_dict['count_big'] = len(x[np.abs(x) > 500])
        feature_dict['sum'] = x.sum()

        feature_dict['mean_change_rate'] = calc_change_rate(x)
        # calc_change_rate on slices of data
        for slice_length, direction in product([1000, 10000, 50000],
                                               ['first', 'last']):
            if direction == 'first':
                feature_dict['mean_change_rate_{}_{}'.format(
                    direction,
                    slice_length)] = calc_change_rate(x[:slice_length])
            elif direction == 'last':
                feature_dict['mean_change_rate_{}_{}'.format(
                    direction,
                    slice_length)] = calc_change_rate(x[-slice_length:])

        # percentiles on original and absolute values
        for p in percentiles:
            feature_dict['percentile_{}'.format(p)] = np.percentile(x, p)
            feature_dict['abs_percentile_{}'.format(p)] = np.percentile(
                np.abs(x), p)

        feature_dict['trend'] = add_trend_feature(x)
        feature_dict['abs_trend'] = add_trend_feature(x, abs_values=True)

        feature_dict['mad'] = (x - x.mean()).abs().mean()  # Series.mad() was removed in pandas 2.0
        feature_dict['kurt'] = x.kurtosis()
        feature_dict['skew'] = x.skew()
        feature_dict['med'] = x.median()

        feature_dict['Hilbert_mean'] = np.abs(hilbert(x)).mean()

        for hw in hann_windows:
            feature_dict['Hann_window_mean_{}'.format(hw)] = (
                convolve(x, hann(hw), mode='same') / sum(hann(hw))).mean()

        feature_dict['classic_sta_lta1_mean'] = classic_sta_lta(x, 500,
                                                                10000).mean()
        feature_dict['classic_sta_lta2_mean'] = classic_sta_lta(
            x, 5000, 100000).mean()
        feature_dict['classic_sta_lta3_mean'] = classic_sta_lta(x, 3333,
                                                                6666).mean()
        feature_dict['classic_sta_lta4_mean'] = classic_sta_lta(
            x, 10000, 25000).mean()
        feature_dict['classic_sta_lta5_mean'] = classic_sta_lta(x, 50,
                                                                1000).mean()
        feature_dict['classic_sta_lta6_mean'] = classic_sta_lta(x, 100,
                                                                5000).mean()
        feature_dict['classic_sta_lta7_mean'] = classic_sta_lta(x, 333,
                                                                666).mean()
        feature_dict['classic_sta_lta8_mean'] = classic_sta_lta(
            x, 4000, 10000).mean()

        # exponential rolling statistics
        ewma = pd.Series.ewm
        for s in spans:
            feature_dict['exp_Moving_average_{}_mean'.format(s)] = ewma(
                x, span=s).mean().mean(skipna=True)
            feature_dict['exp_Moving_average_{}_std'.format(s)] = ewma(
                x, span=s).mean().std(skipna=True)
            feature_dict['exp_Moving_std_{}_mean'.format(s)] = ewma(
                x, span=s).std().mean(skipna=True)
            feature_dict['exp_Moving_std_{}_std'.format(s)] = ewma(
                x, span=s).std().std(skipna=True)

        feature_dict['iqr1'] = np.subtract(*np.percentile(x, [95, 5]))
        feature_dict['ave10'] = stats.trim_mean(x, 0.1)

        for slice_length, threshold in product([50000, 100000, 150000],
                                               [5, 10, 20, 50, 100]):
            feature_dict['count_big_{}_threshold_{}'.format(
                slice_length,
                threshold)] = (np.abs(x[-slice_length:]) > threshold).sum()
            feature_dict['count_big_{}_less_threshold_{}'.format(
                slice_length,
                threshold)] = (np.abs(x[-slice_length:]) < threshold).sum()

        # tsfresh features (these are slow to calculate on long segments)

        feature_dict['abs_energy'] = feature_calculators.abs_energy(x)
        feature_dict[
            'abs_sum_of_changes'] = feature_calculators.absolute_sum_of_changes(
                x)
        feature_dict[
            'count_above_mean'] = feature_calculators.count_above_mean(x)
        feature_dict[
            'count_below_mean'] = feature_calculators.count_below_mean(x)
        feature_dict['mean_abs_change'] = feature_calculators.mean_abs_change(
            x)
        feature_dict['mean_change'] = feature_calculators.mean_change(x)
        feature_dict[
            'var_larger_than_std_dev'] = feature_calculators.variance_larger_than_standard_deviation(
                x)
        feature_dict['range_minf_m4000'] = feature_calculators.range_count(
            x, -np.inf, -4000)
        feature_dict['range_p4000_pinf'] = feature_calculators.range_count(
            x, 4000, np.inf)

        for i, j in zip(borders, borders[1:]):
            feature_dict['range_{}_{}'.format(
                i, j)] = feature_calculators.range_count(x, i, j)

        feature_dict[
            'ratio_unique_values'] = feature_calculators.ratio_value_number_to_time_series_length(
                x)
        feature_dict[
            'first_loc_min'] = feature_calculators.first_location_of_minimum(x)
        feature_dict[
            'first_loc_max'] = feature_calculators.first_location_of_maximum(x)
        feature_dict[
            'last_loc_min'] = feature_calculators.last_location_of_minimum(x)
        feature_dict[
            'last_loc_max'] = feature_calculators.last_location_of_maximum(x)

        for lag in lags:
            feature_dict['time_rev_asym_stat_{}'.format(
                lag)] = feature_calculators.time_reversal_asymmetry_statistic(
                    x, lag)
        for autocorr_lag in autocorr_lags:
            feature_dict['autocorrelation_{}'.format(
                autocorr_lag)] = feature_calculators.autocorrelation(
                    x, autocorr_lag)
            feature_dict['c3_{}'.format(
                autocorr_lag)] = feature_calculators.c3(x, autocorr_lag)

        for coeff, attr in product([1, 2, 3, 4, 5], ['real', 'imag', 'angle']):
            feature_dict['fft_{}_{}'.format(coeff, attr)] = list(
                feature_calculators.fft_coefficient(x, [{
                    'coeff': coeff,
                    'attr': attr
                }]))[0][1]

        feature_dict[
            'long_strk_above_mean'] = feature_calculators.longest_strike_above_mean(
                x)
        feature_dict[
            'long_strk_below_mean'] = feature_calculators.longest_strike_below_mean(
                x)
        feature_dict['cid_ce_0'] = feature_calculators.cid_ce(x, 0)
        feature_dict['cid_ce_1'] = feature_calculators.cid_ce(x, 1)

        for p in percentiles:
            feature_dict['binned_entropy_{}'.format(
                p)] = feature_calculators.binned_entropy(x, p)

        feature_dict['num_crossing_0'] = feature_calculators.number_crossing_m(
            x, 0)

        for peak in peaks:
            feature_dict['num_peaks_{}'.format(
                peak)] = feature_calculators.number_peaks(x, peak)

        for c in coefs:
            feature_dict['spkt_welch_density_{}'.format(c)] = list(
                feature_calculators.spkt_welch_density(x, [{
                    'coeff': c
                }]))[0][1]
            feature_dict['time_rev_asym_stat_{}'.format(
                c)] = feature_calculators.time_reversal_asymmetry_statistic(
                    x, c)

        # statistics on rolling windows of various sizes
        for w in windows:
            x_roll_std = x.rolling(w).std().dropna().values
            x_roll_mean = x.rolling(w).mean().dropna().values

            feature_dict['ave_roll_std_{}'.format(w)] = x_roll_std.mean()
            feature_dict['std_roll_std_{}'.format(w)] = x_roll_std.std()
            feature_dict['max_roll_std_{}'.format(w)] = x_roll_std.max()
            feature_dict['min_roll_std_{}'.format(w)] = x_roll_std.min()

            for p in percentiles:
                feature_dict['percentile_roll_std_{}_window_{}'.format(
                    p, w)] = np.percentile(x_roll_std, p)

            feature_dict['av_change_abs_roll_std_{}'.format(w)] = np.mean(
                np.diff(x_roll_std))
            feature_dict['av_change_rate_roll_std_{}'.format(w)] = np.mean(
                np.nonzero((np.diff(x_roll_std) / x_roll_std[:-1]))[0])
            feature_dict['abs_max_roll_std_{}'.format(w)] = np.abs(
                x_roll_std).max()

            feature_dict['ave_roll_mean_{}'.format(w)] = x_roll_mean.mean()
            feature_dict['std_roll_mean_{}'.format(w)] = x_roll_mean.std()
            feature_dict['max_roll_mean_{}'.format(w)] = x_roll_mean.max()
            feature_dict['min_roll_mean_{}'.format(w)] = x_roll_mean.min()

            for p in percentiles:
                feature_dict['percentile_roll_mean_{}_window_{}'.format(
                    p, w)] = np.percentile(x_roll_mean, p)

            feature_dict['av_change_abs_roll_mean_{}'.format(w)] = np.mean(
                np.diff(x_roll_mean))
            feature_dict['av_change_rate_roll_mean_{}'.format(w)] = np.mean(
                np.nonzero((np.diff(x_roll_mean) / x_roll_mean[:-1]))[0])
            feature_dict['abs_max_roll_mean_{}'.format(w)] = np.abs(
                x_roll_mean).max()

        return feature_dict
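The features() method above is written against a pandas Series segment. A minimal, hypothetical driver (the function name, segment length, and target alignment are assumptions, not part of the original) might look like:

import pandas as pd

SEG_LEN = 150000  # assumed segment length

def build_training_frame(fg, signal, target):
    # fg: an object exposing the features() method above
    rows = []
    for seg_id in range(len(signal) // SEG_LEN):
        seg = signal.iloc[seg_id * SEG_LEN:(seg_id + 1) * SEG_LEN].reset_index(drop=True)
        y = target.iloc[(seg_id + 1) * SEG_LEN - 1]  # assumed: label at segment end
        rows.append(fg.features(seg, y, seg_id))
    return pd.DataFrame(rows).set_index('seg_id')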
Example #12
def extract_best_features(timeseries, samples_per_window):
    '''FFT coefficients of windowed 'Open' prices, selected by RFE (recursive feature elimination).'''
    extracted_features = pd.DataFrame()
    start = 0
    end = samples_per_window

    # (coeff, attr) pairs picked by RFE, one output column per pair
    best_params = [(10, 'imag'), (14, 'imag'), (2, 'abs'), (3, 'real'),
                   (4, 'real'), (6, 'imag'), (7, 'imag'), (8, 'real')]
    columns = [[] for _ in best_params]
    for _ in tqdm(range(len(timeseries) // samples_per_window)):
        # .as_matrix() was removed in pandas 1.0; use .to_numpy() instead
        window = timeseries[start:end]['Open'].to_numpy().tolist()
        for col, (coeff, attr) in zip(columns, best_params):
            col.append(
                list(
                    feature_calculators.fft_coefficient(
                        window, [{'coeff': coeff, 'attr': attr}]))[0][1])
        start = end
        end += samples_per_window

    for n, col in enumerate(columns, start=1):
        extracted_features['Open_feature{}'.format(n)] = col

    return extracted_features
Example #13
def generate_time_series_feats(x_dataset, dataset_name="raw", test=False):
    make_dir_if_not_exists(os.path.join(FEATURES_PATH, 'tsfeats'))
    time_length = x_dataset.shape[1]

    features_function_dict = {
        "mean": mean,
        "median": median,
        "length": length,
        "minimum": minimum,
        "maximum": maximum,
        "variance": variance,
        "skewness": skewness,
        "kurtosis": kurtosis,
        "sum_values": sum_values,
        "abs_energy": abs_energy,
        "mean_change": mean_change,
        "mean_abs_change": mean_abs_change,
        "count_below_mean": count_below_mean,
        "count_above_mean": count_above_mean,
        "has_duplicate_min": has_duplicate_min,
        "has_duplicate_max": has_duplicate_max,
        "standard_deviation": standard_deviation,
        "absolute_sum_of_changes": absolute_sum_of_changes,
        "last_location_of_minimum": last_location_of_minimum,
        "last_location_of_maximum": last_location_of_maximum,
        "first_location_of_maximum": first_location_of_maximum,
        "longest_strike_below_mean": longest_strike_below_mean,
        "longest_strike_above_mean": longest_strike_above_mean,
        "sum_of_reoccurring_values": sum_of_reoccurring_values,
        "first_location_of_minimum": first_location_of_minimum,
        "sum_of_reoccurring_data_points": sum_of_reoccurring_data_points,
        "variance_larger_than_standard_deviation": variance_larger_than_standard_deviation,
        "ratio_value_number_to_time_series_length": ratio_value_number_to_time_series_length,
        "percentage_of_reoccurring_values_to_all_values": percentage_of_reoccurring_values_to_all_values,
        "binned_entropy_max300": lambda x: binned_entropy(x, 300),
        "binned_entropy_max400": lambda x: binned_entropy(x, 400),
        "cid_ce_true": lambda x: cid_ce(x, True),
        "cid_ce_false": lambda x: cid_ce(x, False),
        "percentage_of_reoccurring_datapoints_to_all_datapoints": percentage_of_reoccurring_datapoints_to_all_datapoints,
    }

    for feature_name, function_call in features_function_dict.items():
        print("{:.<70s}".format("- Processing feature: %s" % feature_name), end="")
        feature_name = 'tsfeats/%s_%s' % (dataset_name, feature_name)
        if not features_exists(feature_name, test):
            feats = x_dataset.apply(function_call, axis=1, raw=True).values
            save_features(feats, feature_name, test)
            print("Done")
        else:
            print("Already generated")

    ar_param_k100 = [{"coeff": i, "k": 100} for i in range(100 + 1)]
    ar_param_k500 = [{"coeff": i, "k": 500} for i in range(500 + 1)]
    agg50_mean_linear_trend = [{"attr": val, "chunk_len": 50, "f_agg": "mean"}
                               for val in ("pvalue", "rvalue", "intercept", "slope", "stderr")]
    aug_dickey_fuler_params = [{"attr": "teststat"}, {"attr": "pvalue"}, {"attr": "usedlag"}]
    energy_ratio_num10_focus5 = [{"num_segments": 10, "segment_focus": 5}]
    fft_aggr_spectrum = [{"aggtype": s}
                         for s in ("centroid", "variance", "skew", "kurtosis")]
    fft_coefficient_real = [{"coeff": i, "attr": "real"} for i in range((time_length + 1) // 2)]
    fft_coefficient_imag = [{"coeff": i, "attr": "imag"} for i in range((time_length + 1) // 2)]
    fft_coefficient_abs = [{"coeff": i, "attr": "abs"} for i in range((time_length + 1) // 2)]
    fft_coefficient_angle = [{"coeff": i, "attr": "angle"} for i in range((time_length + 1) // 2)]
    linear_trend_params = [{"attr": val}
                           for val in ("pvalue", "rvalue", "intercept", "slope", "stderr")]

    other_feats_dict = {
        "ar_coeff100": lambda x: dict(ar_coefficient(x, ar_param_k100)),
        "ar_coeff500": lambda x: dict(ar_coefficient(x, ar_param_k500)),
        "agg50_mean_lin_trend": lambda x: dict(agg_linear_trend(x, agg50_mean_linear_trend)),
        "aug_dickey_fuler": lambda x: dict(augmented_dickey_fuller(x, aug_dickey_fuler_params)),
        "energy_ratio_num10_focus5": lambda x: dict(energy_ratio_by_chunks(x, energy_ratio_num10_focus5)),
        "fft_aggr_spectrum": lambda x: dict(fft_aggregated(x, fft_aggr_spectrum)),
        "fft_coeff_real": lambda x: dict(fft_coefficient(x, fft_coefficient_real)),
        "fft_coeff_imag": lambda x: dict(fft_coefficient(x, fft_coefficient_imag)),
        "fft_coeff_abs": lambda x: dict(fft_coefficient(x, fft_coefficient_abs)),
        "fft_coeff_angle": lambda x: dict(fft_coefficient(x, fft_coefficient_angle)),
        "linear_trend": lambda x: dict(linear_trend(x, linear_trend_params)),
    }

    for feature_name, function_call in other_feats_dict.items():
        print("{:.<70s}".format("- Processing features: %s" % feature_name), end="")
        feature_name = 'tsfeats/%s_%s' % (dataset_name, feature_name)
        if not features_exists(feature_name, test):
            feats_dict = x_dataset.apply(function_call, axis=1,
                                         raw=True).values.tolist()
            feats = pd.DataFrame.from_dict(feats_dict)
            save_features(feats.values, feature_name, test)
            print("Done")
        else:
            print("Already generated")

    # Auto-correlations as features
    print("- Processing Auto-correlation features...")
    corr_dataset = x_dataset.apply(autocorrelation_all, axis=1, raw=True)
    save_features(corr_dataset.values,
                  '%s_auto_correlation_all' % dataset_name, test)

    print("- Processing ARIMA(5,5,1) Features...")
    arima_features = parallelize_row(x_dataset.values,
                                     generate_arima_feats,
                                     n_jobs=2)
    assert arima_features.shape[0] == x_dataset.shape[0]  # Assert the axis
    save_features(arima_features, '%s_arima_5_5_1' % dataset_name, test)
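The helpers features_exists, save_features, autocorrelation_all, parallelize_row, and generate_arima_feats are project-specific and not shown. As orientation only, here is one plausible sketch of autocorrelation_all, assuming it returns the autocorrelation at every lag up to a fixed maximum:

import numpy as np
from tsfresh.feature_extraction.feature_calculators import autocorrelation

def autocorrelation_all(x, max_lag=50):
    # hypothetical helper: autocorrelation at lags 1..max_lag as a flat vector
    return np.array([autocorrelation(x, lag) for lag in range(1, max_lag + 1)])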