import pandas as pd
import tsfel
from typing import List


def auto_feature_engineering(input_df: pd.DataFrame,
                             cols: List[str],
                             window_size: int = 5) -> pd.DataFrame:
    """Automated feature engineering wrapper that uses the TSFEL
    package to generate temporal-domain features for each column.

    :param input_df: Input data
    :param cols: Columns of ``input_df`` to extract features from
    :param window_size: Size of the sliding window
    :return: Feature matrix with one block of columns per input column
    """
    cfg_file = tsfel.get_features_by_domain("temporal")
    X_df_list = []
    for col in cols:
        current_df = tsfel.time_series_features_extractor(
            cfg_file,
            input_df[col],
            fs=50,
            window_splitter=True,
            window_size=window_size,
        )
        current_df = current_df.add_prefix(col)
        X_df_list.append(current_df)

    X = pd.concat(X_df_list, axis=1)
    return X
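# Hypothetical usage sketch for the wrapper above (synthetic 50 Hz accelerometer-like
# channels; the column names, sizes and one-second window are assumptions, and a
# tsfel version that accepts the window_splitter argument used above is assumed).
import numpy as np

demo_df = pd.DataFrame({
    "acc_x": np.random.randn(500),
    "acc_y": np.random.randn(500),
})
demo_features = auto_feature_engineering(demo_df, cols=["acc_x", "acc_y"], window_size=50)
print(demo_features.shape)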
Example no. 2
def get_last_window(self,
                    mfe_features=None,
                    tsfel_config=None,
                    features_summaries=None,
                    n_classes=None,
                    delta_acc_summary_func=None):
    # summary_funcs and get_window_features are helpers assumed to be defined
    # elsewhere in the surrounding module.
    if features_summaries is None:
        features_summaries = ["max", "min", "mean", "var"]
    if mfe_features is None:
        mfe_features = ["nr_class", "attr_ent", "kurtosis", "skewness"]
    if tsfel_config is None:
        tsfel_config = tsfel.get_features_by_domain()
    if self.history is not None:
        X = [sample for x in self.history for sample in x[0]]
        if delta_acc_summary_func is not None:
            current_acc = summary_funcs[delta_acc_summary_func](
                [x[2] for x in self.history])
            last_window_acc = self.last_window_acc
            self.last_window_acc = current_acc
        else:
            last_window_acc = None
            current_acc = None
        features = get_window_features(X,
                                       mfe_features,
                                       tsfel_config,
                                       features_summaries,
                                       n_classes=n_classes,
                                       last_window_acc=last_window_acc,
                                       current_acc=current_acc)
        self.history = []
        return features
    else:
        return None
Example no. 3
def tsfel_calculator(x):
    import tsfel

    # Retrieve the default configuration with all feature domains
    cfg_file = tsfel.get_features_by_domain()

    # Extract the features
    extracted_features = tsfel.time_series_features_extractor(cfg_file, x)

    return extracted_features
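# Hypothetical usage sketch for the wrapper above; the random-walk input and
# variable names are assumptions for illustration only.
import numpy as np
import pandas as pd

demo_signal = pd.Series(np.cumsum(np.random.randn(300)), name="signal")
demo_features = tsfel_calculator(demo_signal)
print(demo_features.shape)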
Example no. 4
import pandas as pd

df = pd.read_sql_table('display', 'sqlite:///dissertation.db')

df_engineering = df.copy()
#df_engineering['id']=np.repeat(range(1,4033),360)
df_20weeks = df_engineering[:1209600]  # 20 weeks of 10-second readings (20*7*24*360 samples)
#df_tsfresh=df_engineering[['time','id','kWh']]
df_tsfel = df_20weeks[['kWh']]

df_hour = df_20weeks[['kWh']] * 1000  # convert kWh to Wh
df_hour.rename(columns={'kWh': 'Wh'}, inplace=True)

import tsfel
#from tsfresh import extract_relevant_features
cfg = tsfel.get_features_by_domain()
X_train = tsfel.time_series_features_extractor(cfg,
                                               df_tsfel,
                                               window_splitter=True,
                                               window_size=8640)  # one day
print(list(X_train.columns))
X_train = X_train[[
    '0_Absolute energy', '0_Mean', '0_Max', '0_Standard deviation',
    '0_FFT mean coefficient_0', '0_Spectral kurtosis', '0_Skewness',
    '0_Zero crossing rate'
]]

X_hour = tsfel.time_series_features_extractor(cfg,
                                              df_hour,
                                              window_splitter=True,
                                              window_size=360)  # one hour
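# Optional follow-up sketch (not part of the original snippet): prune highly
# correlated hourly features with tsfel's helper, mirroring the later examples.
# The 'X_hour_reduced' name is an assumption for illustration only.
corr_hour = tsfel.correlated_features(X_hour)
X_hour_reduced = X_hour.drop(corr_hour, axis=1)
print(X_hour_reduced.shape)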
Example no. 5
import numpy as np
import pandas as pd
import tsfel
from sklearn.preprocessing import MinMaxScaler

# df is assumed to be loaded earlier in this (truncated) excerpt.
categories = [
    k for k in df.columns
    if 'Average' in k and 'huser' not in k and 'lejl' not in k
]

# Min-max scaling
scaler = MinMaxScaler(feature_range=(0, 1))
df[categories] = scaler.fit_transform(df[categories])

# Extract features once for a representative series (the first category is an
# assumption here) to identify the correlated features dropped in the loop below.
cfg_file = tsfel.get_features_by_domain()
serie = df[categories[0]].dropna()
X_train = tsfel.time_series_features_extractor(cfg_file,
                                               serie,
                                               fs=24,
                                               window_splitter=True,
                                               window_size=720)
corr_features = tsfel.correlated_features(X_train)

X_list = []
for category in categories:
    serie = df[category].dropna()
    X_train = tsfel.time_series_features_extractor(cfg_file,
                                                   serie,
                                                   fs=24,
                                                   window_splitter=True,
                                                   window_size=720)
    X_train.drop(corr_features, axis=1, inplace=True)
    #X_train = scaler.fit_transform(X_train)
    X_list.append(X_train)

mo = []
for m in np.arange(12):
    data = pd.DataFrame()
    for i in np.arange(len(X_list)):
Example no. 6
    def makeDataset(self, signals=False, strategy="ad-hoc"):

        def adhoc(strategy):
            self.features = strategy
            if strategy == "ad-hoc":
                self.features = self.signalsToFeatures(signals)

            # Each feature spec is "<signal> <summary> [derivative flag]"
            featuresSpec = []
            for var in self.features:
                x = var.split(" ")
                if len(x) == 2:
                    x.append(False)
                else:
                    x[2] = True
                featuresSpec.append(x)
                signals.append(x[0])

            for feature in featuresSpec:
                var = feature[0]
                featureType = feature[1]
                isDerivate = feature[2]

                # Derivative features summarise the first difference of the signal
                temp = chunk[var].diff() if isDerivate else chunk[var]
                if self.includeFiltering:
                    temp = self.filterOutliers(temp, self.n)

                if featureType == "mean":
                    w.append(np.nanmean(temp))
                elif featureType == "std":
                    w.append(np.nanstd(temp))
                else:
                    # Any other spec is interpreted as a percentile value
                    w.append(np.nanpercentile(temp, int(featureType)))

            return w
        
        
        Id = []
        X = []
        Y = []

        for j, row in self.files_list.iterrows():
            file = row["cycleName"]
            df = pd.read_csv(self.fpathCameoFiles + file, encoding="latin1", usecols=signals)

            chunks = self.makeChunks(df, int(df.shape[0] / (self.minuteWindow * 60)))
            
            for chunk in chunks:
                chunk = chunk.bfill()
                w = []
                if strategy == "ad-hoc" or isinstance(strategy, list):
                    w = adhoc(strategy)
                elif strategy in ("tsfel-all", "tsfel-all-corr"):
                    cfg = tsfel.get_features_by_domain()
                    w = tsfel.time_series_features_extractor(cfg, chunk)
                    self.features = list(w.columns)
                elif strategy == "tsfel-statistical":
                    cfg = tsfel.get_features_by_domain('statistical')
                    w = tsfel.time_series_features_extractor(cfg, chunk)
                    self.features = list(w.columns)
                elif strategy == "tsfel-temporal":
                    cfg = tsfel.get_features_by_domain('temporal')
                    w = tsfel.time_series_features_extractor(cfg, chunk)
                    self.features = list(w.columns)
                elif strategy == "vest":
                    model = BivariateVEST()
                    features = model.extract_features(df,
                                                      pairwise_transformations=False,
                                                      summary_operators=SUMMARY_OPERATIONS_SMALL)
                    w = multivariate_feature_extraction(df,
                                                        apply_transform_operators=False,
                                                        summary_operators=SUMMARY_OPERATIONS_SMALL)
                    self.features = list(w.columns)

                X.append(w)
                Id.append(file)
                Y.append(row['label'])
        
        
        if strategy == "tsfel-all-corr":
            dataset = pd.concat(X)
            to_drop = tsfel.correlated_features(dataset)
            dataset = dataset.drop(to_drop, axis=1)
            self.features = dataset.columns
            X = dataset

        return np.array(X), np.array(Y), np.array(Id)
Example no. 7
import tsfel


FEATURES_JSON = tsfel.__path__[0] + '/feature_extraction/features.json'

# Full feature configuration loaded directly from the bundled JSON file
settings0 = tsfel.load_json(FEATURES_JSON)

# Per-domain configurations
settings1 = tsfel.get_features_by_domain('statistical')

settings2 = tsfel.get_features_by_domain('temporal')

settings3 = tsfel.get_features_by_domain('spectral')

# Passing None returns the features of every domain
settings4 = tsfel.get_features_by_domain(None)

# Configuration read from a personal Google Sheet named 'Features'
settings5 = tsfel.extract_sheet('Features')
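# Minimal usage sketch (not part of the original snippet): extract the statistical
# configuration defined above from a synthetic signal. The signal, sampling rate
# and variable names are assumptions chosen only for illustration.
import numpy as np

demo_signal = np.sin(2 * np.pi * 1.0 * np.arange(0, 10, 0.01))  # 10 s of a 1 Hz sine at 100 Hz
demo_stat_features = tsfel.time_series_features_extractor(settings1, demo_signal, fs=100)
print(demo_stat_features.shape)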
Example no. 8
import sklearn.metrics as metrics
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import silhouette_score
import pandas as pd
import tsfel


# IMPORT
df = pd.read_csv('Data kategorier.csv', skiprows=2, index_col=['READ_TIME_Date'])
df.index = pd.to_datetime(df.index)

# Houses class
serie = df.iloc[:, 7].dropna()

cfg_file = tsfel.get_features_by_domain(domain='statistical')
X_train = tsfel.time_series_features_extractor(cfg_file, serie, fs=24, window_splitter=True, window_size=720)


# Remove corr features
corr_features = tsfel.correlated_features(X_train)
X_train.drop(corr_features, axis=1, inplace=True)

# Label the 30-day windows by month and move the labels to the index
X_train['months'] = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
X_train.index = X_train['months']
X_train = X_train.drop(['months'], axis=1)


# With the statistical-only configuration the extracted columns are '0_Kurtosis' and '0_Skewness'
Kurtosis = X_train['0_Kurtosis']
#SignifFeatKurtosis = X_train['0_Kurtosis']
Skewness = X_train['0_Skewness']
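# Hypothetical follow-up sketch (not in the original excerpt): the KMeans,
# MinMaxScaler and silhouette_score imports above are unused here, so this shows
# one way they could be applied to the monthly feature matrix. The range of
# cluster counts is an arbitrary assumption.
features_only = X_train.drop(['months'], axis=1, errors='ignore')
scaled = MinMaxScaler().fit_transform(features_only)
for k in range(2, 6):
    labels = KMeans(n_clusters=k, n_init=10, random_state=0).fit_predict(scaled)
    print(k, silhouette_score(scaled, labels))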
from glob import glob

from sklearn.decomposition import PCA
import matplotlib.pylab as pl

pl.rc('text', usetex=True)
pl.rc('font', family='serif', serif='Times')

#%%
# Don't forget to change this PATH
path = 'csv/'
dataframeempty = pd.DataFrame()
W = []
for csv_path in glob(path + 'Residential_*.csv'):
    df = pd.read_csv(csv_path)
    df.dropna(inplace=True)
    df.drop(['date'], axis=1, inplace=True)
    # Retrieves a pre-defined feature configuration file to extract all available features
    cfg = tsfel.get_features_by_domain()

    # Extract features
    X = tsfel.time_series_features_extractor(cfg, df)
    X['File Name'] = csv_path
    dataframeempty = pd.concat([dataframeempty, X])

    cfgx = tsfel.get_features_by_domain(domain="spectral")
    cfgs = tsfel.get_features_by_domain(domain="statistical")
    cfgt = tsfel.get_features_by_domain(domain="temporal")

    x = tsfel.time_series_features_extractor(cfg, df, verbose=0)
    xx = tsfel.time_series_features_extractor(cfgx, df, verbose=0)
    xs = tsfel.time_series_features_extractor(cfgs, df, verbose=0)
    xt = tsfel.time_series_features_extractor(cfgt, df, verbose=0)
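# Hypothetical follow-up sketch (not in the original, which is truncated inside the
# loop above): once the loop has finished, reduce the per-file feature matrix with
# the PCA imported earlier. Variable names and the 2-component choice are
# assumptions; at least two Residential_*.csv files are assumed to exist.
from sklearn.preprocessing import MinMaxScaler

feature_matrix = dataframeempty.drop(columns=['File Name']).dropna(axis=1)
scaled_features = MinMaxScaler().fit_transform(feature_matrix)
components = PCA(n_components=2).fit_transform(scaled_features)
print(components.shape)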