def main():
    """Run the tsfresh robot-execution-failures pipeline example end to end."""
    download_robot_execution_failures()

    timeseries, labels = load_robot_execution_failures()

    # The feature matrix starts empty; it only needs the correct index, since
    # the RelevantFeatureAugmenter fills in the extracted features later.
    feature_matrix = pd.DataFrame(index=labels.index)

    # Hold out a test split before fitting anything.
    X_train, X_test, y_train, y_test = train_test_split(feature_matrix, labels)
    print(timeseries)

    # Pipeline: relevant-feature extraction followed by a random forest.
    pipeline = Pipeline([
        ('fresh', RelevantFeatureAugmenter(column_id='id', column_sort='time')),
        ('clf', RandomForestClassifier()),
    ])

    # Tricky part: the sklearn pipeline API cannot take the time-series
    # dataframe through fit(), so it is attached via set_params. Here one
    # dataframe covers both train and test; with separate dataframes, call
    # set_params twice (see the notebook pipeline_with_two_datasets.ipynb).
    pipeline.set_params(fresh__timeseries_container=timeseries)

    # Fit the whole pipeline.
    pipeline.fit(X_train, y_train)

    # Predicting works as well.
    predictions = pipeline.predict(X_test)

    # Finally, inspect the performance.
    print(classification_report(y_test, predictions))
Example #2
0
    def setUp(self):
        """Fetch the robot-execution-failures fixture and shrink it for speed."""
        download_robot_execution_failures()
        self.timeseries, self.y = load_robot_execution_failures()
        # One row per time-series id, no feature columns yet.
        self.df = pd.DataFrame(index=self.timeseries["id"].unique())

        # Keep only a single sensor channel so the test stays fast.
        self.timeseries = self.timeseries[["id", "time", "a"]]
    def setUp(self):
        """Download the example dataset and reduce it to the F_x channel."""
        download_robot_execution_failures()
        self.timeseries, self.y = load_robot_execution_failures()
        # Empty frame indexed by the distinct time-series ids.
        self.df = pd.DataFrame(index=self.timeseries["id"].unique())
        # Restrict to the F_x force channel to keep feature extraction cheap.
        self.timeseries = self.timeseries[["id", "time", "F_x"]]
Example #4
0
    def setUp(self):
        """Download the fixture into a scratch folder and trim it to one channel."""
        self.temporary_folder = tempfile.mkdtemp()
        data_path = os.path.join(self.temporary_folder, "data")

        # Store and load the dataset from the temporary location.
        download_robot_execution_failures(file_name=data_path)
        self.timeseries, self.y = load_robot_execution_failures(file_name=data_path)
        self.df = pd.DataFrame(index=self.timeseries["id"].unique())

        # Only the F_x channel is needed for these tests.
        self.timeseries = self.timeseries[["id", "time", "F_x"]]
Example #5
0
    def testLocalTSFresh(self):
        """Extract features through a local MarsDistributor on a data subset."""
        robot_execution_failures.download_robot_execution_failures()
        timeseries, labels = robot_execution_failures.load_robot_execution_failures()

        distributor = MarsDistributor()

        # Limit to the first 200 rows to keep the run short.
        timeseries = timeseries.iloc[:200]

        settings = ComprehensiveFCParameters()
        extract_features(timeseries,
                         column_id='id',
                         column_sort='time',
                         default_fc_parameters=settings,
                         # impute removes all NaN features automatically
                         impute_function=impute,
                         distributor=distributor)
Example #6
0
def test_distributed_ts_fresh(setup):
    """Extract features through a Mars distributor bound to a sync session."""
    robot_execution_failures.download_robot_execution_failures()
    timeseries, labels = robot_execution_failures.load_robot_execution_failures()

    # Open a synchronous session against the default session's address.
    default_session = get_default_session()
    sync_session = new_session(default_session.address)
    distributor = MarsDistributor(session=sync_session)

    # Work on a copy of the first 200 rows only.
    timeseries = timeseries.iloc[:200].copy()

    settings = ComprehensiveFCParameters()
    extract_features(timeseries,
                     column_id='id',
                     column_sort='time',
                     default_fc_parameters=settings,
                     # impute removes all NaN features automatically
                     impute_function=impute,
                     distributor=distributor)
    def testDistributedTSFresh(self):
        """Extract features against a Mars web endpoint session."""
        robot_execution_failures.download_robot_execution_failures()
        timeseries, labels = robot_execution_failures.load_robot_execution_failures()

        service_ep = 'http://127.0.0.1:' + self.web_port
        with new_session(service_ep) as session:
            distributor = MarsDistributor(session)

            # Limit to the first 200 rows to keep the run short.
            timeseries = timeseries.iloc[:200]

            settings = ComprehensiveFCParameters()
            extract_features(timeseries,
                             column_id='id',
                             column_sort='time',
                             default_fc_parameters=settings,
                             # impute removes all NaN features automatically
                             impute_function=impute,
                             distributor=distributor)
Example #8
0
def test_timing():
    """Benchmark feature extraction for several series lengths and write the
    timings to a CSV file named after the current git commit hash."""
    from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, \
        load_robot_execution_failures
    download_robot_execution_failures()
    df, y = load_robot_execution_failures()

    # Current commit hash, stripped of the quoting that --format adds.
    commit_hash = check_output(["git", "log", "--format=\"%H\"", "-1"
                                ]).decode("ascii").strip().replace("\"", "")

    lengths_to_test = [1, 5, 10, 60, 100, 400, 600, 1000, 2000]
    results = []

    for length in lengths_to_test:
        # Three repetitions per length to smooth out timing noise.
        for _ in range(3):
            results.append(test_with_length(length, df))

    pd.DataFrame(results).to_csv("{hash}.dat".format(hash=commit_hash))
def main():
    """Walk through the tsfresh robot-execution-failures example.

    Downloads the dataset, plots one healthy and one failing recording,
    extracts features, imputes NaNs in place, and selects the features
    relevant for the target ``y``. Results are printed, not returned.
    """
    # download and load the data
    download_robot_execution_failures()
    timeseries, y = load_robot_execution_failures()

    # plot healthy example (id 3)
    timeseries[timeseries['id'] == 3].plot(subplots=True, sharex=True, figsize=(10, 10))

    # plot failure example (id 21); plt.show() displays both figures
    timeseries[timeseries['id'] == 21].plot(subplots=True, sharex=True, figsize=(10, 10))
    plt.show()

    # extract features
    extracted_features = extract_features(timeseries, column_id="id", column_sort="time")
    print('shape of extracted features: {},{}'.format(*extracted_features.shape))

    # fill NaNs based on rules; impute modifies the frame in place
    impute(extracted_features)

    # filter for significant features
    features_filtered = select_features(extracted_features, y)
    print('shape of selected features: {},{}'.format(*features_filtered.shape))
    # NOTE: removed a leftover debugging breakpoint (import pdb; pdb.set_trace())
    # that would halt any non-interactive run of this example.
import numpy as np
import pickle
import matplotlib.pyplot as plt
import random
import math
from scipy.fftpack import fft, irfft, rfft
from scipy.optimize import curve_fit
import tsfresh.feature_extraction.feature_calculators as ts

from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
from tsfresh import extract_features
# Fetch the example dataset once at import time so the helpers below can use
# the module-level `timeseries` / `y` (downloads on first run only).
download_robot_execution_failures()
timeseries, y = load_robot_execution_failures()


def normalize(signal, range=None, offset=None):
    """Min-max normalize *signal*.

    Parameters
    ----------
    signal : array-like
        Input values.
    range : float, optional
        If given, rescale the normalized signal into (-range, +range).
        NOTE: the name shadows the builtin ``range``; it is kept unchanged
        for backward compatibility with existing callers.
    offset : float, optional
        If given, add this offset to the result.

    Returns
    -------
    numpy.ndarray
        With neither option: values in [0, 1]. A constant signal
        (max == min) maps to all zeros instead of dividing by zero.
    """
    signal = np.asarray(signal)
    lo = np.min(signal)
    span = np.max(signal) - lo
    if span == 0:
        # Constant input: the naive formula divides by zero and yields NaNs;
        # return zeros, the natural limit of the [0, 1] scaling.
        norm_sig = np.zeros_like(signal, dtype=float)
    else:
        norm_sig = (signal - lo) / span
    if range is not None:
        norm_sig = (2 * norm_sig - 1) * range
    if offset is not None:
        norm_sig = norm_sig + offset
    return norm_sig


# with open('/Users/jiayun/PycharmProjects/D'
Example #11
0
 def setUp(self):
     # Download (if needed) and load the example dataset as test fixtures.
     download_robot_execution_failures()
     self.X, self.y = load_robot_execution_failures()
Example #12
0
# -*- coding: utf-8 -*-
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
import matplotlib.pyplot as plt
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh import extract_relevant_features
from tsfresh.utilities.dataframe_functions import impute
import pandas as pd
import numpy as np
from classifiers.base_classification import Base_Classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

### First test, delete after the real test works ###
'''
download_robot_execution_failures()
timeseries, y = load_robot_execution_failures()
print(timeseries.head())
timeseries[timeseries['id'] == 3].plot(subplots=True, sharex=True, figsize=(10,10))
plt.show()

extracted_features = extract_features(timeseries, column_id="id", column_sort="time")
impute(extracted_features)
features_filtered = select_features(extracted_features, y)
'''

### Real Work ###

# First load hmp data in extratrees_classification.py
# NOTE(review): `hmp` is not defined anywhere in this file — presumably it is
# created by running extratrees_classification.py first; confirm before use.
dataframe_1 = hmp.data_with_window["f1"]["training"]
# Empty frame, to be filled later.
dataframe_2 = pd.DataFrame()
 def setUp(self):
     # Fetch the dataset and expose it as the self.X / self.y fixtures.
     download_robot_execution_failures()
     self.X, self.y = load_robot_execution_failures()