def main():
    """Train and evaluate a tsfresh + RandomForest pipeline on the robot data."""
    download_robot_execution_failures()
    df_ts, y = load_robot_execution_failures()

    # Empty feature matrix that only carries the proper index; the features
    # themselves are filled in by the pipeline's augmenter step.
    X = pd.DataFrame(index=y.index)

    # Train/test split of the (still empty) feature matrix and the target.
    X_train, X_test, y_train, y_test = train_test_split(X, y)
    print(df_ts)

    # Feature extraction followed by a Random Forest classifier.
    ppl = Pipeline([
        ('fresh', RelevantFeatureAugmenter(column_id='id', column_sort='time')),
        ('clf', RandomForestClassifier()),
    ])

    # The sklearn pipeline API cannot receive the time-series frame as a fit
    # argument, so it is attached via set_params.  Here df_ts holds the series
    # of both the train and the test split; with separate frames, set_params
    # must be called twice (see pipeline_with_two_datasets.ipynb).
    ppl.set_params(fresh__timeseries_container=df_ts)

    # Fit, predict, and report.
    ppl.fit(X_train, y_train)
    y_pred = ppl.predict(X_test)
    print(classification_report(y_test, y_pred))
def setUp(self):
    """Download the robot execution failures data and build the fixtures.

    Creates an empty design matrix indexed by the distinct time-series ids
    and shrinks the time series to a single value column so the test stays
    fast.
    """
    download_robot_execution_failures()
    self.timeseries, self.y = load_robot_execution_failures()
    self.df = pd.DataFrame(index=self.timeseries.id.unique())
    # Shrink the time series for this test.  The dataset exposes the
    # force/torque columns F_x..T_z (see the sibling setUp fixtures); the
    # previous selection of a non-existent column "a" raised a KeyError.
    self.timeseries = self.timeseries[["id", "time", "F_x"]]
def setUp(self):
    """Load the robot failures fixtures and restrict them to one channel."""
    download_robot_execution_failures()
    timeseries, target = load_robot_execution_failures()
    self.y = target
    # Design matrix: one (empty) feature row per distinct time-series id.
    self.df = pd.DataFrame(index=timeseries.id.unique())
    # Keep only a single force channel so the test stays fast.
    self.timeseries = timeseries[["id", "time", "F_x"]]
def setUp(self):
    """Download the data into a scratch folder and build the fixtures."""
    self.temporary_folder = tempfile.mkdtemp()
    data_file = os.path.join(self.temporary_folder, "data")
    download_robot_execution_failures(file_name=data_file)
    self.timeseries, self.y = load_robot_execution_failures(file_name=data_file)
    # Design matrix: one (empty) feature row per distinct time-series id.
    self.df = pd.DataFrame(index=self.timeseries.id.unique())
    # Keep only a single force channel so the test stays fast.
    self.timeseries = self.timeseries[["id", "time", "F_x"]]
def testLocalTSFresh(self):
    """Run tsfresh feature extraction through a local MarsDistributor."""
    robot_execution_failures.download_robot_execution_failures()
    df, y = robot_execution_failures.load_robot_execution_failures()
    distributor = MarsDistributor()

    # A small slice keeps the test fast.
    df = df.iloc[:200]
    settings = ComprehensiveFCParameters()
    extract_features(
        df,
        column_id='id',
        column_sort='time',
        default_fc_parameters=settings,
        # we impute = remove all NaN features automatically
        impute_function=impute,
        distributor=distributor,
    )
def test_distributed_ts_fresh(setup):
    """Run tsfresh feature extraction through a Mars session distributor."""
    robot_execution_failures.download_robot_execution_failures()
    df, y = robot_execution_failures.load_robot_execution_failures()

    # Attach a synchronous session to the already-running default cluster.
    default_session = get_default_session()
    sync_session = new_session(default_session.address)
    distributor = MarsDistributor(session=sync_session)

    # A small, defensive copy of the first rows keeps the test fast and
    # avoids mutating the original frame.
    df = df.iloc[:200].copy()
    settings = ComprehensiveFCParameters()
    extract_features(
        df,
        column_id='id',
        column_sort='time',
        default_fc_parameters=settings,
        # we impute = remove all NaN features automatically
        impute_function=impute,
        distributor=distributor,
    )
def testDistributedTSFresh(self):
    """Run tsfresh feature extraction against a local Mars web endpoint."""
    robot_execution_failures.download_robot_execution_failures()
    df, y = robot_execution_failures.load_robot_execution_failures()

    endpoint = 'http://127.0.0.1:' + self.web_port
    with new_session(endpoint) as sess:
        distributor = MarsDistributor(sess)
        df = df.iloc[:200]  # small slice keeps the test fast
        settings = ComprehensiveFCParameters()
        extract_features(
            df,
            column_id='id',
            column_sort='time',
            default_fc_parameters=settings,
            # we impute = remove all NaN features automatically
            impute_function=impute,
            distributor=distributor,
        )
def test_timing():
    """Benchmark feature extraction over several series lengths.

    Each length is measured three times to smooth out run-to-run noise;
    the collected timings are written to a CSV file named after the
    current git commit hash.
    """
    from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, \
        load_robot_execution_failures

    download_robot_execution_failures()
    df, y = load_robot_execution_failures()

    # Identify the benchmarked revision by its commit hash.
    commit_hash = check_output(
        ["git", "log", "--format=\"%H\"", "-1"]
    ).decode("ascii").strip().replace("\"", "")

    lengths_to_test = [1, 5, 10, 60, 100, 400, 600, 1000, 2000]
    results = []
    for length in lengths_to_test:
        # Three repetitions per length (the original repeated the call
        # verbatim three times).
        for _ in range(3):
            results.append(test_with_length(length, df))

    results = pd.DataFrame(results)
    results.to_csv("{hash}.dat".format(hash=commit_hash))
def main():
    """End-to-end tsfresh example on the robot execution failures data.

    Downloads the data, plots one healthy and one failure example,
    extracts features, imputes NaNs, and selects significant features.
    """
    # download and load the data
    download_robot_execution_failures()
    timeseries, y = load_robot_execution_failures()

    # plot healthy example
    timeseries[timeseries['id'] == 3].plot(subplots=True, sharex=True, figsize=(10, 10))
    # plot failure example
    timeseries[timeseries['id'] == 21].plot(subplots=True, sharex=True, figsize=(10, 10))
    plt.show()

    # extract features from every time series
    extracted_features = extract_features(timeseries, column_id="id", column_sort="time")
    print('shape of extracted features: {},{}'.format(*extracted_features.shape))

    # fill NaNs based on rules (operates in place)
    impute(extracted_features)

    # keep only the statistically significant features
    features_filtered = select_features(extracted_features, y)
    print('shape of selected features: {},{}'.format(*features_filtered.shape))
    # NOTE: removed a leftover `import pdb; pdb.set_trace()` debugger
    # breakpoint that would halt the script after the last print.
import numpy as np
import pickle
import matplotlib.pyplot as plt
import random
import math
from scipy.fftpack import fft, irfft, rfft
from scipy.optimize import curve_fit
import tsfresh.feature_extraction.feature_calculators as ts
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures, load_robot_execution_failures
from tsfresh import extract_features

download_robot_execution_failures()
timeseries, y = load_robot_execution_failures()


def normalize(signal, range=None, offset=None):
    """Min-max normalize *signal*.

    With ``range`` and ``offset`` both None, the values are scaled into
    (0, 1).  A ``range`` value stretches the result into (-range, +range);
    an ``offset`` is then added on top of the scaled signal.

    NOTE(review): the parameter name ``range`` shadows the builtin; it is
    kept unchanged for backward compatibility with keyword callers.
    """
    lo = np.min(signal)
    span = np.max(signal) - lo
    norm_sig = (signal - lo) / span
    if range is not None:
        norm_sig = (2 * norm_sig - 1) * range
    if offset is not None:
        norm_sig = norm_sig + offset
    return norm_sig

# with open('/Users/jiayun/PycharmProjects/D'
def setUp(self):
    """Fetch the robot execution failures dataset as (X, y) fixtures."""
    download_robot_execution_failures()
    data, target = load_robot_execution_failures()
    self.X = data
    self.y = target
def test_multilabel_target_on_request(self):
    """multiclass=True must yield more than two target classes of object dtype."""
    _, y = load_robot_execution_failures(multiclass=True)
    assert len(y.unique()) > 2
    # `np.object` was removed in NumPy 1.24; the builtin `object` is the
    # documented replacement and compares equally against pandas dtypes.
    assert y.dtype == object
def test_binary_target_is_default(self):
    """By default the loader yields a binary classification target."""
    _, target = load_robot_execution_failures()
    unique_labels = target.unique()
    assert len(unique_labels) == 2
# Notebook-export fragment: the bare expressions below (df.head(), y, ...)
# are cell outputs from the original notebook and are kept as-is.
fig = dict(data=data, layout=layout)
iplot(fig)

# In[50]:
# plot(fig, filename='chart.html')

# # tsfresh example

# In[ ]:
download_robot_execution_failures()

# In[41]:
df, y = load_robot_execution_failures()
# Restrict the frame to a single force channel.
df = df[['id', 'time', 'F_x']]
df.head()

# In[60]:
y.head()

# In[73]:
y

# In[66]:
df.time.unique()