def main():
    """Fit a time-series forest on the generated training data and pickle it.

    All settings are read from the module-level ``args`` namespace. The fitted
    model is written to ``<args.save_file_name>.pickle``. Any exception raised
    while fitting or saving is printed instead of being propagated.
    """
    data_source = DataGenerator(
        labeled_data_file=args.labeled_data_file,
        data_util_file=args.data_util_file,
        threshold=args.threshold,
        dt=args.dt,
        L=args.L,
        tmin=args.tmin,
        tmax=args.tmax,
    )
    # Only the training split is used below; the second split is discarded.
    training_data, _ = data_source.get_data(
        ts_nth_element=args.ts_nth_element,
        training_frac=0.7,
    )

    # Base estimator: interval features (mean, std, slope) feeding a decision tree.
    interval_features = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
    )
    base_tree = Pipeline([
        ('extract', interval_features),
        ('clf', DecisionTreeClassifier()),
    ])

    forest = TimeSeriesForestClassifier(
        estimator=base_tree,
        n_estimators=args.n_estimators,
        # Anything other than 'entropy' falls back to 'gini'.
        criterion='entropy' if args.criterion == 'entropy' else 'gini',
        bootstrap=True,
        oob_score=True,
        random_state=1,
        verbose=1,
    )

    # Column 0 holds the labels; the remaining columns are the series values.
    nested_features = detabularize(pd.DataFrame(training_data[:, 1:]))

    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            forest = forest.fit(nested_features, training_data[:, 0])

        pickle_path = '{save_file_name}.pickle'.format(save_file_name=args.save_file_name)
        with open(pickle_path, 'wb') as model_file:
            pickle.dump(forest, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
def test_indices(n_components):
    """Check which indices PCATransformer keeps and which it rebuilds.

    The row index and column index of the input must be carried over to the
    output unchanged, while the time index inside the cells must be a fresh
    integer range whose length is the fitted number of components.
    """
    np.random.seed(42)
    nested = detabularize(pd.DataFrame(data=np.random.randn(10, 5)))

    # Non-default labels, so an index that gets reset would be detected.
    nested.columns = pd.CategoricalIndex(['col_0'])
    nested.index = pd.Int64Index(list(range(10, 20)))

    transformer = PCATransformer(n_components=n_components)
    transformed = transformer.fit_transform(nested)

    assert nested.columns.equals(transformed.columns)
    assert nested.index.equals(transformed.index)

    time_index = get_time_index(transformed)
    assert time_index.equals(pd.Int64Index(range(transformer.pca.n_components_)))
def main():
    """Score ten consecutive slices of the point grid with a pickled model.

    All settings are read from the module-level ``args`` namespace.

    Two comma-separated files are loaded and joined column-wise into one table:

    * columns 0-1: the first two columns of ``args.labeled_data_file``
      (used as ``R`` and ``C`` below),
    * column 2: a 0/1 label, 0 where the third column of the labeled file is
      below ``args.threshold`` and 1 otherwise,
    * columns 3-5: the first three columns of ``args.data_util_file``
      (used as ``x0``, ``y0``, ``z0`` below).

    For each of the ten slices of ``training_len`` rows, the series produced
    by ``runge_kutta_4`` are nested with ``detabularize`` and passed to the
    model's ``predict_proba``. The result is pickled to
    ``<args.out_file_name>_<i>.pickle``.
    """
    L, dt = args.L, args.dt
    kdmin = int(args.tmin / args.dt)
    kd = int(args.tmax / args.dt) + 1

    # Column slicing replaces the former row-by-row Python copies; both input
    # files must hold 2-D tables (more than one row), as before.
    xyz_points = np.loadtxt(args.data_util_file, delimiter=',')[:, :3]
    labeled = np.loadtxt(args.labeled_data_file, delimiter=',')

    # ``np.where`` keeps the original rule exactly: 0 only where the value is
    # strictly below the threshold, 1 in every other case (including NaN).
    labels = np.where(labeled[:, 2] < args.threshold, 0.0, 1.0)
    cr_point_list = np.column_stack((labeled[:, :2], labels, xyz_points))

    # NOTE(review): slice size and slice count are hard-coded; presumably the
    # input table has 10 * 18000 rows - confirm against the data files.
    training_len = 18000

    # SECURITY: unpickling executes arbitrary code embedded in the file. Only
    # load model files that come from a trusted source.
    with open(
            '{model_file_name}.pickle'.format(
                model_file_name=args.model_file_name), 'rb') as model_pickle:
        model = pickle.load(model_pickle)

    for i in range(0, 10):
        batch = cr_point_list[training_len * i:training_len * (i + 1)]

        R = torch.from_numpy(batch[:, 0]).double().cpu()
        C = torch.from_numpy(batch[:, 1]).double().cpu()
        x0 = torch.from_numpy(batch[:, 3]).double().cpu()
        y0 = torch.from_numpy(batch[:, 4]).double().cpu()
        z0 = torch.from_numpy(batch[:, 5]).double().cpu()

        x = runge_kutta_4(R, C, x0, y0, z0, training_len, dt, kd, kdmin, L,
                          args.ts_nth_element)
        x = detabularize(pd.DataFrame(x))

        with parallel_backend('threading', n_jobs=args.n_jobs):
            x = model.predict_proba(x)

        with open('{out_file_name}_{i}.pickle'.format(out_file_name=args.out_file_name, i=i), 'wb') \
                as model_probabilities:
            pickle.dump(x, model_probabilities, protocol=pickle.HIGHEST_PROTOCOL)

        # Drop the per-slice tensors and results before building the next slice.
        del R, C, x0, y0, z0, x, batch
def test_output_format_dim(len_series, n_instances, n_components):
    """Check the type and dimensions of PCATransformer's output.

    The output must be a DataFrame with one row per input instance, and its
    tabular width must be the smaller of ``n_components`` and the tabular
    width of the input.
    """
    np.random.seed(42)
    raw_values = np.random.randn(n_instances, len_series)
    nested = detabularize(pd.DataFrame(data=raw_values))

    transformer = PCATransformer(n_components=n_components)
    transformed = transformer.fit_transform(nested)

    # Output type and number of rows.
    assert isinstance(transformed, pd.DataFrame)
    assert transformed.shape[0] == nested.shape[0]

    # Number of principal components in the output.
    n_output_columns = tabularize(transformed).shape[1]
    n_input_columns = tabularize(nested).shape[1]
    assert n_output_columns == min(n_components, n_input_columns)
def test_pca_results(n_components):
    """Check that PCATransformer reproduces the result of ``PCA`` on the same data."""
    np.random.seed(42)
    tabular = pd.DataFrame(data=np.random.randn(10, 5))

    # Reference result: PCA applied directly to the tabular frame.
    reference_pca = PCA(n_components=n_components)
    expected = reference_pca.fit_transform(tabular)

    # Result under test: the same data in nested form through the transformer.
    nested = detabularize(tabular)
    transformer = PCATransformer(n_components=n_components)
    actual = tabularize(transformer.fit_transform(nested))

    assert np.allclose(np.asarray(expected), np.asarray(actual))
def main():
    """Demo: load ItalyPowerDemand from ARFF, nest it, and score a 1-NN DTW classifier.

    Reads ``ItalyPowerDemand_TRAIN.arff`` and ``ItalyPowerDemand_TEST.arff``
    from the working directory and prints intermediate frames along the way.
    """

    def split_features_target(frame):
        # Separate the 'target' column (cast to int) from the feature columns.
        return frame.drop('target', axis=1), frame['target'].astype(int)

    # loadarff returns a 2-tuple: element 0 holds the time-series data,
    # element 1 the description of the attributes.
    train_raw = arff.loadarff('ItalyPowerDemand_TRAIN.arff')
    test_raw = arff.loadarff('ItalyPowerDemand_TEST.arff')

    # Element 0 of each tuple becomes a plain (tabular) DataFrame.
    train_frame = pd.DataFrame(train_raw[0])
    test_frame = pd.DataFrame(test_raw[0])

    # Show the tabular frame and whether it counts as nested.
    print(train_frame.head())
    print('\n Is nested the df above?', is_nested_dataframe(train_frame), '\n')

    # Split both datasets into features and integer targets.
    X_train, y_train = split_features_target(train_frame)
    print(X_train.head(), y_train.head(), '\n')
    X_test, y_test = split_features_target(test_frame)

    # Convert the tabular feature frames into nested frames.
    nested_train = detabularize(X_train)
    nested_test = detabularize(X_test)
    print(nested_train.head())
    print('Is nested the detabularized df above?', is_nested_dataframe(nested_train), '\n')

    # The steps above could be replaced by this single loader call; its
    # output is printed next to the manual result for comparison.
    X, y = load_from_arff_to_dataframe('ItalyPowerDemand_TRAIN.arff')
    print(nested_train.head(), X.head(), type(y_train), type(y))

    # Fit and score the classifier.
    classifier = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
    classifier.fit(nested_train, y_train)
    accuracy = round(classifier.score(nested_test, y_test), 4)
    print('The score of the KNN classifier is:', accuracy)
def main():
    """Fit a DTW k-nearest-neighbours classifier on the generated data and pickle it.

    All settings are read from the module-level ``args`` namespace. The fitted
    model is written to ``<args.save_file_name>.pickle``. Any exception raised
    while fitting or saving is printed instead of being propagated.
    """
    data_source = DataGenerator(
        labeled_data_file=args.labeled_data_file,
        data_util_file=args.data_util_file,
        threshold=args.threshold,
        dt=args.dt,
        L=args.L,
        tmin=args.tmin,
        tmax=args.tmax,
    )
    # Only the training split is used below; the second split is discarded.
    training_data, _ = data_source.get_data(
        ts_nth_element=args.ts_nth_element,
        training_frac=0.7,
    )

    classifier = KNeighborsTimeSeriesClassifier(
        n_neighbors=args.n_neighbors,
        verbose=1,
        metric="dtw",
    )

    # Column 0 holds the labels; the remaining columns are the series values.
    nested_features = detabularize(pd.DataFrame(training_data[:, 1:]))

    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            classifier = classifier.fit(nested_features, training_data[:, 0])

        pickle_path = '{save_file_name}.pickle'.format(save_file_name=args.save_file_name)
        with open(pickle_path, 'wb') as model_file:
            pickle.dump(classifier, model_file, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
def inverse_transform(self, X, y=None):
    """Convert a tabular dataframe back into a nested dataframe.

    The row index, time index and column labels stored on this transformer
    (``_index``, ``_time_index``, ``_columns``) are applied to the result.

    Parameters
    ----------
    X : pandas DataFrame
        Tabular dataframe with primitives in cells.
    y : array-like, optional (default=None)
        Not used by this method.

    Returns
    -------
    Xt : pandas DataFrame
        Nested dataframe with series in cells.
    """
    self.check_is_fitted()
    checked = check_X(X)

    nested = detabularize(
        checked,
        index=self._index,
        time_index=self._time_index,
    )
    # Put back the column labels stored on the transformer.
    nested.columns = self._columns
    return nested
def transform(self, X, y=None):
    """Join the columns of multivariate panel data into one univariate column.

    Each instance's series are concatenated in time, so the result has the
    same number of rows and a single column of longer series.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_samples, n_features]
        Nested dataframe with time-series in cells.
    y : array-like, optional (default=None)
        Not used by this method.

    Returns
    -------
    Xt : pandas DataFrame
        Transformed pandas DataFrame with same number of rows and single
        column.
    """
    self.check_is_fitted()
    checked = check_X(X)

    # Flatten every column into one wide table, then nest that table again
    # as a single column.
    flattened = tabularize(checked)
    return detabularize(flattened)
def transform(self, X, y=None):
    """Join the columns of multivariate panel data into one univariate column.

    Each instance's series are concatenated in time, so the result has the
    same number of rows and a single column of longer series.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_samples, n_features]
        Nested dataframe with time-series in cells.
    y : array-like, optional (default=None)
        Not used by this method.

    Returns
    -------
    Xt : pandas DataFrame
        Transformed pandas DataFrame with same number of rows and single
        column.

    Raises
    ------
    ValueError
        If ``X`` is not a pandas DataFrame.
    """
    check_is_fitted(self, 'is_fitted_')

    if isinstance(X, pd.DataFrame):
        # Flatten every column into one wide table, then nest that table
        # again as a single column.
        flattened = tabularize(X)
        return detabularize(flattened)

    raise ValueError(
        f"Expected input is a pandas DataFrame, but found {type(X)}")
def inverse_transform(self, X, y=None):
    """Convert a tabular dataframe back into a nested dataframe.

    The row index and time index stored on this transformer (``_index``,
    ``_time_index``) are applied to the result. The input is validated only
    when ``self.check_input`` is truthy.

    Parameters
    ----------
    X : pandas DataFrame
        Tabular dataframe with primitives in cells.
    y : array-like, optional (default=None)
        Not used by this method.

    Returns
    -------
    Xt : pandas DataFrame
        Nested dataframe with series in cells.
    """
    check_is_fitted_in_transform(self, '_time_index')

    # TODO check if for each column, all rows have equal-index series
    if self.check_input:
        validate_X(X)

    return detabularize(
        X,
        index=self._index,
        time_index=self._time_index,
    )
def test_pca_kwargs(kwargs):
    """Smoke test: PCATransformer accepts extra keyword arguments and still fits.

    The test passes as long as ``fit_transform`` does not raise.
    """
    np.random.seed(42)
    random_table = pd.DataFrame(data=np.random.randn(10, 5))
    nested = detabularize(random_table)

    transformer = PCATransformer(n_components=1, **kwargs)
    transformer.fit_transform(nested)