def prepare_dataset(self):
    number_of_genres = len(self.genres)
    # Build one dataset per genre
    for genre_index, genre in enumerate(self.genres, start=1):
        self.my_logger.info(
            "[+] Creating dataset for genre {} ({}/{})".format(
                genre, genre_index, number_of_genres))
        dataset_name = "{}_{}".format(genre, self.user_args.run_id)
        GetDataset(genre, config.slice_size, config.dataset_path, dataset_name,
                   self.active_config.path_to_slices_for_training,
                   self.user_args, self.genres).start()
    # Reload all per-genre datasets and flatten them into one list of data points
    all_dataset = self.load_dataset_back_to_memory()
    array_of_array_of_data_points = self.zip_again(all_dataset)
    array_of_data_points = [
        data_point
        for array_of_data_points in array_of_array_of_data_points
        for data_point in array_of_data_points
    ]
    return array_of_data_points
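The double comprehension at the end flattens a list of per-genre lists into a single flat list. A minimal standalone illustration of the same pattern:

batches = [[1, 2], [3], [4, 5]]
flat = [x for batch in batches for x in batch]
assert flat == [1, 2, 3, 4, 5]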
def run(stock: str, model_type: str, stationary=True):
    df = Analysis.get_data(stock)
    df["Company stock name"] = stock.split('/')[-1].split('.')[0]

    # Build the dataset and split it into train/test windows
    dataset = GetDataset(df)
    dataset.get_dataset(scale=False, stationary=stationary)
    train_data, test_data, train_data_len = dataset.split(train_split_ratio=0.8, time_period=30)
    train_data, test_data = dataset.get_torchdata()
    x_train, y_train = train_data
    x_test, y_test = test_data

    if model_type == 'lstm':
        params = rnn_params
        model = TorchRNN(rnn_type=params.rnn_type, input_dim=params.input_dim,
                         hidden_dim=params.hidden_dim, output_dim=params.output_dim,
                         num_layers=params.num_layers)
    elif model_type == 'transformer':
        params = transf_params
        model = TransformerModel(params)
    else:
        raise ValueError('Wrong model type selection, select either "lstm" or "transformer"!')

    clf = Classifier(model)
    clf.train([x_train, y_train], params=params)

    y_scaler = dataset.y_scaler
    predictions = clf.predict([x_test, y_test], y_scaler, data_scaled=False)
    predictions = pd.DataFrame(predictions)
    predictions.reset_index(drop=True, inplace=True)
    predictions.index = df.index[-len(x_test):]
    predictions['Actual'] = y_test[:-1]
    predictions.rename(columns={0: 'Predictions'}, inplace=True)

    if stationary:
        # Undo the differencing applied when the series was made stationary
        predictions = Analysis.inverse_stationary_data(old_df=df, new_df=predictions,
                                                       orig_feature='Actual',
                                                       new_feature='Predictions',
                                                       diff=12, do_orig=False)
    plot_predictions(df, train_data_len, predictions["Predictions"].values, model_type)
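A usage sketch, assuming the surrounding module provides Analysis, rnn_params, transf_params, and the model classes; the CSV path is a hypothetical placeholder:

if __name__ == '__main__':
    # 'data/AAPL.csv' is a hypothetical path; any file readable by Analysis.get_data works
    run('data/AAPL.csv', model_type='lstm', stationary=True)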
def start_test_real():
    # Build the test dataset for the unknown genre, then run prediction
    dataset_name = "{}_{}".format(config.unknown_genre, user_args.run_id_real_test)
    dataset = GetDataset(config.unknown_genre, config.slice_size, config.dataset_path,
                         dataset_name, active_config.path_to_slices_for_testing,
                         user_args, genres).start()
    test_task = Test(user_args, dataset, model, path_to_model)
    test_task.predict()
    test_task.rearrange_result_file()
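start_test_real takes no arguments and reads config, user_args, active_config, genres, model, and path_to_model from module scope. A minimal sketch of a guarded entry point, assuming those globals are initialized earlier in the script:

if __name__ == '__main__':
    # config, user_args, genres, model, and path_to_model must be set up before this call
    start_test_real()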
from dataset import GetDataset
from model import GetModel

# Train params
epochs = 40
input_shape = (64, 64, 3)
num_classes = 4
batch_size = 128

# Get the dataset and the model
(train, val) = GetDataset(batch_size)
model = GetModel(input_shape, num_classes)

# Steps to use in fit_generator
train_steps = train.n / train.batch_size
val_steps = val.n / val.batch_size

# Compile settings
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

# The big deal
history = model.fit_generator(generator=train,
                              steps_per_epoch=train_steps,
                              validation_data=val,
                              validation_steps=val_steps,
                              epochs=epochs,
                              verbose=1)

# Save the model after train ('model.h5' is a hypothetical filename)
model.save('model.h5')
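fit_generator expects integer step counts, and train.n / train.batch_size is a float whenever the sizes do not divide evenly. A sketch of a ceil-based alternative for the step computation, using the same generators as above:

import math

# Round up so samples in the final partial batch are still seen each epoch
train_steps = math.ceil(train.n / train.batch_size)
val_steps = math.ceil(val.n / val.batch_size)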
def run_experiment(base_dataset_path, dataset_name, test_size, n_cal, alpha=0.1,
                   experiment=0, bbox_method='NNet', out_dir='./results',
                   random_state=2020, n_jobs=1, verbose=False):
    # Set random seed
    np.random.seed(random_state)
    # Random state for this experiment
    random_state = 2020 + experiment

    X, Y = GetDataset(dataset_name, base_dataset_path)
    # Add a small amount of noise to the response to break ties
    Y += 1e-6 * np.random.normal(size=Y.shape)
    y_min = min(Y)
    y_max = max(Y)

    # Skip datasets that are too small for the requested splits
    if X.shape[0] <= 2 * n_cal + test_size:
        return

    # Determine output file
    out_file = out_dir + "/summary.csv"
    print(out_file)

    # Set random seeds for this experiment
    random.seed(random_state)
    np.random.seed(random_state)
    torch.manual_seed(random_state)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(random_state)

    # Split the data into training, calibration, and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=test_size,
                                                        random_state=random_state)
    X_train, X_calib, Y_train, Y_calib = train_test_split(X_train, Y_train, test_size=n_cal,
                                                          random_state=random_state)

    # Standardize features using statistics from the training set only
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_calib = scaler.transform(X_calib)
    X_test = scaler.transform(X_test)

    n_train = X_train.shape[0]
    assert n_cal == X_calib.shape[0]
    n_test = X_test.shape[0]

    results = pd.DataFrame()

    if len(X.shape) == 1:
        n_features = 1
    else:
        n_features = X.shape[1]

    # Initialize the black-box model
    if bbox_method == 'NNet':
        epochs = 2000
        lr = 0.0005
        batch_size = n_train
        dropout = 0.1
        grid_quantiles = np.arange(0.01, 1.0, 0.01)
        bbox = QNet(grid_quantiles, n_features, no_crossing=True, batch_size=batch_size,
                    dropout=dropout, num_epochs=epochs, learning_rate=lr,
                    calibrate=1, verbose=verbose)
    elif bbox_method == 'RF':
        n_estimators = 100
        min_samples_leaf = 50
        grid_quantiles = np.arange(0.01, 1.0, 0.01)
        bbox = QRF(grid_quantiles, n_estimators=n_estimators,
                   min_samples_leaf=min_samples_leaf, random_state=2020,
                   n_jobs=n_jobs, verbose=verbose)
    else:
        raise ValueError("Unknown bbox_method: choose 'NNet' or 'RF'")

    # Train the black-box model
    bbox.fit(X_train, Y_train)

    # Define the conformalization methods to compare
    methods = {
        'CHR': CHR(bbox, ymin=y_min, ymax=y_max, y_steps=1000, randomize=True),
        'DistSplit': DistSplit(bbox, ymin=y_min, ymax=y_max),
        'DCP': DCP(bbox, ymin=y_min, ymax=y_max),
        'CQR': CQR(bbox),
        'CQR2': CQR2(bbox)
    }

    for method_name in methods:
        # Apply the conformalization method
        method = methods[method_name]
        method.calibrate(X_calib, Y_calib, alpha)
        # Compute predictions on test data
        pred = method.predict(X_test)
        # Evaluate results
        res = evaluate_predictions(pred, Y_test, X=X_test)
        # Add information about this experiment
        res['Box'] = bbox_method
        res['Dataset'] = dataset_name
        res['Method'] = method_name
        res['Experiment'] = experiment
        res['Nominal'] = 1 - alpha
        res['n_train'] = n_train
        res['n_cal'] = n_cal
        res['n_test'] = n_test
        # Add results to the list
        results = results.append(res)

    # Write results to the output file
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    results.to_csv(out_file, index=False, float_format="%.4f")
    print("Updated summary of results on\n {}".format(out_file))
    sys.stdout.flush()

    return results
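An invocation sketch; the dataset path, name, and split sizes below are hypothetical placeholders, not values fixed by the function:

results = run_experiment(base_dataset_path='./data', dataset_name='my_dataset',
                         test_size=2000, n_cal=1000, alpha=0.1,
                         experiment=0, bbox_method='RF', n_jobs=4)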
# Input arguments
dataset_name = str(sys.argv[1])
bbox_method = str(sys.argv[2])
experiment = int(sys.argv[3])

print(dataset_name)
print(bbox_method)
print(experiment)

# Set random seed
np.random.seed(random_state)

# Random state for this experiment
random_state = 2020 + experiment

X, Y = GetDataset(dataset_name, base_dataset_path)

# Add a small amount of noise to the response to break ties
Y += 1e-6 * np.random.normal(size=Y.shape)

y_min = min(Y)
y_max = max(Y)

# Abort if the dataset is too small for the requested splits
if X.shape[0] <= 2 * n_cal + test_size:
    raise ValueError("Dataset too small for the requested calibration/test sizes")

# Temporary file to store the black-box model (completing the truncated line;
# the original script may append a file extension)
tmp_file = tmp_dir + "/dataset_" + dataset_name + "_bbox_" + bbox_method + "_exp_" + str(experiment)
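Since this fragment indexes sys.argv directly, a short guard (a sketch, not part of the original script) avoids an IndexError on a wrong invocation:

# Validate the command line before parsing positional arguments
if len(sys.argv) != 4:
    print("Usage: python <script>.py <dataset_name> <bbox_method> <experiment>")
    sys.exit(1)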