def ensemble_max_vote(features_train_text, features_train_notext,
                      features_test_text, features_test_notext,
                      y_train, y_test, imbalance_sampling=None):
    """Majority-vote ensemble of a text model and a non-text model.

    Trains a RandomForest on the text features and a LogisticRegression on
    the non-text features, then takes the per-sample mode of the two
    predictions as the final label.

    :param features_train_text: training features for the text model
    :param features_train_notext: training features for the non-text model
    :param features_test_text: test features for the text model
    :param features_test_notext: test features for the non-text model
    :param y_train: training labels (shared by both models)
    :param y_test: test labels
    :param imbalance_sampling: accepted for signature parity with
        ensemble_averaging; currently unused here
    :return: score structure from ScoreMetrics.get_scores
    """
    model_notext = ModelFactory.get_model('LogisticRegression')
    model_text = ModelFactory.get_model('RandomForest')
    model_text.fit_model(features_train_text, y_train)
    model_notext.fit_model(features_train_notext, y_train)
    pred1 = model_text.predict(features_test_text)
    pred2 = model_notext.predict(features_test_notext)
    # Per-sample majority vote. With only two voters, mode() resolves ties
    # in favour of the first (text-model) prediction — assumes
    # statistics.mode semantics; confirm which mode() the file imports.
    # Building the array in one pass avoids the O(n^2) np.append loop.
    final_pred = np.array([mode([pred1[i], pred2[i]])
                           for i in range(len(y_test))])
    # BUG FIX: every other scoring call in this module passes a model name
    # as the first argument (see ensemble_averaging and compare_models);
    # this call was missing it.
    return ScoreMetrics.get_scores('ensemble', y_test, final_pred)
def ensemble_averaging(features_train_text, features_train_notext,
                       features_test_text, features_test_notext,
                       y_train, y_test, w1, w2, imbalance_sampling=None):
    """Weighted soft-voting ensemble of a text model and a non-text model.

    Without imbalance sampling: SVM on non-text features, unoptimised
    RandomForest on text features. With imbalance sampling, both feature
    sets are resampled and the models that perform best under sampling are
    used instead (optimised MLP for non-text, unoptimised SVM for text).
    The class probabilities of the two models are combined with weights
    w1 (non-text) and w2 (text) and the larger weighted probability wins.

    :param w1: weight applied to the non-text model's probabilities
    :param w2: weight applied to the text model's probabilities
    :param imbalance_sampling: sampling strategy name, or None to disable
    :return: score structure from ScoreMetrics.get_scores
    """
    model_notext = ModelFactory.get_model('SVM')
    model_text = ModelFactory.get_model('RandomForest', optimised=False)
    # The text features keep their own label vector because the two feature
    # sets are resampled independently below.
    y_train_text = y_train
    if imbalance_sampling is not None:  # idiom fix: was `!= None`
        features_train_text, y_train_text = ImbalanceSampling.get_sampled_data(
            imbalance_sampling, features_train_text, y_train)
        features_train_notext, y_train = ImbalanceSampling.get_sampled_data(
            imbalance_sampling, features_train_notext, y_train)
        # models that perform best with imbalance sampling
        model_notext = ModelFactory.get_model('MLP', optimised=True)
        model_text = ModelFactory.get_model('SVM', optimised=False)
    model_notext.fit_model(features_train_notext, y_train)
    model_text.fit_model(features_train_text, y_train_text)
    pred1 = model_notext.predict_proba(features_test_notext)
    pred2 = model_text.predict_proba(features_test_text)
    # Weighted average of the two class-0 / class-1 probabilities; predict
    # class 0 when its combined probability is strictly larger. Built in a
    # single pass instead of the O(n^2) np.append loop.
    final_pred = np.array([
        0 if (w1 * pred1[i][0] + w2 * pred2[i][0]) >
             (w1 * pred1[i][1] + w2 * pred2[i][1])
        else 1
        for i in range(len(y_test))
    ])
    return ScoreMetrics.get_scores('ensemble', y_test, final_pred)
def execute_model(self, name, imbalance_sampling=None):
    """Train one named model on this instance's data and print its scores.

    Splits the data, extracts and combines features for both partitions,
    optionally resamples the training set, fits the model, and prints the
    test-set scores.

    :param name: model identifier understood by ModelFactory
    :param imbalance_sampling: sampling strategy name, or None/falsy to skip
    :return: the fitted model
    """
    model = ModelFactory.get_model(name)
    split = self.split_data()
    train_features = self.combine_features(
        self.extract_features(split['x_train']), False)
    test_features = self.combine_features(
        self.extract_features(split['x_test']), False)
    x_train, y_train = train_features, split['y_train']
    if imbalance_sampling:
        x_train, y_train = ImbalanceSampling.get_sampled_data(
            imbalance_sampling, x_train, y_train)
    model.fit_model(x_train, y_train)
    predictions = model.predict(test_features)
    ScoreMetrics.print_scores(split['y_test'], predictions)
    return model
def compare_models(self, features_train, features_test, y_train, y_test):
    """Executes models of all types implemented in this project and prints
    their results.

    :param features_train: training feature matrix
    :param features_test: test feature matrix
    :param y_train: training labels
    :param y_test: test labels
    :return: DataFrame with one score row per model
        (columns: name, accuracy, precision, recall, f1)
    """
    x_train = features_train
    model_names = ModelFactory.get_models_list()
    if self.imbalance_sampling:
        x_train, y_train = ImbalanceSampling.get_sampled_data(
            self.imbalance_sampling, x_train, y_train)
    scores = []
    for name in model_names:
        model = ModelFactory.get_model(name, optimised=False)
        model.fit_model(x_train, y_train)
        y_pred = model.predict(features_test)
        scores.append(ScoreMetrics.get_scores(name, y_test, y_pred))
        print('-------')
        print(name)
        ScoreMetrics.print_scores(y_test, y_pred)
    score_df = pd.DataFrame(
        columns=['name', 'accuracy', 'precision', 'recall', 'f1'])
    # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # pandas 2.0; accumulate the per-model scores and concatenate once.
    if scores:
        score_df = pd.concat([score_df, *scores])
    return score_df
# Reproducibility: seed every PyTorch RNG from the CLI-provided state.
seed = args.random_state
if torch.cuda.is_available():
    # GPU generators (all devices, then the current device).
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
# CPU generators.
torch.manual_seed(seed)
torch.random.manual_seed(seed)

# Build the SST-2 dataset and the sequence/label alphabets.
dataset = DataIOSST2(config['data'])
if config['use_pre_embedding']:
    # Pre-trained embeddings are loaded from file.
    seq_alphabet = AlphabetEmbeddings(**config['embedding'])
    seq_alphabet.load_embeddings_from_file()
else:
    # Otherwise the vocabulary is built from the training words.
    seq_alphabet = AlphabetEmbeddings(**config['embedding'])
    seq_alphabet.add_instance(dataset.train_word)
label_alphabet = Alphabet('label', False, False)
label_alphabet.add_instance(dataset.train_label)

# Resume from a checkpoint when --load is given, else build a fresh model.
if args.load is not None:
    model = torch.load(args.load)
else:
    model = ModelFactory.get_model(config, args, seq_alphabet, label_alphabet)

process = Process(config, args, dataset, model, seq_alphabet, label_alphabet)
process.train()
def main():
    """Train and k-fold cross-validate EEG models on the PhysioNet BCI2000
    dataset, logging history, model summaries and input args per run."""
    parser = argparse.ArgumentParser(
        description=
        "Train and run k-fold cross-validation on physionet BCI 2000 dataset")
    parser.add_argument("-c", "--num_classes", type=int, default=4,
                        choices=[2, 3, 4])
    parser.add_argument(
        "-m", "--model_name", type=str, help="Name of the model used",
        default="eegA",
        choices=["eegA", "eegB", "eegC", "eegD", "eegA_LSTM", "eegD_LSTM"])
    parser.add_argument("-cf", "--num_conv_filters", type=int, default=32)
    parser.add_argument(
        '--stride', dest='stride',
        help="Whether stride is used in the last Conv2D of first block",
        action='store_true')
    parser.add_argument('--no-stride', dest='stride', action='store_false')
    parser.set_defaults(stride=True)
    parser.add_argument("-dr", "--dropout_rate", type=float, default=0.5)
    parser.add_argument("-bs", "--batch_size", type=int, default=16)
    parser.add_argument("-e", "--epochs", type=int, default=10)
    parser.add_argument("-p", "--patience",
                        help="Parameter for EarlyStopping callback",
                        type=int, default=5)
    parser.add_argument("-kf", "--k_fold", type=int, default=5)
    parser.add_argument(
        "-o", "--output_name", type=str,
        help="logs will be put in ./logs/fit/output_name. If none is"
        "provided, time at run start is chosen",
        default=None)
    args = parser.parse_args()

    # input validation
    try:
        # BUG FIX: the file handle was opened without ever being closed;
        # a context manager releases it even if json.load raises.
        with open("./data/args_bci2000_preprocess.txt", 'r') as f:
            num_windows = json.load(f)['num_windows']
    except FileNotFoundError:
        raise FileNotFoundError(
            "Preprocessed data arguments not found. Run main_preprocess_data_bci2000.py and try again."
        )
    if num_windows == 1 and 'LSTM' in args.model_name:
        raise ValueError(
            "LSTM can only be chosen for data preprocessed with -w > 1")
    if num_windows > 1 and 'LSTM' not in args.model_name:
        raise ValueError(
            "Only LSTM models can be chosen for data preprocessed with -w > 1")

    if args.output_name is None:
        # Default log folder name: run start timestamp.
        args.output_name = datetime.now().strftime('%Y%m%d-%H%M%S')

    model_factory = ModelFactory(dataset="BCI2000",
                                 output_name=args.output_name,
                                 model_name=args.model_name,
                                 num_classes=args.num_classes,
                                 num_conv_filters=args.num_conv_filters,
                                 dropout_rate=args.dropout_rate,
                                 use_stride=args.stride)
    X, y = load_preprocessed_bci2000_data(num_classes=args.num_classes)

    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=42)
    for idx, [train, test] in enumerate(kf.split(X, y)):
        X_train = X[train]
        X_test = X[test]
        y_train = y[train]
        y_test = y[test]
        # Fit the scaler on the training fold only, then reuse it on test.
        X_train, scaler = scale_data(X_train)
        X_test, _ = scale_data(X_test, scaler)
        model = model_factory.get_model()
        history = model.fit(x=X_train,
                            y=y_train,
                            batch_size=args.batch_size,
                            epochs=args.epochs,
                            validation_data=(X_test, y_test),
                            callbacks=model_factory.get_callbacks(
                                patience=args.patience,
                                log_dir_suffix=f"{idx + 1}"),
                            shuffle=True)
        write_history(history.history, log_dir=model_factory.get_log_dir())
        with open(f"{model_factory.get_log_dir()}/model_summary.txt",
                  'w') as file:
            model.summary(print_fn=lambda x: file.write(x + '\n'))

    # write parameters used for training
    with open(f"{model_factory.get_log_dir()}/input_args.txt", 'w') as file:
        file.write(json.dumps(args.__dict__, indent=4))
def main():
    """
    Does the following:
    - For each subject:
        - Load preprocessed data from subject (preprocessed from 'A0XT.mat')
        - Train model on ALL data from 'A0XT.mat'
        - Evaluate model on test data originating from 'A0XE.mat'
    """
    parser = argparse.ArgumentParser(
        description="Train and run model for data set 2a of BCI Competition IV."
    )
    parser.add_argument(
        "-m", "--model_name", type=str, help="Name of the model used",
        default="eegA",
        choices=["eegA", "eegB", "eegC", "eegD", "eegA_LSTM", "eegD_LSTM"])
    parser.add_argument("-cf", "--num_conv_filters", type=int, default=32)
    parser.add_argument(
        '--stride', dest='stride',
        help="Whether stride is used in the last Conv2D of first block",
        action='store_true')
    parser.add_argument('--no-stride', dest='stride', action='store_false')
    parser.set_defaults(stride=True)
    parser.add_argument("-dr", "--dropout_rate", type=float, default=0.5)
    parser.add_argument("-bs", "--batch_size", type=int, default=16)
    parser.add_argument("-e", "--epochs", type=int, default=10)
    parser.add_argument("-p", "--patience",
                        help="Parameter for EarlyStopping callback",
                        type=int, default=10)
    parser.add_argument("-kf", "--k_fold", type=int, default=5)
    parser.add_argument(
        "-o", "--output_name", type=str,
        help="logs will be put in ./logs/fit/output_name. If none is"
        "provided, time at run start is chosen",
        default=None)
    args = parser.parse_args()

    # input validation
    try:
        # BUG FIX: the file handle was opened without ever being closed;
        # a context manager releases it even if json.load raises.
        with open("./data/args_bci2aiv_preprocess.txt", 'r') as f:
            num_windows = json.load(f)['num_windows']
    except FileNotFoundError:
        raise FileNotFoundError(
            "Preprocessed data arguments not found. Run main_preprocess_data_bci2aiv.py and try again."
        )
    if num_windows == 1 and 'LSTM' in args.model_name:
        raise ValueError(
            "LSTM can only be chosen for data preprocessed with -w > 1")
    if num_windows > 1 and 'LSTM' not in args.model_name:
        raise ValueError(
            "Only LSTM models can be chosen for data preprocessed with -w > 1")

    if args.output_name is None:
        # Default log folder name: run start timestamp.
        args.output_name = datetime.now().strftime('%Y%m%d-%H%M%S')

    model_factory = ModelFactory(dataset="BCI2aIV",
                                 output_name=args.output_name,
                                 model_name=args.model_name,
                                 num_conv_filters=args.num_conv_filters,
                                 use_stride=args.stride,
                                 dropout_rate=args.dropout_rate
                                 )  # num_classes is always 4 for this dataset
    for subject_num in g.subject_num_range_bci2aiv:
        X_train, y_train = load_single_subject_bci2aiv_data(
            subject_num=subject_num, is_training=True)
        X_test, y_test = load_single_subject_bci2aiv_data(
            subject_num=subject_num, is_training=False)
        # Fit the scaler on the training session only, then reuse it on test.
        X_train, scaler = scale_data(X_train)
        X_test, _ = scale_data(X_test, scaler)
        model = model_factory.get_model()
        history = model.fit(x=X_train,
                            y=y_train,
                            batch_size=args.batch_size,
                            epochs=args.epochs,
                            validation_data=(X_test, y_test),
                            callbacks=model_factory.get_callbacks(
                                patience=args.patience,
                                log_dir_suffix=f"{subject_num}"),
                            shuffle=True)
        write_history(history.history, subject_num=subject_num,
                      log_dir=model_factory.get_log_dir())
        with open(f"{model_factory.get_log_dir()}/model_summary.txt",
                  'w') as file:
            model.summary(print_fn=lambda x: file.write(x + '\n'))

    # write parameters used for training
    with open(f"{model_factory.get_log_dir()}/input_args.txt", 'w') as file:
        file.write(json.dumps(args.__dict__, indent=4))
def initialize(self):
    """Build this instance's model from a fresh ModelFactory."""
    self.model = ModelFactory().get_model()
def execute_model_data(self, name, x_train, y_train, x_test):
    """Fit the named model on the given training data and return its
    predictions for x_test.

    :param name: model identifier understood by ModelFactory
    :param x_train: training feature matrix
    :param y_train: training labels
    :param x_test: feature matrix to predict on
    :return: predicted labels for x_test
    """
    model = ModelFactory.get_model(name)
    model.fit_model(x_train, y_train)
    return model.predict(x_test)