def ensemble_max_vote(features_train_text,
                      features_train_notext,
                      features_test_text,
                      features_test_notext,
                      y_train,
                      y_test,
                      imbalance_sampling=None):
    # Hard-voting ensemble: one model per feature view, the final label is the
    # mode of the two individual predictions.
    model_notext = ModelFactory.get_model('LogisticRegression')
    model_text = ModelFactory.get_model('RandomForest')

    model_text.fit_model(features_train_text, y_train)
    model_notext.fit_model(features_train_notext, y_train)

    pred1 = model_text.predict(features_test_text)
    pred2 = model_notext.predict(features_test_notext)

    final_pred = np.array([])
    for i in range(len(y_test)):
        val = mode([pred1[i], pred2[i]])
        final_pred = np.append(final_pred, val)
    # get_scores takes the model name first, as in the other snippets below
    return ScoreMetrics.get_scores('ensemble', y_test, final_pred)
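The per-sample voting loop above can also be vectorized. A minimal sketch, assuming SciPy >= 1.9 is available; note that scipy.stats.mode breaks ties toward the smaller label, which may differ slightly from whichever `mode` helper the original code imports:

from scipy import stats

stacked = np.stack([pred1, pred2])            # shape: (n_models, n_samples)
final_pred, _ = stats.mode(stacked, axis=0, keepdims=False)  # majority label per sample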
def ensemble_averaging(features_train_text,
                       features_train_notext,
                       features_test_text,
                       features_test_notext,
                       y_train,
                       y_test,
                       w1,
                       w2,
                       imbalance_sampling=None):
    # Soft-voting ensemble: the class probabilities of the two models are
    # combined with weights w1 (non-text model) and w2 (text model).
    model_notext = ModelFactory.get_model('SVM')
    model_text = ModelFactory.get_model('RandomForest', optimised=False)
    y_train_text = y_train

    if imbalance_sampling is not None:
        features_train_text, y_train_text = ImbalanceSampling.get_sampled_data(
            imbalance_sampling, features_train_text, y_train)
        features_train_notext, y_train = ImbalanceSampling.get_sampled_data(
            imbalance_sampling, features_train_notext, y_train)

        # models that perform best with imbalance sampling
        model_notext = ModelFactory.get_model('MLP', optimised=True)
        model_text = ModelFactory.get_model('SVM', optimised=False)

    model_notext.fit_model(features_train_notext, y_train)
    model_text.fit_model(features_train_text, y_train_text)

    pred1 = model_notext.predict_proba(features_test_notext)
    pred2 = model_text.predict_proba(features_test_text)

    final_pred = np.array([])
    for i in range(len(y_test)):
        first = w1 * pred1[i][0] + w2 * pred2[i][0]   # weighted probability of class 0
        second = w1 * pred1[i][1] + w2 * pred2[i][1]  # weighted probability of class 1
        val = 1
        if first > second:
            val = 0
        final_pred = np.append(final_pred, val)
    return ScoreMetrics.get_scores('ensemble', y_test, final_pred)
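The weighted-averaging loop can be written without a per-sample Python loop. A minimal vectorized sketch that keeps the same tie behaviour as above (a tie goes to class 1); since both columns are scaled by the same weights, the decision does not depend on whether w1 and w2 sum to 1:

probs = w1 * np.asarray(pred1) + w2 * np.asarray(pred2)   # shape: (n_samples, 2)
final_pred = np.where(probs[:, 0] > probs[:, 1], 0, 1)    # class 0 only when strictly larger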
def execute_model(self, name, imbalance_sampling=None):
    model = ModelFactory.get_model(name)
    split_data = self.split_data()
    features_train = self.extract_features(split_data['x_train'])
    features_test = self.extract_features(split_data['x_test'])
    features_train = self.combine_features(features_train, False)
    features_test = self.combine_features(features_test, False)
    x_train = features_train
    y_train = split_data['y_train']
    if imbalance_sampling:
        x_train, y_train = ImbalanceSampling.get_sampled_data(
            imbalance_sampling, x_train, y_train)
    model.fit_model(x_train, y_train)
    y_pred = model.predict(features_test)
    ScoreMetrics.print_scores(split_data['y_test'], y_pred)
    return model
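A hypothetical call site for the method above; the `runner` object and the 'SMOTE' strategy string are assumptions, not part of the original snippets:

# `runner` stands in for whatever object defines execute_model above.
model = runner.execute_model('RandomForest', imbalance_sampling='SMOTE')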
def compare_models(self, features_train, features_test, y_train, y_test):
    """
    Executes models of all types implemented in this project and prints their results
    """
    x_train = features_train
    model_names = ModelFactory.get_models_list()
    score_df = pd.DataFrame(
        columns=['name', 'accuracy', 'precision', 'recall', 'f1'])
    if self.imbalance_sampling:
        x_train, y_train = ImbalanceSampling.get_sampled_data(
            self.imbalance_sampling, x_train, y_train)

    for name in model_names:
        model = ModelFactory.get_model(name, optimised=False)
        model.fit_model(x_train, y_train)
        y_pred = model.predict(features_test)
        score = ScoreMetrics.get_scores(name, y_test, y_pred)
        print('-------')
        print(name)
        ScoreMetrics.print_scores(y_test, y_pred)
        # DataFrame.append was removed in pandas 2.0; concat is the equivalent
        score_df = pd.concat([score_df, score])
    return score_df
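A hypothetical way to consume the returned frame, assuming get_scores yields one row per model with the columns declared above (`runner` again stands in for the owning object):

score_df = runner.compare_models(features_train, features_test, y_train, y_test)
print(score_df.sort_values('f1', ascending=False).to_string(index=False))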
Example #5
    # Fix the random seed of Pytorch when using GPU.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.random_state)
        torch.cuda.manual_seed(args.random_state)

    # Fix the random seed of Pytorch when using CPU.
    torch.manual_seed(args.random_state)
    torch.random.manual_seed(args.random_state)

    # get dataset and alphabets
    dataset = DataIOSST2(config['data'])
    if config['use_pre_embedding']:
        seq_alphabet = AlphabetEmbeddings(**config['embedding'])
        seq_alphabet.load_embeddings_from_file()
    else:
        seq_alphabet = AlphabetEmbeddings(**config['embedding'])
        seq_alphabet.add_instance(dataset.train_word)
    label_alphabet = Alphabet('label', False, False)
    label_alphabet.add_instance(dataset.train_label)

    # get model
    if args.load is not None:
        model = torch.load(args.load)
    else:
        model = ModelFactory.get_model(config, args, seq_alphabet,
                                       label_alphabet)

    process = Process(config, args, dataset, model, seq_alphabet,
                      label_alphabet)
    process.train()
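The example above is truncated and only shows the PyTorch side of the seeding. A common fuller recipe also seeds Python's `random` module and NumPy; a minimal sketch (the helper name `seed_everything` is hypothetical and not part of the original code):

import random
import numpy as np
import torch

def seed_everything(seed):
    random.seed(seed)                 # Python's built-in RNG
    np.random.seed(seed)              # NumPy RNG
    torch.manual_seed(seed)           # PyTorch CPU RNG
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)  # all CUDA devices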
def main():
    parser = argparse.ArgumentParser(
        description=
        "Train and run k-fold cross-validation on physionet BCI 2000 dataset")
    parser.add_argument("-c",
                        "--num_classes",
                        type=int,
                        default=4,
                        choices=[2, 3, 4])
    parser.add_argument(
        "-m",
        "--model_name",
        type=str,
        help="Name of the model used",
        default="eegA",
        choices=["eegA", "eegB", "eegC", "eegD", "eegA_LSTM", "eegD_LSTM"])
    parser.add_argument("-cf", "--num_conv_filters", type=int, default=32)
    parser.add_argument(
        '--stride',
        dest='stride',
        help="Whether stride is used in the last Conv2D of first block",
        action='store_true')
    parser.add_argument('--no-stride', dest='stride', action='store_false')
    parser.set_defaults(stride=True)
    parser.add_argument("-dr", "--dropout_rate", type=float, default=0.5)
    parser.add_argument("-bs", "--batch_size", type=int, default=16)
    parser.add_argument("-e", "--epochs", type=int, default=10)
    parser.add_argument("-p",
                        "--patience",
                        help="Parameter for EarlyStopping callback",
                        type=int,
                        default=5)
    parser.add_argument("-kf", "--k_fold", type=int, default=5)
    parser.add_argument(
        "-o",
        "--output_name",
        type=str,
        help="logs will be put in ./logs/fit/output_name. If none is"
        "provided, time at run start is chosen",
        default=None)

    args = parser.parse_args()

    # input validation
    try:
        with open("./data/args_bci2000_preprocess.txt", 'r') as f:
            num_windows = json.load(f)['num_windows']
    except FileNotFoundError:
        raise FileNotFoundError(
            "Preprocessed data arguments not found. Run main_preprocess_data_bci2000.py and try again."
        )
    if num_windows == 1 and 'LSTM' in args.model_name:
        raise ValueError(
            "LSTM can only be chosen for data preprocessed with -w > 1")
    if num_windows > 1 and 'LSTM' not in args.model_name:
        raise ValueError(
            "Only LSTM models can be chosen for data preprocessed with -w > 1")

    if args.output_name is None:
        args.output_name = datetime.now().strftime('%Y%m%d-%H%M%S')

    model_factory = ModelFactory(dataset="BCI2000",
                                 output_name=args.output_name,
                                 model_name=args.model_name,
                                 num_classes=args.num_classes,
                                 num_conv_filters=args.num_conv_filters,
                                 dropout_rate=args.dropout_rate,
                                 use_stride=args.stride)

    X, y = load_preprocessed_bci2000_data(num_classes=args.num_classes)

    kf = KFold(n_splits=args.k_fold, shuffle=True, random_state=42)
    for idx, [train, test] in enumerate(kf.split(X, y)):
        X_train = X[train]
        X_test = X[test]
        y_train = y[train]
        y_test = y[test]

        X_train, scaler = scale_data(X_train)
        X_test, _ = scale_data(X_test, scaler)

        model = model_factory.get_model()

        history = model.fit(x=X_train,
                            y=y_train,
                            batch_size=args.batch_size,
                            epochs=args.epochs,
                            validation_data=(X_test, y_test),
                            callbacks=model_factory.get_callbacks(
                                patience=args.patience,
                                log_dir_suffix=f"{idx + 1}"),
                            shuffle=True)

        write_history(history.history, log_dir=model_factory.get_log_dir())

        with open(f"{model_factory.get_log_dir()}/model_summary.txt",
                  'w') as file:
            model.summary(print_fn=lambda x: file.write(x + '\n'))

        # write parameters used for training
        with open(f"{model_factory.get_log_dir()}/input_args.txt",
                  'w') as file:
            file.write(json.dumps(args.__dict__, indent=4))
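The scale_data helper is not shown in these snippets; its call pattern (fit on the training fold, reuse the returned scaler on the test fold, so no information leaks from test to train) suggests something like the following sketch, with sklearn's StandardScaler as an assumption:

from sklearn.preprocessing import StandardScaler

def scale_data(X, scaler=None):
    """Standardise features; fit a new scaler on X unless one is passed in."""
    flat = X.reshape(len(X), -1)          # scalers expect 2-D input
    if scaler is None:
        scaler = StandardScaler().fit(flat)
    return scaler.transform(flat).reshape(X.shape), scaler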
Example #7
def main():
    """
    Does the following:
    - For each subject:
        - Load preprocessed data from subject (preprocessed from 'A0XT.mat')
        - Train model on ALL data from 'A0XT.mat'
        - Evaluate model on test data originating from 'A0XE.mat'
    """

    parser = argparse.ArgumentParser(
        description="Train and run model for data set 2a of BCI Competition IV."
    )

    parser.add_argument(
        "-m",
        "--model_name",
        type=str,
        help="Name of the model used",
        default="eegA",
        choices=["eegA", "eegB", "eegC", "eegD", "eegA_LSTM", "eegD_LSTM"])
    parser.add_argument("-cf", "--num_conv_filters", type=int, default=32)
    parser.add_argument(
        '--stride',
        dest='stride',
        help="Whether stride is used in the last Conv2D of first block",
        action='store_true')
    parser.add_argument('--no-stride', dest='stride', action='store_false')
    parser.set_defaults(stride=True)
    parser.add_argument("-dr", "--dropout_rate", type=float, default=0.5)
    parser.add_argument("-bs", "--batch_size", type=int, default=16)
    parser.add_argument("-e", "--epochs", type=int, default=10)
    parser.add_argument("-p",
                        "--patience",
                        help="Parameter for EarlyStopping callback",
                        type=int,
                        default=10)
    parser.add_argument("-kf", "--k_fold", type=int, default=5)
    parser.add_argument(
        "-o",
        "--output_name",
        type=str,
        help="logs will be put in ./logs/fit/output_name. If none is"
        "provided, time at run start is chosen",
        default=None)

    args = parser.parse_args()

    # input validation
    try:
        with open("./data/args_bci2aiv_preprocess.txt", 'r') as f:
            num_windows = json.load(f)['num_windows']
    except FileNotFoundError:
        raise FileNotFoundError(
            "Preprocessed data arguments not found. Run main_preprocess_data_bci2aiv.py and try again."
        )
    if num_windows == 1 and 'LSTM' in args.model_name:
        raise ValueError(
            "LSTM can only be chosen for data preprocessed with -w > 1")
    if num_windows > 1 and 'LSTM' not in args.model_name:
        raise ValueError(
            "Only LSTM models can be chosen for data preprocessed with -w > 1")

    if args.output_name is None:
        args.output_name = datetime.now().strftime('%Y%m%d-%H%M%S')

    model_factory = ModelFactory(dataset="BCI2aIV",
                                 output_name=args.output_name,
                                 model_name=args.model_name,
                                 num_conv_filters=args.num_conv_filters,
                                 use_stride=args.stride,
                                 dropout_rate=args.dropout_rate
                                 )  # num_classes is always 4 for this dataset

    for subject_num in g.subject_num_range_bci2aiv:
        X_train, y_train = load_single_subject_bci2aiv_data(
            subject_num=subject_num, is_training=True)
        X_test, y_test = load_single_subject_bci2aiv_data(
            subject_num=subject_num, is_training=False)

        X_train, scaler = scale_data(X_train)
        X_test, _ = scale_data(X_test, scaler)

        model = model_factory.get_model()

        history = model.fit(x=X_train,
                            y=y_train,
                            batch_size=args.batch_size,
                            epochs=args.epochs,
                            validation_data=(X_test, y_test),
                            callbacks=model_factory.get_callbacks(
                                patience=args.patience,
                                log_dir_suffix=f"{subject_num}"),
                            shuffle=True)

        write_history(history.history,
                      subject_num=subject_num,
                      log_dir=model_factory.get_log_dir())

        with open(f"{model_factory.get_log_dir()}/model_summary.txt",
                  'w') as file:
            model.summary(print_fn=lambda x: file.write(x + '\n'))

        # write parameters used for training
        with open(f"{model_factory.get_log_dir()}/input_args.txt",
                  'w') as file:
            file.write(json.dumps(args.__dict__, indent=4))
def initialize(self):
    model_factory = ModelFactory()
    self.model = model_factory.get_model()

def execute_model_data(self, name, x_train, y_train, x_test):
    model = ModelFactory.get_model(name)
    model.fit_model(x_train, y_train)
    y_pred = model.predict(x_test)
    return y_pred
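A hypothetical call site for execute_model_data; `runner`, the chosen model name, and scoring the result with ScoreMetrics are assumptions for illustration:

# Train one named model on a given split and score the predictions by hand.
y_pred = runner.execute_model_data('LogisticRegression', x_train, y_train, x_test)
ScoreMetrics.print_scores(y_test, y_pred)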