Ejemplo n.º 1
0
def test_if_ModelTrainer_method___train____is_running_properly():
    """Check that ``ModelTrainer.train`` produces both model files.

    Runs training against the ``data/model_trainer/train`` fixture folder
    located next to this test module, then asserts that ``classifier.hdf5``
    and ``generator.hdf5`` exist in its ``models`` sub-folder.
    """
    # The same fixture folder is passed as both the input and the prepped data.
    input_data = prepped_data = os.path.join(os.path.dirname(__file__),
                                             "data/model_trainer/train")
    model_candidate_folder = os.path.join(prepped_data, "models")
    # clean_dir is a helper defined elsewhere; presumably it removes the
    # artefacts of earlier runs so the checks below reflect this run only.
    if os.path.isdir(model_candidate_folder):
        clean_dir(model_candidate_folder)

    run = AzureMLRunMoq(None)
    trainer = ModelTrainer(run)
    trainer.train(input_data, prepped_data, model_candidate_folder)

    # Assert on the boolean directly (no ``== True``) and name the missing
    # file so a failure is self-explanatory.
    for file_name in ("classifier.hdf5", "generator.hdf5"):
        model_file = os.path.join(model_candidate_folder, file_name)
        assert os.path.isfile(model_file), (
            "expected model file not found: " + model_file)
Ejemplo n.º 2
0
    # If provided, load test data
    if not options.test_file is None:
        datamanager_test_file = ConllLoader(input_file=options.test_file,
                                            oracle=False)
        datamanager_test_file.load_file()
    else:
        datamanager_test_file = None

    model = BiLSTMParser(name=options.model_name,
                         vocab=vocab,
                         pos_tags=pos_tags,
                         word_dim=options.word_dim,
                         pos_dim=options.pos_dim,
                         num_layers_lstm=options.num_layers_lstm,
                         hidden_units_lstm=options.hidden_units_lstm,
                         hidden_units_mlp=options.hidden_units_mlp,
                         arc_labels=arc_labels,
                         features=options.features)

    trainer = ModelTrainer(model=model,
                           datamanager_train_file=datamanager_train_file,
                           datamanager_test_file=datamanager_test_file,
                           epochs=options.num_epochs,
                           criterion=options.criterion,
                           optimizer=options.optimizer,
                           run=options.run,
                           l2_penalty=options.l2_penalty)

    trainer.train(test_each_epoch=True)
Ejemplo n.º 3
0
    def processer(self):
        """Run the enabled preprocessing stages, then train one model per fold.

        Every stage is gated by a flag on the instance and the stages run in
        this order:

        1. ``self.impute`` - impute the train and test dataframes.
        2. ``self.shuffle`` - shuffle the train and test dataframes.
        3. ``self.cross_validation`` - replace ``self.train_dataframe`` with
           the result of ``CrossValidation(...).split()``.
        4. ``self.encoding`` - encode both dataframes with the encoder that
           matches ``self.data_type``.
        5. ``self.train_model`` - for each fold, split the training dataframe
           on its ``kfold`` column, optionally scale and extract features,
           and train a ``ModelTrainer``.

        The per-fold splits are stored on ``self.X_train``, ``self.X_validate``,
        ``self.y_train`` and ``self.y_validate``; after the loop they hold the
        values of the last fold.

        Raises:
            ValueError: if encoding is enabled and ``self.data_type`` is
                neither ``'numerical'`` nor ``'categorical'``.
        """
        if self.impute:
            self.train_dataframe, self.test_dataframe = self.data_preprocess_instance.data_imputer(
                train_dataframe=self.train_dataframe,
                test_dataframe=self.test_dataframe)

        if self.shuffle:
            print('Shuffling train and test dataframe')
            self.train_dataframe, self.test_dataframe = self.data_preprocess_instance.shuffle_data(
                train_dataframe=self.train_dataframe,
                test_dataframe=self.test_dataframe)

        if self.cross_validation:
            print(
                f'cross validating the dataset using {self.problem_type} method'
            )
            cross_instance = CrossValidation(
                df=self.train_dataframe,
                target_cols=self.target_column,
                multilabel_delimiter=self.multilabel_delimiter,
                problem_type=self.problem_type,
                num_folds=self.num_folds,
                random_state=self.random_state)
            self.train_dataframe = cross_instance.split()

        if self.encoding:
            if self.data_type == 'numerical':
                # The log line names the encoder that actually runs; it used
                # to say "categorical" in this branch as well.
                print(
                    f'Performing numerical encoding using {self.encoding_type}'
                )
                self.train_dataframe, self.test_dataframe = self.data_preprocess_instance.numerical_encoder(
                    train_dataframe=self.train_dataframe,
                    test_dataframe=self.test_dataframe)
            elif self.data_type == 'categorical':
                print(
                    f'Performing categorical encoding using {self.encoding_type}'
                )
                self.train_dataframe, self.test_dataframe = self.data_preprocess_instance.categorical_encoder(
                    train_dataframe=self.train_dataframe,
                    test_dataframe=self.test_dataframe)
            else:
                # ValueError is a subclass of Exception, so existing
                # ``except Exception`` handlers keep working.
                raise ValueError(f"{self.data_type} not available")

        if self.train_model:
            # Columns that must not be fed to the model as features.
            # NOTE(review): "target" is hard-coded here while the labels are
            # read via self.target_column - confirm the two always agree.
            non_feature_columns = ["id", "target", "kfold"]

            # NOTE(review): the fold count is fixed at 5 although
            # self.num_folds is what gets passed to CrossValidation - confirm
            # they are meant to match (FOLD_MAPPING is keyed by fold index).
            for fold in range(5):
                print(f"selecting fold {fold}")
                # Training rows are those whose kfold value is listed in
                # FOLD_MAPPING for this fold; validation rows are the fold
                # itself.
                in_training_folds = self.train_dataframe.kfold.isin(
                    self.FOLD_MAPPING.get(fold))
                main_train = self.train_dataframe[in_training_folds]
                main_validate = self.train_dataframe[
                    self.train_dataframe.kfold == fold]

                # Split each part into labels (y) and features (X).
                self.y_train = main_train[self.target_column].values
                self.y_validate = main_validate[self.target_column].values
                self.X_train = main_train.drop(non_feature_columns, axis=1)
                self.X_validate = main_validate.drop(non_feature_columns,
                                                     axis=1)

                if self.feature_scaling:
                    print(f'feature scaling the dataset of fold {fold}')
                    self.X_train, self.X_validate = self.data_preprocess_instance.feature_scalar(
                        train_dataframe=self.X_train,
                        test_dataframe=self.X_validate)
                    # NOTE(review): feature extraction only runs when feature
                    # scaling is enabled too - confirm this nesting is
                    # intentional.
                    if self.feature_extractor:
                        print(
                            f"extracting features from the dataset of fold {fold} using {self.feature_extractor_type}"
                        )
                        feat_ext = FeatureExtractor(
                            X_train=self.X_train,
                            X_validate=self.X_validate,
                            feature_extractor_type=self.feature_extractor_type,
                            n_components=self.n_components,
                            y_train=self.y_train)
                        # The extractor also returns n_components, which is
                        # stored back on self and so carries into later folds.
                        self.X_train, self.X_validate, self.n_components = feat_ext.extact(
                        )

                train_instance = ModelTrainer(X_train=self.X_train,
                                              X_validate=self.X_validate,
                                              y_train=self.y_train,
                                              y_validate=self.y_validate,
                                              model_name=self.model_name)
                train_instance.train()