    def load_checkpoint(self, name, with_trainer=True, model_use_gpu=True):
        """Load model checkpoint and setup the Model and ModelTrainer

        Args:
            name (str): Checkpoint name
            with_trainer (bool, optional): Whether to load the ModelTrainer. Defaults to True.
            model_use_gpu (bool, optional): Setup the Model with GPU enabled. Defaults to True.

        Returns:
            Model or (Model, ModelTrainer): The model alone when with_trainer is
                False, otherwise a (model, trainer) tuple.
        """
        checkpoint = torch.load(self.get_checkpoint_path(name))

        # recover model state
        model = Model(checkpoint['class_to_idx'],
                      arch=checkpoint['arch'],
                      out_features=checkpoint['out_features'],
                      hidden_units_1=checkpoint['hidden_units_1'],
                      hidden_units_2=checkpoint['hidden_units_2'],
                      dropout_1=checkpoint['dropout_1'],
                      dropout_2=checkpoint['dropout_2'],
                      use_gpu=model_use_gpu)

        model.model.load_state_dict(checkpoint['model_state'])
        model.model.class_to_idx = checkpoint['class_to_idx']

        # recover trainer state
        if with_trainer:
            trainer = ModelTrainer(model, checkpoint['learning_rate'])
            trainer.optimizer.load_state_dict(checkpoint['optimizer_state'])
            trainer.trained_epochs = checkpoint['trained_epochs']
            return (model, trainer)
        else:
            return model
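
# Hedged usage sketch for load_checkpoint above. The import path, checkpoint
# directory and checkpoint name are hypothetical placeholders; the constructor and
# method signatures follow the ModelLoader usage shown in the training script below.
from model_loader import ModelLoader  # hypothetical module path

loader = ModelLoader('checkpoints')
model, trainer = loader.load_checkpoint('vgg16', model_use_gpu=True)  # model + trainer
model_only = loader.load_checkpoint('vgg16', with_trainer=False)      # model only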
    def perform_SVR(self):
        print(
            'SVRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR'
        )
        model_trainer = ModelTrainer()
        # 'poly' is an SVR kernel name, not a valid gamma value, so pass it as kernel=
        svr = SVR(kernel='poly', C=1e3, epsilon=0.2)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            svr, self.X_train, self.X_test, self.Y_train, self.Y_test)
Example #3
class CreateClassifier(object):
    def __init__(self):
        self.pre = Preprocess()
        self.nlp = NLPHelper()
        self.fex = FeatureExtractor()
        self.ut = Utility()
        self.mt = ModelTrainer()

    def createClassifier(self):
        # get golden data
        # data = self.nlp.getGoldenDataset()
        # extract entity and save into pickle
        # self.nlp.extractNews(data)  #CHANGE MODULE WHEN SWITCHING BETWEEN ADDITIONAL IDN AND DEFAULT
        # self.nlp.core_nlp.close()

        # # find feature in one text and save it to excel
        # # scenario 1
        # # path = "scenario1_halfidn_pickle/"
        # # scenario 2
        # # path = "scenario2_fullidn_pickle/"
        # # scenario 3
        # path = "scenario3_stanford_pickle/"
        # path = "test/"
        # filelist = os.listdir(path)
        # data = pd.DataFrame()

        # for idx, file in enumerate(filelist):

        #     # open the pickle file containing the NER, coref, and POS data of a news article
        #     pkl_dict = self.ut.loadPickle(os.path.join(path, file))
        #     # extract features from the pickle file
        #     temp = self.fex.extractFeaturesFromPickle(pkl_dict)
        #     data = data.append(temp)

        # #scenario 1
        # self.ut.convertToExcel("scenario1_idnnerhalf_extracted_feature.xlsx",data,'Sheet1')
        # #scenario 2
        # self.ut.convertToExcel("scenario2_idnnerfull_extracted_feature.xlsx",data,'Sheet1')
        # #scenario 3
        # self.ut.convertToExcel("scenario3_stanford_extracted_feature.xlsx",data,'Sheet1')
        # #scenario testing
        # self.ut.convertToExcel("testing_rf.xlsx",data,'Sheet1')

        # for training use
        # reading the excel file that contains the features (the WHO and WHERE columns must be added first, and it must be decided which is WHO and which is WHERE)
        # scenario 1
        # df = pd.read_excel('scenario1_idnnerhalf_extracted_feature.xlsx', sheet_name='Sheet1')
        # scenario 2
        df = pd.read_excel('scenario2_idnnerfull_extracted_feature.xlsx',
                           sheet_name='Sheet1')
        # # scenario 3
        # df = pd.read_excel('scenario3_stanford_extracted_feature.xlsx', sheet_name='Sheet1')

        # # training models for detecting who and where; passing "where" or "who" means that column will be dropped (see the sketch after this method)
        who = self.mt.train(df, 'where')
        where = self.mt.train(df, 'who')
        self.nlp.core_nlp.close()
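
# Hedged sketch of the drop-column convention used by self.mt.train above: the string
# argument names the who/where column to drop, and the remaining label column becomes
# the training target. This is an illustration only, not the actual ModelTrainer.train;
# the column names and the RandomForestClassifier choice are assumptions.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier


def train_who_where_sketch(df: pd.DataFrame, drop_column: str, target_column: str):
    # drop the unused label column; keep the extracted features as inputs
    features = df.drop(columns=[drop_column, target_column])
    clf = RandomForestClassifier()
    clf.fit(features, df[target_column])
    return clf

# e.g. who_clf = train_who_where_sketch(df, drop_column='where', target_column='who')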
    def perform_ridge_regression(self):
        print(
            '*********************************************RIDGE REGRESSION**************************************************'
        )
        model_trainer = ModelTrainer()
        ridge = Ridge(alpha=1.0)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            ridge, self.X_train, self.X_test, self.Y_train, self.Y_test)
        evl = MetricsCalculator()
        evl.evaluate('root mean square error for ridge regression',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('ridge regression', y_true_glucose, y_pred_glucose)
Example #5
def model_trainer_from_checkpoint(checkpoint_filename,
                                  video_retriever_generator, selector,
                                  extractor):
    """This function restores a model from a tf checkpoint and creates a ModelTrainer"""
    builder, model_saver, params = restore_model(checkpoint_filename,
                                                 video_retriever_generator,
                                                 selector, extractor)

    model_trainer = ModelTrainer(model_saver, builder, params["epoch"],
                                 float(params["best_loss"]))

    model_trainer.load_last_checkpoint()

    return model_trainer
    def perform_lasso_regression(self):

        print(
            '................................... LASSO REGRESSION ............................................'
        )
        model_trainer = ModelTrainer()
        lasso = Lasso()
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            lasso, self.X_train, self.X_test, self.Y_train, self.Y_test)
        evl = MetricsCalculator()
        evl.evaluate('root mean square error for lasso regression',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('lasso regression', y_true_glucose, y_pred_glucose)
Example #7
    def perform_linear_regression(self):

        print(
            '------------------------------------------LINEAR REGRESSION------------------------------------------'
        )
        model_trainer = ModelTrainer()
        linear_reg = LinearRegression()
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            linear_reg, self.X_train, self.X_test, self.Y_train, self.Y_test)
        evl = MetricsCalculator()
        evl.evaluate('root mean square error for linear regression',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('linear regression', y_true_glucose, y_pred_glucose)
Example #8
    def __init__(self, experiment_name: str, search_config: AgingEvoConfig,
                 training_config: TrainingConfig, bound_config: BoundConfig):
        self.log = logging.getLogger(
            name=f"AgingEvoSearch [{experiment_name}]")
        self.config = search_config
        self.trainer = ModelTrainer(training_config)

        self.root_dir = Path(search_config.checkpoint_dir)
        self.root_dir.mkdir(parents=True, exist_ok=True)
        self.experiment_name = experiment_name

        if training_config.pruning and not training_config.pruning.structured:
            self.log.warning(
                "For unstructured pruning, we can only meaningfully use the model "
                "size resource metric.")
            bound_config.peak_mem_bound = None
            bound_config.mac_bound = None
        self.pruning = training_config.pruning

        # We establish an order of objectives in the feature vector; all functions must ensure the order is the same
        self.constraint_bounds = [
            bound_config.error_bound, bound_config.peak_mem_bound,
            bound_config.model_size_bound, bound_config.mac_bound
        ]

        self.history: List[EvaluatedPoint] = []
        self.population: List[EvaluatedPoint] = []

        self.population_size = search_config.population_size
        self.initial_population_size = search_config.initial_population_size or self.population_size
        self.rounds = search_config.rounds
        self.sample_size = search_config.sample_size
        num_gpus = len(tf.config.experimental.list_physical_devices("GPU"))
        self.max_parallel_evaluations = search_config.max_parallel_evaluations or num_gpus
Example #9
def train_and_eval(embedding, layers, batch_size, layers_type):
    # Device
    device = get_device()

    # Training parameters
    epochs = 5

    # Train and dev data
    train_file = './data/snli_1.0_train.jsonl'
    train_data = Data(train_file, embedding)
    dev_file = './data/snli_1.0_dev.jsonl'
    dev_data = Data(dev_file, embedding)
    test_file = './data/snli_1.0_test.jsonl'
    test_data = Data(test_file, embedding)

    # Create the model
    model = ResidualLSTMEncoder(embedding_vectors=embedding.vectors,
                                padding_index=train_data.padding_index,
                                layers_def=layers,
                                output_size=len(train_data.c2i),
                                max_sentence_length=Data.MAX_SENTENCE_SIZE,
                                hidden_mlp=800,
                                device=device,
                                layers_type=layers_type)

    num_of_params = sum(p.numel() for p in model.parameters())

    print("Number of model parameters: %d" % num_of_params)
    model = model.to(device)

    # Create optimizer
    optimizer = optim.Adam(model.parameters(), lr=2e-4)
    # optimizer = optim.Adagrad(model.parameters())

    # Create a model trainer object
    model_trainer = ModelTrainer(net=model,
                                 device=device,
                                 optimizer=optimizer)

    # Train the model
    model_trainer.train(train_data, dev_data,
                        train_log_file='train_1.txt', dev_log_file='dev_1.txt',
                        epochs=epochs, batch_size=batch_size)

    # Save the model
    model_trainer.save_model('./models/model_1')

    # Test the model
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                              shuffle=False, num_workers=0)

    test_performencer = Performencer(name='Test',
                                     output_size=model.output_size)
    model_trainer.eval(test_loader, test_performencer)
    test_performencer.pinpoint()
    test_performencer.log_to_file('test_1.txt')
def process_image(frame):

    image = np.empty_like(frame)
    np.copyto(image, frame)
    model = ModelTrainer.get_trained_model()
    img = CarDetector.process(model, image)

    GV.current_frame += 1

    return img
Example #11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data',
                        type=str,
                        help='Data file path',
                        required=True)
    parser.add_argument('--output',
                        type=str,
                        help='Output file path',
                        required=True)
    parser.add_argument('--output_model',
                        type=str,
                        help='Model path',
                        default=None)
    parser.add_argument('--level', type=int, default=0)
    parser.add_argument('--fold', type=int, default=2)
    parser.add_argument('--iter', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--epoch', type=int, default=30)
    parser.add_argument('--random_state', type=int, default=None)
    args = parser.parse_args()

    dataset = BrainDataset(args.data, expand_dim=True, level=args.level)
    model = CNN1D(len(np.unique(dataset.label))).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epochs = args.epoch
    batch_size = args.batch_size
    trainer = ModelTrainer(model, dataset, DEVICE)
    result = trainer.train(optimizer,
                           criterion,
                           batch_size=batch_size,
                           epochs=epochs,
                           kfold=args.fold,
                           iteration=args.iter,
                           random_state=args.random_state)

    result = np.array(result)
    np.savetxt(args.output, result, delimiter=",")
    if args.output_model is not None:
        torch.save(model.state_dict(), args.output_model)
    def perform_PLS(self):
        print(
            ',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, PARTIAL LEAST SQUARE ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'
        )
        model_trainer = ModelTrainer()

        pls = PLSRegression(n_components=20,
                            scale=True,
                            max_iter=5000,
                            tol=1e-06,
                            copy=True)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            pls, self.X_train, self.X_test, self.Y_train, self.Y_test)

        evl = MetricsCalculator()
        evl.evaluate('root mean square error for partial least square',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('pls', y_true_glucose, y_pred_glucose)
Example #13
    def perform_NN(self):
        print(
            '/////////////////////////////////////////////////// NEURAL NETWORK ///////////////////////////////////'
        )
        model_trainer = ModelTrainer()
        nn = MLPRegressor(hidden_layer_sizes=(200, ),
                          activation='relu',
                          solver='adam',
                          alpha=0.1,
                          batch_size='auto',
                          learning_rate='constant',
                          learning_rate_init=0.001,
                          power_t=0.5,
                          max_iter=3000,
                          shuffle=True,
                          random_state=None,
                          tol=0.0001,
                          verbose=False,
                          warm_start=False,
                          momentum=0.9,
                          nesterovs_momentum=True,
                          early_stopping=False,
                          validation_fraction=0.1,
                          beta_1=0.9,
                          beta_2=0.999,
                          epsilon=1e-08,
                          n_iter_no_change=10)
        Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
            nn, self.X_train, self.X_test, self.Y_train, self.Y_test)

        evl = MetricsCalculator()
        evl.evaluate('root mean square error for Neural network',
                     y_true_glucose, y_pred_glucose)

        viz = Visualizer()
        viz.visualize('neural network', y_true_glucose, y_pred_glucose)
Example #14
    def __init__(self,
                 experiment_name: str,
                 search_config: BayesOptConfig,
                 training_config: TrainingConfig,
                 bound_config: BoundConfig):
        assert search_config.starting_points >= 1

        self.log = logging.getLogger(name=f"BayesOpt [{experiment_name}]")
        self.config = search_config
        self.trainer = ModelTrainer(training_config)

        self.root_dir = Path(search_config.checkpoint_dir)
        self.root_dir.mkdir(parents=True, exist_ok=True)
        self.experiment_name = experiment_name

        if training_config.pruning and not training_config.pruning.structured:
            self.log.warning("For unstructured pruning, we can only use the model size resource metric.")
            bound_config.peak_mem_bound = None
            bound_config.mac_bound = None

        # We establish an order of objectives in the feature vector; all functions must ensure the order is the same
        self.constraint_bounds = [bound_config.error_bound, bound_config.peak_mem_bound,
                                  bound_config.model_size_bound, bound_config.mac_bound]
    elif opt.model_name == 2:
        # MLP
        train_config['using_spectrogram'] = True
        train_config['criterion'] = 'MSE'

    elif opt.model_name == 3:
        # simple generator
        train_config['using_simple_g'] = True
        train_config['criterion'] = 'MSE'

    elif opt.model_name == 4:
        # 1D auto encoder
        train_config['criterion'] = 'MSE'

    elif opt.model_name == 5:
        # 2D auto encoder
        train_config['criterion'] = 'MSE'
        train_config['using_spectrogram'] = True

    elif opt.model_name == 6:
        # simple auto encoder
        train_config['criterion'] = 'MSE'

    elif opt.model_name == 7:
        # adversarial MLP
        train_config['using_spectrogram'] = True

    trainer = ModelTrainer(**train_config)
    trainer.train()
Example #16
def main():
    """Run Training Session"""

    # Measures total program runtime by collecting start time
    start_time = time.time()

    # get input args
    in_arg = get_input_args()

    # Function that checks command line arguments using in_arg
    print_command_line_arguments(in_arg)
    print()

    # load datasets
    dataloader = ModelDataLoader(in_arg.data_dir)
    train_dataset, train_dataloader = dataloader.get_train_data()
    valid_dataset, valid_dataloader = dataloader.get_validation_data()
    test_dataset, test_dataloader = dataloader.get_test_data()

    # Use model loader to load existing checkpoint
    loader = ModelLoader(in_arg.checkpoint_dir)

    if loader.checkpoint_exists(in_arg.checkpoint):
        # load checkpoint
        print("Loading checkpoint %s" %
              (loader.get_checkpoint_path(in_arg.checkpoint)))
        model, trainer = loader.load_checkpoint(
            in_arg.checkpoint, model_use_gpu=in_arg.gpu)
        print("Epochs trained so far: %d" % (trainer.trained_epochs))
    else:
        # no checkpoint, create fresh model using input arguments
        print("Checkpoint '%s' does not exist" %
              (loader.get_checkpoint_path(in_arg.checkpoint)))
        model = Model(train_dataset.class_to_idx,
                      arch=in_arg.arch, use_gpu=in_arg.gpu,
                      hidden_units_1=in_arg.hidden_units_1, hidden_units_2=in_arg.hidden_units_2,
                      dropout_1=in_arg.dropout_1, dropout_2=in_arg.dropout_2)
        trainer = ModelTrainer(model, learning_rate=in_arg.learning_rate)

    print()
    print("Model training in session...")
    print()

    epochs = in_arg.epochs

    # train model and print results
    for result in trainer.train_epochs(epochs, train_dataloader, valid_dataloader):
        print(
            "Epoch: %3d/%3d" % (result['epoch']+1, epochs),
            " | Train Loss: %10.5f" % (result['train_loss']),
            " | Validation Loss: %10.5f" % (result['validation_loss']),
            " | Validation Acc: %6.3f%%" % (
                result['validation_accuracy'] * 100),
            " | Duration: %10.3fs" % (result['duration'])
        )

    print()
    print("Testing model against test data...")
    print()

    # test model against test data
    test_result = trainer.test(test_dataloader)

    print(
        "Test Loss: %10.5f" % (test_result['test_loss']),
        " | Test Acc: %6.3f%%" % (test_result['test_accuracy'] * 100),
        " | Duration: %10.3fs" % (test_result['duration'])
    )

    # save checkpoint
    loader.save_checkpoint(in_arg.checkpoint, model, trainer)

    print()
    print("Total Train Duration: %.3fs" % (time.time() - start_time))
def main():
    logging.warning("dummy warning!!!")
    logging.error("dummy error!!!")
    logging.info("dummy info!!!")
    logging.debug("dummy debug!!!")

    logging.warning(f"Inside {__file__}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--subscription_id",
                        type=str,
                        dest="subscription_id",
                        help="The Azure subscription ID")
    parser.add_argument("--resource_group",
                        type=str,
                        dest="resource_group",
                        help="The resource group name")
    parser.add_argument("--workspace_name",
                        type=str,
                        dest="workspace_name",
                        help="The workspace name")
    parser.add_argument("--experiments_config_filepath",
                        type=str,
                        dest="experiments_config_filepath",
                        help="A path to the JSON config file")  # noqa: E501
    parser.add_argument("--model_name",
                        type=str,
                        dest="model_name",
                        help="Name of the Model")
    parser.add_argument("--should_register_model",
                        type=str2bool,
                        dest="should_register_model",
                        default=False,
                        help="Register trained model")  # noqa: E501
    args = parser.parse_args()

    logging.warning(f"Argument 1: {args.subscription_id}")
    logging.warning(f"Argument 2: {args.resource_group}")
    logging.warning(f"Argument 3: {args.workspace_name}")
    logging.warning(f"Argument 4: {args.experiments_config_filepath}")
    logging.warning(f"Argument 5: {args.model_name}")
    logging.warning(f"Argument 6: {args.should_register_model}")

    # Get current service context
    run = Run.get_context()
    workspace = run.experiment.workspace

    # Load training configuration
    experiment_configuration = ExperimentConfigurationWrapper()
    experiment_configuration.load(args.experiments_config_filepath)
    training_config = experiment_configuration.json["feature_extractor"][
        "training"]

    # initialize empty collections for data
    # train_set = []
    # test_set = []
    # dev_set = []

    download_root_dir = os.path.join('/mnt', 'tmp', 'datasets')
    data_splitter = HDF5TrainTestSplitter()
    for data_config in training_config["data"]:
        cropped_cells_dataset_name = data_config['input'][
            'cropped_cells_dataset_name']
        cropped_cells_dataset_version = data_config['input'][
            'cropped_cells_dataset_version']
        cropped_cells_dataset = Dataset.get_by_name(
            workspace=workspace,
            name=cropped_cells_dataset_name,
            version=cropped_cells_dataset_version)

        msg = (
            f"Dataset '{cropped_cells_dataset_name}', id: {cropped_cells_dataset.id}"
            f", version: {cropped_cells_dataset.version} will be used to prepare data for a feature extractor training."
        )
        logging.warning(msg)

        # Create a folder where datasets will be downloaded to
        dataset_target_path = os.path.join(download_root_dir,
                                           cropped_cells_dataset_name)
        os.makedirs(dataset_target_path, exist_ok=True)

        # Download 'cropped cells' dataset (consisting of HDF5 and CSV files)
        dataset_target_path = download_registered_file_dataset(
            workspace, cropped_cells_dataset, download_root_dir)
        list_all_files_in_location(dataset_target_path)

        # Split data (indices) into subsets
        df_metadata = pd.read_csv(
            os.path.join(dataset_target_path, 'cropped_nuclei.csv'))
        logging.warning(f"Metadata dataframe (shape): {df_metadata.shape}")

        logging.warning("Splitting data into subsets...")
        data_splitter.add_dataset(
            name=data_config['input']['cropped_cells_dataset_name'],
            fname=os.path.join(dataset_target_path,
                               'cropped_nuclei_images.h5'),
            metadata=df_metadata)

    data_splitter.train_dev_test_split()

    # --------
    # Training
    # --------

    # Init dataloaders
    #train_dataset = CellDataset(cell_list=train_set, target_cell_shape=INPUT_IMAGE_SIZE)
    train_dataset = CellDataset(splitter=data_splitter, dset_type='train')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=DATA_LOADER_WORKERS,
    )
    #dev_dataset = CellDataset(cell_list=dev_set, target_cell_shape=INPUT_IMAGE_SIZE)
    dev_dataset = CellDataset(splitter=data_splitter, dset_type='dev')
    dev_data_loader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=DATA_LOADER_WORKERS,
    )
    #test_dataset = CellDataset(cell_list=test_set, target_cell_shape=INPUT_IMAGE_SIZE)
    test_dataset = CellDataset(splitter=data_splitter, dset_type='test')
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=DATA_LOADER_WORKERS,
    )

    # Define and Train model
    device = torch.device(DEVICE)
    model = AUTOENCODER(
        latent_dim_size=LATENT_DIM_SIZE,
        input_image_size=INPUT_IMAGE_SIZE,
        device=device,
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    print(f"Using {torch.cuda.device_count()} GPUs for training")
    # model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    trainer = ModelTrainer(model, device)
    tr_losses, dev_losses = trainer.train(
        epochs=EPOCHS,
        optimizer=optimizer,
        train_data_loader=train_data_loader,
        dev_data_loader=dev_data_loader,
    )
    test_loss = trainer.test_model(test_data_loader)
    run.log("dev_loss", np.max(dev_losses))
    run.log("train_loss", np.max(tr_losses))
    run.log("test_loss", test_loss)
    # Plot training metrics and model sample reconstructions
    trainer.get_training_plot(tr_losses=tr_losses, dev_losses=dev_losses)
    run.log_image("model training metrics", plot=plt)

    dataiter = iter(test_data_loader)
    images = next(dataiter)
    trainer.get_pred_samples(images, figsize=(40, 40))
    run.log_image("sample reconstructions", plot=plt)

    # Training completed!  Let's save the model and upload it to AML
    os.makedirs("./models", exist_ok=True)
    model_file_name = "model.ext"
    model_output_loc = os.path.join(".", "models", model_file_name)
    torch.save(model, model_output_loc)

    run.upload_files(names=[model_output_loc], paths=[model_output_loc])

    # Register model (ideally, this should be a separate step)
    if args.should_register_model:
        logging.warning("List of the associated stored files:")
        logging.warning(run.get_file_names())

        logging.warning("Registering a new model...")
        # TODO: prepare a list of metrics that were logged using run.log()
        metric_names = []

        if os.path.exists(model_output_loc):
            register_model(
                run=run,
                model_name=args.model_name,
                model_description="Feature extraction model",
                model_path=model_output_loc,
                training_context="PythonScript",
                metric_names=metric_names,
            )
        else:
            logging.warning(
                f"Cannot register model as path {model_output_loc} does not exist."
            )
    else:
        logging.warning("A trained model will not be registered.")

    logging.warning("Done!")
    logging.info("Done Info Style!")
Example #18
def main(args):

    total_step = 100//args.EF

    # set random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)

    # prepare checkpoints and log folders
    if not os.path.exists(args.checkpoints_dir):
        os.makedirs(args.checkpoints_dir)
    if not os.path.exists(args.logs_dir):
        os.makedirs(args.logs_dir)

    # initialize dataset
    if args.dataset == 'visda':
        args.data_dir = os.path.join(args.data_dir, 'visda')
        data = Visda_Dataset(root=args.data_dir, partition='train', label_flag=None)

    elif args.dataset == 'office':
        args.data_dir = os.path.join(args.data_dir, 'Office')
        data = Office_Dataset(root=args.data_dir, partition='train', label_flag=None, source=args.source_name,
                              target=args.target_name)

    elif args.dataset == 'home':
        args.data_dir = os.path.join(args.data_dir, 'OfficeHome')
        data = Home_Dataset(root=args.data_dir, partition='train', label_flag=None, source=args.source_name,
                              target=args.target_name)
    elif args.dataset == 'visda18':
        args.data_dir = os.path.join(args.data_dir, 'visda18')
        data = Visda18_Dataset(root=args.data_dir, partition='train', label_flag=None)
    else:
        print('Unknown dataset!')

    args.class_name = data.class_name
    args.num_class = data.num_class
    args.alpha = data.alpha
    # setting experiment name
    label_flag = None
    selected_idx = None
    args.experiment = set_exp_name(args)
    logger = Logger(args)

    if not args.visualization:

        for step in range(total_step):

            print("This is {}-th step with EF={}%".format(step, args.EF))

            trainer = ModelTrainer(args=args, data=data, step=step, label_flag=label_flag, v=selected_idx, logger=logger)

            # train the model
            args.log_epoch = 4 + step//2
            trainer.train(step, epochs=4 + step * 2, step_size=args.log_epoch)

            # pseudo_label
            pred_y, pred_score, pred_acc = trainer.estimate_label()

            # select data from target to source
            selected_idx = trainer.select_top_data(pred_score)

            # add new data
            label_flag, data = trainer.generate_new_train_data(selected_idx, pred_y, pred_acc)
    else:
        # load trained weights
        trainer = ModelTrainer(args=args, data=data)
        trainer.load_model_weight(args.checkpoint_path)
        vgg_feat, node_feat, target_labels, split = trainer.extract_feature()
        visualize_TSNE(node_feat, target_labels, args.num_class, args, split)

        plt.savefig('./node_tsne.png', dpi=300)
Example #19
    parser = argparse.ArgumentParser()
    parser.add_argument("--config_path",
                        default='../config.ini',
                        required=False)

    args = parser.parse_args()

    cfg = OCTConfig(args.config_path)
    oct_logger = OCTLogger(cfg, RUN_TIMESTAMP)
    oct_logger.print_cfg()

    generator_resolver = GeneratorResolver(cfg)
    training_data_iterator, test_data_iterator, val_data_iterator = generator_resolver.resolve_data_iterators(
    )

    model_resolver = ModelResolver(cfg)
    model = model_resolver.resolve_model()

    augmented_image_data_generator = generator_resolver.provide_augmented_image_data_generator(
    )
    augmentation_processor = AugmentationProcessor(
        cfg, augmented_image_data_generator)
    augmentation_processor.perform_data_augmentation()

    model_trainer = ModelTrainer(cfg, model, training_data_iterator,
                                 val_data_iterator, RUN_TIMESTAMP)
    model_trainer.train_model()

    model_evaluator = ModelEvaluator(cfg, model, test_data_iterator)
    model_evaluator.evaluate_model()
Example #20
from model_trainer import ModelTrainer
from logger import Logger

if __name__ == '__main__':

    logger = Logger(show=True,
                    html_output=True,
                    config_file="config.txt",
                    data_folder="drive")

    trainer = ModelTrainer(logger)
    trainer.run_training(num_epochs=50, save_after_epochs=10)
Example #21
def main(args):
    # Modified here
    total_step = 100 // args.EF

    # set random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)

    # prepare checkpoints and log folders
    if not os.path.exists(args.checkpoints_dir):
        os.makedirs(args.checkpoints_dir)
    if not os.path.exists(args.logs_dir):
        os.makedirs(args.logs_dir)

    # initialize dataset
    if args.dataset == 'nusimg':
        args.data_dir = os.path.join(args.data_dir, 'visda')
        data = NUSIMG_Dataset(root=args.data_dir,
                              partition='train',
                              label_flag=None,
                              source=args.source_path,
                              target=args.target_path)

    elif args.dataset == 'office':
        args.data_dir = os.path.join(args.data_dir, 'Office')
        data = Office_Dataset(root=args.data_dir,
                              partition='train',
                              label_flag=None,
                              source=args.source_path,
                              target=args.target_path)
    elif args.dataset == 'mrc':
        data = MRC_Dataset(root=args.data_dir,
                           partition='train',
                           label_flag=None,
                           source=args.source_path,
                           target=args.target_path)
    else:
        print('Unknown dataset!')

    args.class_name = data.class_name
    args.num_class = data.num_class
    args.alpha = data.alpha
    # setting experiment name
    label_flag = None
    selected_idx = None
    args.experiment = set_exp_name(args)

    logger = Logger(args)
    trainer = ModelTrainer(args=args,
                           data=data,
                           label_flag=label_flag,
                           v=selected_idx,
                           logger=logger)
    for step in range(total_step):

        print("This is {}-th step with EF={}%".format(step, args.EF))
        # train the model
        args.log_epoch = 5
        trainer.train(epochs=24, step=step)  #24
        # pseudo-label
        pred_y, pred_score, pred_acc = trainer.estimate_label()

        # select data from target to source
        selected_idx = trainer.select_top_data(pred_score)

        # add new data
        trainer.generate_new_train_data(selected_idx, pred_y, pred_acc)
    def train_small_test_version(self, hyperparams_dict):
        """perform training on small test data"""
        trainer = ModelTrainer(self.dataloaders, hyperparams_dict,
                               self.wv_wrapper, self.path)
        model, losses, accuracies = trainer.train(epochs=3)
        return model, losses, accuracies
def main():
    config = get_config_args()
    RWAModel = RecurrentWeightedAverage(config)
    Trainer = ModelTrainer(config, RWAModel)
    Trainer.do_training()
Example #24
            'max_depth': [30, 80],
            'max_features': [2, 15, 'auto', None],
            'min_samples_leaf': [3, 5],
            'min_samples_split': [2, 5, 8],
            'n_estimators': [500, 800],
            'n_jobs': [10]
        }
    }
    # rf = {
    #     'estimator': RandomForestClassifier(),
    #     'parameters': {
    #         'bootstrap': [True],
    #         'max_depth': [30, 50],
    #         'max_features': [None],
    #         'min_samples_leaf': [3, 5],
    #         'min_samples_split': [2, 5, 8],
    #         'n_estimators': [500, 600]
    #     }
    # }
    dt = {'estimator': DecisionTreeClassifier(), 'parameters': {}}

    model_trainer = ModelTrainer(german_config.testset_ratio,
                                 german_config.logger)
    model_trainer.add_estimators([rf])
    german_classifier = WordSenseAlignmentClassifier(german_config,
                                                     feature_extractor,
                                                     model_trainer)
    german_classifier.load_data() \
        .extract_features(['similarities', 'len_diff', 'pos_diff']) \
        .train(with_testset=True)
Example #25
def main():
    config = get_config_args()
    GRUModel = SequencePredictor(config)
    Trainer = ModelTrainer(config, GRUModel)
    Trainer.do_training()
Example #26
  # Build Model Graph from Config  
  model_config = configparser.ConfigParser()
  model_config.read(args.model)
  model = ModelBuilder(model_config)
  model.build_graph()
  model.compile()
  model.summary_txt()
  model.print_png()
  model.save_graph()

  # Train Model
  if args.train:
    train_config = configparser.ConfigParser()
    train_config.read(args.train)
    trainer = ModelTrainer(train_config)
    trainer.get_hyperparameters()
    trainer.get_train_set()
    trainer.get_dev_set()
    trainer.get_callbacks()
    train_history = model.train(trainer.fit_options)
    model.save_weights(trainer.outputs_config['weights'])
    trainer.write_outputs(train_history)

  # Test Model
  if args.test:
    test_config = configparser.ConfigParser()
    test_config.read(args.test)    
    tester = ModelTester(test_config)
    tester.get_weights()
    model.load_weights(tester.weights)
Example #27
    def __init__(self):
        self.ut = Utility()
        self.fwe = FiveWExtractor()
        self.fex = FeatureExtractor()
        self.nlp = NLPHelper()
        self.tr = ModelTrainer()
def main():
    config = get_config_args()
    GRUModel = SequencePredictor(config)
    Trainer = ModelTrainer(config, GRUModel)
    Trainer.do_training()
Example #29
        .similarity_diff_to_target()\
        .max_dependency_tree_depth() \
        .target_word_synset_count()\
        .token_count_norm_diff()\
        .semicol_count()\
        .elmo_similarity()

    rf = {
        'estimator': RandomForestClassifier(),
        'parameters': {
            'bootstrap': [True],
            'class_weight': ['balanced', 'balanced_subsample', None],  # None literal, not the string 'None'
            'max_depth': [5, 10, 30, 50, 80],
            'max_features': [2, 10, 15, 'auto', 'sqrt', 'log2'],
            'min_samples_leaf': [2, 5, 10],
            'min_samples_split': [2, 5, 10, 20],
            'n_estimators': [500, 800, 1000, 1500],
            'n_jobs': [8]
        }
    }

    model_trainer = ModelTrainer(english_config.testset_ratio,
                                 english_config.logger)
    model_trainer.add_estimators([rf])
    english_classifier = WordSenseAlignmentClassifier(english_config,
                                                      feature_extractor,
                                                      model_trainer)
    english_classifier.load_data() \
        .extract_features(['len_diff', 'pos_diff']) \
        .train()
Example #30
                                 logger='dutch_testset',
                                 is_testdata=True)

feature_extractor = FeatureExtractor() \
    .first_word() \
    .similarity() \
    .diff_pos_count() \
    .tfidf() \
    .ont_hot_pos() \
    .matching_lemma() \
    .count_each_pos() \
    .cosine() \
    .jaccard() \
    .difference_in_length()

model_trainer = ModelTrainer(german_config, german_config.logger)

german_classifier = WordSenseAlignmentClassifier(german_config,
                                                 feature_extractor,
                                                 model_trainer)
data = german_classifier.load_data().get_preprocessed_data()

feats = feature_extractor.extract(
    data, feats_to_scale=['similarities', 'len_diff', 'pos_diff'])
feats = feature_extractor.keep_feats([
    'similarities', 'cos_tfidf', 'ADP', 'DET', 'pos_diff', 'len_diff', 'PRON',
    'CONJ', 'X', 'PROPN', 'NOUN', 'cos', 'ADJ', 'VERB', 'jaccard', 'PUNCT',
    'noun', 'ADV', 'adjective'
])
x_trainset, x_testset = model_trainer.split_data(feats, 0.0)
Example #31
test_pairs = load_clean_sentences('testpairs.pkl')
training_pairs = load_clean_sentences('trainingpairs.pkl')

# print("tp[0] = ", training_pairs[:,0])
# print("tp[1] =", training_pairs[:,1])

#Setting up a syllabus
syl = WeightedTaskSyllabus(data=(training_pairs[:, 0], training_pairs[:, 1]),
                           weightings=weightings,
                           validation_split=0.1,
                           difficulty_sorter=sort_data,
                           task_count=task_count,
                           pre_run=False)

#Create a model trainer with our syllabus
trainer = ModelTrainer(model, syl, verbose_level=1)


#Create a callback for our model trainer and pass it in
def preprocess_data(data, syllabus, model):
    data['x'], data['y'], data['val_x'], data[
        'val_y'] = language_translation_encode_data(data['x'], data['y'],
                                                    data['val_x'],
                                                    data['val_y'])


trainer.on_task_start(preprocess_data)

#Use the model trainer
trainer.train()