def load_checkpoint(self, name, with_trainer=True, model_use_gpu=True):
    """Load a model checkpoint and set up the Model and ModelTrainer.

    Args:
        name (str): Checkpoint name
        with_trainer (bool, optional): Whether to load the ModelTrainer. Defaults to True.
        model_use_gpu (bool, optional): Set up the Model with GPU enabled. Defaults to True.

    Returns:
        (Model, ModelTrainer): Returns model and trainer
    """
    checkpoint = torch.load(self.get_checkpoint_path(name))

    # recover model state
    model = Model(checkpoint['class_to_idx'],
                  arch=checkpoint['arch'],
                  out_features=checkpoint['out_features'],
                  hidden_units_1=checkpoint['hidden_units_1'],
                  hidden_units_2=checkpoint['hidden_units_2'],
                  dropout_1=checkpoint['dropout_1'],
                  dropout_2=checkpoint['dropout_2'],
                  use_gpu=model_use_gpu)
    model.model.load_state_dict(checkpoint['model_state'])
    model.model.class_to_idx = checkpoint['class_to_idx']

    # recover trainer state
    if with_trainer:
        trainer = ModelTrainer(model, checkpoint['learning_rate'])
        trainer.optimizer.load_state_dict(checkpoint['optimizer_state'])
        trainer.trained_epochs = checkpoint['trained_epochs']
        return (model, trainer)
    else:
        return model
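# For context, the sketch below shows a hypothetical save_checkpoint counterpart
# that writes exactly the keys load_checkpoint above reads. It is NOT part of the
# original source: it assumes Model stores its constructor arguments (arch,
# out_features, hidden units, dropouts) as attributes and that ModelTrainer exposes
# `learning_rate`, `optimizer`, and `trained_epochs`.
def save_checkpoint(self, name, model, trainer):
    """Hypothetical counterpart to load_checkpoint: persist the keys it expects."""
    checkpoint = {
        'class_to_idx': model.model.class_to_idx,
        'arch': model.arch,
        'out_features': model.out_features,
        'hidden_units_1': model.hidden_units_1,
        'hidden_units_2': model.hidden_units_2,
        'dropout_1': model.dropout_1,
        'dropout_2': model.dropout_2,
        'model_state': model.model.state_dict(),
        'learning_rate': trainer.learning_rate,
        'optimizer_state': trainer.optimizer.state_dict(),
        'trained_epochs': trainer.trained_epochs,
    }
    torch.save(checkpoint, self.get_checkpoint_path(name))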
def perform_SVR(self):
    print(
        'SVRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRRR'
    )
    model_trainer = ModelTrainer()
    svr = SVR(kernel='poly', C=1e3, epsilon=0.2)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        svr, self.X_train, self.X_test, self.Y_train, self.Y_test)
class CreateClassifier(object):
    def __init__(self):
        self.pre = Preprocess()
        self.nlp = NLPHelper()
        self.fex = FeatureExtractor()
        self.ut = Utility()
        self.mt = ModelTrainer()

    def createClassifier(self):
        # get golden data
        # data = self.nlp.getGoldenDataset()
        # extract entities and save them into a pickle
        # self.nlp.extractNews(data)
        # CHANGE MODULE WHEN SWITCHING BETWEEN ADDITIONAL IDN AND DEFAULT
        # self.nlp.core_nlp.close()

        # # find features in one text and save them to Excel
        # # scenario 1
        # # path = "scenario1_halfidn_pickle/"
        # # scenario 2
        # # path = "scenario2_fullidn_pickle/"
        # # scenario 3
        # path = "scenario3_stanford_pickle/"
        # path = "test/"

        # filelist = os.listdir(path)
        # data = pd.DataFrame()

        # for idx, file in enumerate(filelist):
        #     # open the pickle file containing the NER, coref, and POS data of a news article
        #     pkl_dict = self.ut.loadPickle(os.path.join(path, file))
        #     # extract features from the pickle file
        #     temp = self.fex.extractFeaturesFromPickle(pkl_dict)
        #     data = data.append(temp)

        # # scenario 1
        # self.ut.convertToExcel("scenario1_idnnerhalf_extracted_feature.xlsx", data, 'Sheet1')
        # # scenario 2
        # self.ut.convertToExcel("scenario2_idnnerfull_extracted_feature.xlsx", data, 'Sheet1')
        # # scenario 3
        # self.ut.convertToExcel("scenario3_stanford_extracted_feature.xlsx", data, 'Sheet1')
        # # scenario testing
        # self.ut.convertToExcel("testing_rf.xlsx", data, 'Sheet1')

        # for training use:
        # read the Excel file that contains the features
        # (THE WHO AND WHERE COLUMNS MUST BE ADDED FIRST, AND IT MUST BE SPECIFIED WHICH IS WHO AND WHICH IS WHERE)
        # scenario 1
        # df = pd.read_excel('scenario1_idnnerhalf_extracted_feature.xlsx', sheet_name='Sheet1')
        # scenario 2
        df = pd.read_excel('scenario2_idnnerfull_extracted_feature.xlsx',
                           sheet_name='Sheet1')
        # # scenario 3
        # df = pd.read_excel('scenario3_stanford_extracted_feature.xlsx', sheet_name='Sheet1')

        # train models for detecting who and where; passing "where" or "who"
        # means that column will be dropped (deleted) before training
        who = self.mt.train(df, 'where')
        where = self.mt.train(df, 'who')

        self.nlp.core_nlp.close()
def perform_ridge_regression(self):
    print(
        '*********************************************RIDGE REGRESSION**************************************************'
    )
    model_trainer = ModelTrainer()
    ridge = Ridge(alpha=1.0)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        ridge, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for ridge regression',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('ridge regression', y_true_glucose, y_pred_glucose)
def model_trainer_from_checkpoint(checkpoint_filename, video_retriever_generator,
                                  selector, extractor):
    """Restore a model from a TF checkpoint and create a ModelTrainer."""
    builder, model_saver, params = restore_model(checkpoint_filename,
                                                 video_retriever_generator,
                                                 selector, extractor)
    model_trainer = ModelTrainer(model_saver, builder, params["epoch"],
                                 float(params["best_loss"]))
    model_trainer.load_last_checkpoint()
    return model_trainer
def perform_lasso_regression(self):
    print(
        '................................... LASSO REGRESSION ............................................'
    )
    model_trainer = ModelTrainer()
    lasso = Lasso()
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        lasso, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for lasso regression',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('lasso regression', y_true_glucose, y_pred_glucose)
def perform_linear_regression(self):
    print(
        '------------------------------------------LINEAR REGRESSION------------------------------------------'
    )
    model_trainer = ModelTrainer()
    linear_reg = LinearRegression()
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        linear_reg, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for linear regression',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('linear regression', y_true_glucose, y_pred_glucose)
def __init__(self, experiment_name: str,
             search_config: AgingEvoConfig,
             training_config: TrainingConfig,
             bound_config: BoundConfig):
    self.log = logging.getLogger(name=f"AgingEvoSearch [{experiment_name}]")
    self.config = search_config
    self.trainer = ModelTrainer(training_config)
    self.root_dir = Path(search_config.checkpoint_dir)
    self.root_dir.mkdir(parents=True, exist_ok=True)
    self.experiment_name = experiment_name

    if training_config.pruning and not training_config.pruning.structured:
        self.log.warning(
            "For unstructured pruning, we can only meaningfully use the model "
            "size resource metric.")
        bound_config.peak_mem_bound = None
        bound_config.mac_bound = None
    self.pruning = training_config.pruning

    # We establish an order of objectives in the feature vector;
    # all functions must ensure the order is the same.
    self.constraint_bounds = [
        bound_config.error_bound, bound_config.peak_mem_bound,
        bound_config.model_size_bound, bound_config.mac_bound
    ]

    self.history: List[EvaluatedPoint] = []
    self.population: List[EvaluatedPoint] = []

    self.population_size = search_config.population_size
    self.initial_population_size = search_config.initial_population_size or self.population_size
    self.rounds = search_config.rounds
    self.sample_size = search_config.sample_size
    num_gpus = len(tf.config.experimental.list_physical_devices("GPU"))
    self.max_parallel_evaluations = search_config.max_parallel_evaluations or num_gpus
def train_and_eval(embedding, layers, batch_size, layers_type):
    # Device
    device = get_device()

    # Training parameters
    epochs = 5

    # Train, dev, and test data
    train_file = './data/snli_1.0_train.jsonl'
    train_data = Data(train_file, embedding)
    dev_file = './data/snli_1.0_dev.jsonl'
    dev_data = Data(dev_file, embedding)
    test_file = './data/snli_1.0_test.jsonl'
    test_data = Data(test_file, embedding)

    # Create the model
    model = ResidualLSTMEncoder(embedding_vectors=embedding.vectors,
                                padding_index=train_data.padding_index,
                                layers_def=layers,
                                output_size=len(train_data.c2i),
                                max_sentence_length=Data.MAX_SENTENCE_SIZE,
                                hidden_mlp=800,
                                device=device,
                                layers_type=layers_type)
    num_of_params = sum(p.numel() for p in model.parameters())
    print("Number of model parameters: %d" % num_of_params)
    model = model.to(device)

    # Create the optimizer
    optimizer = optim.Adam(model.parameters(), lr=2e-4)
    # optimizer = optim.Adagrad(model.parameters())

    # Create a model trainer object
    model_trainer = ModelTrainer(net=model, device=device, optimizer=optimizer)

    # Train the model
    model_trainer.train(train_data, dev_data,
                        train_log_file='train_1.txt',
                        dev_log_file='dev_1.txt',
                        epochs=epochs,
                        batch_size=batch_size)

    # Save the model
    model_trainer.save_model('./models/model_1')

    # Test the model
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=0)
    test_performencer = Performencer(name='Test',
                                     output_size=model.output_size)
    model_trainer.eval(test_loader, test_performencer)
    test_performencer.pinpoint()
    test_performencer.log_to_file('test_1.txt')
def process_image(frame):
    image = np.empty_like(frame)
    np.copyto(image, frame)
    model = ModelTrainer.get_trained_model()
    img = CarDetector.process(model, image)
    GV.current_frame += 1
    return img
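# A possible way to wire process_image into a video pipeline (illustrative only:
# the file names are placeholders and moviepy is assumed to be available; the
# original source does not show how frames are fed in).
from moviepy.editor import VideoFileClip

clip = VideoFileClip("input_video.mp4")
annotated_clip = clip.fl_image(process_image)  # runs process_image on every RGB frame
annotated_clip.write_videofile("output_video.mp4", audio=False)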
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', type=str, help='Data file path', required=True)
    parser.add_argument('--output', type=str, help='Output file path', required=True)
    parser.add_argument('--output_model', type=str, help='Model path', default=None)
    parser.add_argument('--level', type=int, default=0)
    parser.add_argument('--fold', type=int, default=2)
    parser.add_argument('--iter', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--epoch', type=int, default=30)
    parser.add_argument('--random_state', type=int, default=None)
    args = parser.parse_args()

    dataset = BrainDataset(args.data, expand_dim=True, level=args.level)
    model = CNN1D(len(np.unique(dataset.label))).to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    epochs = args.epoch
    batch_size = args.batch_size

    trainer = ModelTrainer(model, dataset, DEVICE)
    result = trainer.train(optimizer,
                           criterion,
                           batch_size=batch_size,
                           epochs=epochs,
                           kfold=args.fold,
                           iteration=args.iter,
                           random_state=args.random_state)

    result = np.array(result)
    np.savetxt(args.output, result, delimiter=",")
    if args.output_model is not None:
        torch.save(model.state_dict(), args.output_model)
def perform_PLS(self):
    print(
        ',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, PARTIAL LEAST SQUARE ,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,'
    )
    model_trainer = ModelTrainer()
    pls = PLSRegression(n_components=20,
                        scale=True,
                        max_iter=5000,
                        tol=1e-06,
                        copy=True)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        pls, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for partial least square',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('pls', y_true_glucose, y_pred_glucose)
def perform_NN(self):
    print(
        '/////////////////////////////////////////////////// NEURAL NETWORK ///////////////////////////////////'
    )
    model_trainer = ModelTrainer()
    nn = MLPRegressor(hidden_layer_sizes=(200, ),
                      activation='relu',
                      solver='adam',
                      alpha=0.1,
                      batch_size='auto',
                      learning_rate='constant',
                      learning_rate_init=0.001,
                      power_t=0.5,
                      max_iter=3000,
                      shuffle=True,
                      random_state=None,
                      tol=0.0001,
                      verbose=False,
                      warm_start=False,
                      momentum=0.9,
                      nesterovs_momentum=True,
                      early_stopping=False,
                      validation_fraction=0.1,
                      beta_1=0.9,
                      beta_2=0.999,
                      epsilon=1e-08,
                      n_iter_no_change=10)
    Y_test, Y_pred, y_true_glucose, y_pred_glucose = model_trainer.train_model(
        nn, self.X_train, self.X_test, self.Y_train, self.Y_test)
    evl = MetricsCalculator()
    evl.evaluate('root mean square error for Neural network',
                 y_true_glucose, y_pred_glucose)
    viz = Visualizer()
    viz.visualize('neural network', y_true_glucose, y_pred_glucose)
def __init__(self, experiment_name: str,
             search_config: BayesOptConfig,
             training_config: TrainingConfig,
             bound_config: BoundConfig):
    assert search_config.starting_points >= 1

    self.log = logging.getLogger(name=f"BayesOpt [{experiment_name}]")
    self.config = search_config
    self.trainer = ModelTrainer(training_config)
    self.root_dir = Path(search_config.checkpoint_dir)
    self.root_dir.mkdir(parents=True, exist_ok=True)
    self.experiment_name = experiment_name

    if training_config.pruning and not training_config.pruning.structured:
        self.log.warning("For unstructured pruning, we can only use the model size resource metric.")
        bound_config.peak_mem_bound = None
        bound_config.mac_bound = None

    # We establish an order of objectives in the feature vector;
    # all functions must ensure the order is the same.
    self.constraint_bounds = [bound_config.error_bound,
                              bound_config.peak_mem_bound,
                              bound_config.model_size_bound,
                              bound_config.mac_bound]
elif opt.model_name == 2:  # MLP
    train_config['using_spectrogram'] = True
    train_config['criterion'] = 'MSE'
elif opt.model_name == 3:  # simple generator
    train_config['using_simple_g'] = True
    train_config['criterion'] = 'MSE'
elif opt.model_name == 4:  # 1D auto encoder
    train_config['criterion'] = 'MSE'
elif opt.model_name == 5:  # 2D auto encoder
    train_config['criterion'] = 'MSE'
    train_config['using_spectrogram'] = True
elif opt.model_name == 6:  # simple auto encoder
    train_config['criterion'] = 'MSE'
elif opt.model_name == 7:  # adversarial MLP
    train_config['using_spectrogram'] = True

trainer = ModelTrainer(**train_config)
trainer.train()
def main():
    """Run Training Session"""
    # Measure total program runtime by collecting start time
    start_time = time.time()

    # get input args
    in_arg = get_input_args()

    # Check command line arguments using in_arg
    print_command_line_arguments(in_arg)
    print()

    # load datasets
    dataloader = ModelDataLoader(in_arg.data_dir)
    train_dataset, train_dataloader = dataloader.get_train_data()
    valid_dataset, valid_dataloader = dataloader.get_validation_data()
    test_dataset, test_dataloader = dataloader.get_test_data()

    # Use model loader to load an existing checkpoint
    loader = ModelLoader(in_arg.checkpoint_dir)

    if loader.checkpoint_exists(in_arg.checkpoint):
        # load checkpoint
        print("Loading checkpoint %s" % (loader.get_checkpoint_path(in_arg.checkpoint)))
        model, trainer = loader.load_checkpoint(in_arg.checkpoint,
                                                model_use_gpu=in_arg.gpu)
        print("Epochs trained so far: %d" % (trainer.trained_epochs))
    else:
        # no checkpoint, create a fresh model using input arguments
        print("Checkpoint '%s' does not exist" % (loader.get_checkpoint_path(in_arg.checkpoint)))
        model = Model(train_dataset.class_to_idx,
                      arch=in_arg.arch,
                      use_gpu=in_arg.gpu,
                      hidden_units_1=in_arg.hidden_units_1,
                      hidden_units_2=in_arg.hidden_units_2,
                      dropout_1=in_arg.dropout_1,
                      dropout_2=in_arg.dropout_2)
        trainer = ModelTrainer(model, learning_rate=in_arg.learning_rate)

    print()
    print("Model training in session...")
    print()

    epochs = in_arg.epochs

    # train model and print results
    for result in trainer.train_epochs(epochs, train_dataloader, valid_dataloader):
        print(
            "Epoch: %3d/%3d" % (result['epoch'] + 1, epochs),
            " | Train Loss: %10.5f" % (result['train_loss']),
            " | Validation Loss: %10.5f" % (result['validation_loss']),
            " | Validation Acc: %6.3f%%" % (result['validation_accuracy'] * 100),
            " | Duration: %10.3fs" % (result['duration'])
        )

    print()
    print("Testing model against test data...")
    print()

    # test model against test data
    test_result = trainer.test(test_dataloader)
    print(
        "Test Loss: %10.5f" % (test_result['test_loss']),
        " | Test Acc: %6.3f%%" % (test_result['test_accuracy'] * 100),
        " | Duration: %10.3fs" % (test_result['duration'])
    )

    # save checkpoint
    loader.save_checkpoint(in_arg.checkpoint, model, trainer)

    print()
    print("Total Train Duration: %.3fs" % (time.time() - start_time))
def main():
    logging.warning("dummy warning!!!")
    logging.error("dummy error!!!")
    logging.info("dummy info!!!")
    logging.debug("dummy debug!!!")
    logging.warning(f"Inside {__file__}")

    parser = argparse.ArgumentParser()
    parser.add_argument("--subscription_id", type=str, dest="subscription_id",
                        help="The Azure subscription ID")
    parser.add_argument("--resource_group", type=str, dest="resource_group",
                        help="The resource group name")
    parser.add_argument("--workspace_name", type=str, dest="workspace_name",
                        help="The workspace name")
    parser.add_argument("--experiments_config_filepath", type=str, dest="experiments_config_filepath",
                        help="A path to the JSON config file")  # noqa: E501
    parser.add_argument("--model_name", type=str, dest="model_name",
                        help="Name of the Model")
    parser.add_argument("--should_register_model", type=str2bool, dest="should_register_model",
                        default=False, help="Register trained model")  # noqa: E501
    args = parser.parse_args()

    logging.warning(f"Argument 1: {args.subscription_id}")
    logging.warning(f"Argument 2: {args.resource_group}")
    logging.warning(f"Argument 3: {args.workspace_name}")
    logging.warning(f"Argument 4: {args.experiments_config_filepath}")
    logging.warning(f"Argument 5: {args.model_name}")
    logging.warning(f"Argument 6: {args.should_register_model}")

    # Get current service context
    run = Run.get_context()
    workspace = run.experiment.workspace

    # Load training configuration
    experiment_configuration = ExperimentConfigurationWrapper()
    experiment_configuration.load(args.experiments_config_filepath)
    training_config = experiment_configuration.json["feature_extractor"]["training"]

    # initialize empty collections for data
    # train_set = []
    # test_set = []
    # dev_set = []

    download_root_dir = os.path.join('/mnt', 'tmp', 'datasets')
    data_splitter = HDF5TrainTestSplitter()

    for data_config in training_config["data"]:
        cropped_cells_dataset_name = data_config['input']['cropped_cells_dataset_name']
        cropped_cells_dataset_version = data_config['input']['cropped_cells_dataset_version']
        cropped_cells_dataset = Dataset.get_by_name(
            workspace=workspace,
            name=cropped_cells_dataset_name,
            version=cropped_cells_dataset_version)

        msg = (
            f"Dataset '{cropped_cells_dataset_name}', id: {cropped_cells_dataset.id}"
            f", version: {cropped_cells_dataset.version} will be used to prepare data for a feature extractor training."
        )
        logging.warning(msg)

        # Create a folder where datasets will be downloaded to
        dataset_target_path = os.path.join(download_root_dir, cropped_cells_dataset_name)
        os.makedirs(dataset_target_path, exist_ok=True)

        # Download the 'cropped cells' dataset (consisting of HDF5 and CSV files)
        dataset_target_path = download_registered_file_dataset(
            workspace, cropped_cells_dataset, download_root_dir)
        list_all_files_in_location(dataset_target_path)

        # Split data (indices) into subsets
        df_metadata = pd.read_csv(os.path.join(dataset_target_path, 'cropped_nuclei.csv'))
        logging.warning(f"Metadata dataframe (shape): {df_metadata.shape}")

        logging.warning("Splitting data into subsets...")
        data_splitter.add_dataset(
            name=data_config['input']['cropped_cells_dataset_name'],
            fname=os.path.join(dataset_target_path, 'cropped_nuclei_images.h5'),
            metadata=df_metadata)
        data_splitter.train_dev_test_split()

    # --------
    # Training
    # --------

    # Init dataloaders
    # train_dataset = CellDataset(cell_list=train_set, target_cell_shape=INPUT_IMAGE_SIZE)
    train_dataset = CellDataset(splitter=data_splitter, dset_type='train')
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        num_workers=DATA_LOADER_WORKERS,
    )

    # dev_dataset = CellDataset(cell_list=dev_set, target_cell_shape=INPUT_IMAGE_SIZE)
    dev_dataset = CellDataset(splitter=data_splitter, dset_type='dev')
    dev_data_loader = torch.utils.data.DataLoader(
        dev_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=DATA_LOADER_WORKERS,
    )

    # test_dataset = CellDataset(cell_list=test_set, target_cell_shape=INPUT_IMAGE_SIZE)
    test_dataset = CellDataset(splitter=data_splitter, dset_type='test')
    test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=DATA_LOADER_WORKERS,
    )

    # Define and train the model
    device = torch.device(DEVICE)
    model = AUTOENCODER(
        latent_dim_size=LATENT_DIM_SIZE,
        input_image_size=INPUT_IMAGE_SIZE,
        device=device,
    )
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    print(f"Using {torch.cuda.device_count()} GPUs for training")
    # model = torch.nn.DataParallel(model, device_ids=[0, 1, 2, 3])

    trainer = ModelTrainer(model, device)
    tr_losses, dev_losses = trainer.train(
        epochs=EPOCHS,
        optimizer=optimizer,
        train_data_loader=train_data_loader,
        dev_data_loader=dev_data_loader,
    )
    test_loss = trainer.test_model(test_data_loader)

    run.log("dev_loss", np.max(dev_losses))
    run.log("train_loss", np.max(tr_losses))
    run.log("test_loss", test_loss)

    # Plot training metrics and model sample reconstructions
    trainer.get_training_plot(tr_losses=tr_losses, dev_losses=dev_losses)
    run.log_image("model training metrics", plot=plt)

    dataiter = iter(test_data_loader)
    images = next(dataiter)
    trainer.get_pred_samples(images, figsize=(40, 40))
    run.log_image("sample reconstructions", plot=plt)

    # Training completed!
    # Let's save the model and upload it to AML
    os.makedirs("./models", exist_ok=True)
    model_file_name = "model.ext"
    model_output_loc = os.path.join(".", "models", model_file_name)
    torch.save(model, model_output_loc)
    run.upload_files(names=[model_output_loc], paths=[model_output_loc])

    # Register model (ideally, this should be a separate step)
    if args.should_register_model:
        logging.warning("List of the associated stored files:")
        logging.warning(run.get_file_names())
        logging.warning("Registering a new model...")

        # TODO: prepare a list of metrics that were logged using run.log()
        metric_names = []

        if os.path.exists(model_output_loc):
            register_model(
                run=run,
                model_name=args.model_name,
                model_description="Feature extraction model",
                model_path=model_output_loc,
                training_context="PythonScript",
                metric_names=metric_names,
            )
        else:
            logging.warning(
                f"Cannot register model as path {model_output_loc} does not exist.")
    else:
        logging.warning("A trained model will not be registered.")

    logging.warning("Done!")
    logging.info("Done Info Style!")
def main(args):
    total_step = 100 // args.EF

    # set random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)

    # prepare checkpoints and log folders
    if not os.path.exists(args.checkpoints_dir):
        os.makedirs(args.checkpoints_dir)
    if not os.path.exists(args.logs_dir):
        os.makedirs(args.logs_dir)

    # initialize dataset
    if args.dataset == 'visda':
        args.data_dir = os.path.join(args.data_dir, 'visda')
        data = Visda_Dataset(root=args.data_dir, partition='train', label_flag=None)
    elif args.dataset == 'office':
        args.data_dir = os.path.join(args.data_dir, 'Office')
        data = Office_Dataset(root=args.data_dir, partition='train', label_flag=None,
                              source=args.source_name, target=args.target_name)
    elif args.dataset == 'home':
        args.data_dir = os.path.join(args.data_dir, 'OfficeHome')
        data = Home_Dataset(root=args.data_dir, partition='train', label_flag=None,
                            source=args.source_name, target=args.target_name)
    elif args.dataset == 'visda18':
        args.data_dir = os.path.join(args.data_dir, 'visda18')
        data = Visda18_Dataset(root=args.data_dir, partition='train', label_flag=None)
    else:
        print('Unknown dataset!')

    args.class_name = data.class_name
    args.num_class = data.num_class
    args.alpha = data.alpha

    # setting experiment name
    label_flag = None
    selected_idx = None
    args.experiment = set_exp_name(args)
    logger = Logger(args)

    if not args.visualization:
        for step in range(total_step):
            print("This is {}-th step with EF={}%".format(step, args.EF))
            trainer = ModelTrainer(args=args, data=data, step=step,
                                   label_flag=label_flag, v=selected_idx,
                                   logger=logger)

            # train the model
            args.log_epoch = 4 + step // 2
            trainer.train(step, epochs=4 + step * 2, step_size=args.log_epoch)

            # pseudo_label
            pred_y, pred_score, pred_acc = trainer.estimate_label()

            # select data from target to source
            selected_idx = trainer.select_top_data(pred_score)

            # add new data
            label_flag, data = trainer.generate_new_train_data(selected_idx, pred_y, pred_acc)
    else:
        # load trained weights
        trainer = ModelTrainer(args=args, data=data)
        trainer.load_model_weight(args.checkpoint_path)
        vgg_feat, node_feat, target_labels, split = trainer.extract_feature()
        visualize_TSNE(node_feat, target_labels, args.num_class, args, split)
        plt.savefig('./node_tsne.png', dpi=300)
parser = argparse.ArgumentParser()
parser.add_argument("--config_path", default='../config.ini', required=False)
args = parser.parse_args()

cfg = OCTConfig(args.config_path)

oct_logger = OCTLogger(cfg, RUN_TIMESTAMP)
oct_logger.print_cfg()

generator_resolver = GeneratorResolver(cfg)
training_data_iterator, test_data_iterator, val_data_iterator = generator_resolver.resolve_data_iterators()

model_resolver = ModelResolver(cfg)
model = model_resolver.resolve_model()

augmented_image_data_generator = generator_resolver.provide_augmented_image_data_generator()
augmentation_processor = AugmentationProcessor(cfg, augmented_image_data_generator)
augmentation_processor.perform_data_augmentation()

model_trainer = ModelTrainer(cfg, model, training_data_iterator,
                             val_data_iterator, RUN_TIMESTAMP)
model_trainer.train_model()

model_evaluator = ModelEvaluator(cfg, model, test_data_iterator)
model_evaluator.evaluate_model()
from model_trainer import ModelTrainer
from logger import Logger

if __name__ == '__main__':
    logger = Logger(show=True, html_output=True, config_file="config.txt",
                    data_folder="drive")
    trainer = ModelTrainer(logger)
    trainer.run_training(num_epochs=50, save_after_epochs=10)
def main(args):
    # Modified here
    total_step = 100 // args.EF

    # set random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    random.seed(args.seed)

    # prepare checkpoints and log folders
    if not os.path.exists(args.checkpoints_dir):
        os.makedirs(args.checkpoints_dir)
    if not os.path.exists(args.logs_dir):
        os.makedirs(args.logs_dir)

    # initialize dataset
    if args.dataset == 'nusimg':
        args.data_dir = os.path.join(args.data_dir, 'visda')
        data = NUSIMG_Dataset(root=args.data_dir, partition='train', label_flag=None,
                              source=args.source_path, target=args.target_path)
    elif args.dataset == 'office':
        args.data_dir = os.path.join(args.data_dir, 'Office')
        data = Office_Dataset(root=args.data_dir, partition='train', label_flag=None,
                              source=args.source_path, target=args.target_path)
    elif args.dataset == 'mrc':
        data = MRC_Dataset(root=args.data_dir, partition='train', label_flag=None,
                           source=args.source_path, target=args.target_path)
    else:
        print('Unknown dataset!')

    args.class_name = data.class_name
    args.num_class = data.num_class
    args.alpha = data.alpha

    # setting experiment name
    label_flag = None
    selected_idx = None
    args.experiment = set_exp_name(args)
    logger = Logger(args)

    trainer = ModelTrainer(args=args, data=data, label_flag=label_flag,
                           v=selected_idx, logger=logger)

    for step in range(total_step):
        print("This is {}-th step with EF={}%".format(step, args.EF))

        # train the model
        args.log_epoch = 5
        trainer.train(epochs=24, step=step)  # 24

        # pseudo-label
        pred_y, pred_score, pred_acc = trainer.estimate_label()

        # select data from target to source
        selected_idx = trainer.select_top_data(pred_score)

        # add new data
        trainer.generate_new_train_data(selected_idx, pred_y, pred_acc)
def train_small_test_version(self, hyperparams_dict):
    """Perform training on the small test data."""
    trainer = ModelTrainer(self.dataloaders, hyperparams_dict,
                           self.wv_wrapper, self.path)
    model, losses, accuracies = trainer.train(epochs=3)
    return model, losses, accuracies
def main():
    config = get_config_args()
    RWAModel = RecurrentWeightedAverage(config)
    Trainer = ModelTrainer(config, RWAModel)
    Trainer.do_training()
        'max_depth': [30, 80],
        'max_features': [2, 15, 'auto', None],
        'min_samples_leaf': [3, 5],
        'min_samples_split': [2, 5, 8],
        'n_estimators': [500, 800],
        'n_jobs': [10]
    }
}

# rf = {
#     'estimator': RandomForestClassifier(),
#     'parameters': {
#         'bootstrap': [True],
#         'max_depth': [30, 50],
#         'max_features': [None],
#         'min_samples_leaf': [3, 5],
#         'min_samples_split': [2, 5, 8],
#         'n_estimators': [500, 600]
#     }
# }

dt = {'estimator': DecisionTreeClassifier(), 'parameters': {}}

model_trainer = ModelTrainer(german_config.testset_ratio, german_config.logger)
model_trainer.add_estimators([rf])

german_classifier = WordSenseAlignmentClassifier(german_config, feature_extractor, model_trainer)
german_classifier.load_data() \
    .extract_features(['similarities', 'len_diff', 'pos_diff']) \
    .train(with_testset=True)
def main():
    config = get_config_args()
    GRUModel = SequencePredictor(config)
    Trainer = ModelTrainer(config, GRUModel)
    Trainer.do_training()
# Build Model Graph from Config
model_config = configparser.ConfigParser()
model_config.read(args.model)
model = ModelBuilder(model_config)
model.build_graph()
model.compile()
model.summary_txt()
model.print_png()
model.save_graph()

# Train Model
if args.train:
    train_config = configparser.ConfigParser()
    train_config.read(args.train)
    trainer = ModelTrainer(train_config)
    trainer.get_hyperparameters()
    trainer.get_train_set()
    trainer.get_dev_set()
    trainer.get_callbacks()
    train_history = model.train(trainer.fit_options)
    model.save_weights(trainer.outputs_config['weights'])
    trainer.write_outputs(train_history)

# Test Model
if args.test:
    test_config = configparser.ConfigParser()
    test_config.read(args.test)
    tester = ModelTester(test_config)
    tester.get_weights()
    model.load_weights(tester.weights)
def __init__(self):
    self.ut = Utility()
    self.fwe = FiveWExtractor()
    self.fex = FeatureExtractor()
    self.nlp = NLPHelper()
    self.tr = ModelTrainer()
def main():
    config = get_config_args()
    GRUModel = SequencePredictor(config)
    Trainer = ModelTrainer(config, GRUModel)
    Trainer.do_training()
    .similarity_diff_to_target() \
    .max_dependency_tree_depth() \
    .target_word_synset_count() \
    .token_count_norm_diff() \
    .semicol_count() \
    .elmo_similarity()

rf = {
    'estimator': RandomForestClassifier(),
    'parameters': {
        'bootstrap': [True],
        'class_weight': ['balanced', 'balanced_subsample', None],
        'max_depth': [5, 10, 30, 50, 80],
        'max_features': [2, 10, 15, 'auto', 'sqrt', 'log2'],
        'min_samples_leaf': [2, 5, 10],
        'min_samples_split': [2, 5, 10, 20],
        'n_estimators': [500, 800, 1000, 1500],
        'n_jobs': [8]
    }
}

model_trainer = ModelTrainer(english_config.testset_ratio, english_config.logger)
model_trainer.add_estimators([rf])

english_classifier = WordSenseAlignmentClassifier(english_config, feature_extractor, model_trainer)
english_classifier.load_data() \
    .extract_features(['len_diff', 'pos_diff']) \
    .train()
                            logger='dutch_testset',
                            is_testdata=True)

feature_extractor = FeatureExtractor() \
    .first_word() \
    .similarity() \
    .diff_pos_count() \
    .tfidf() \
    .ont_hot_pos() \
    .matching_lemma() \
    .count_each_pos() \
    .cosine() \
    .jaccard() \
    .difference_in_length()

model_trainer = ModelTrainer(german_config, german_config.logger)
german_classifier = WordSenseAlignmentClassifier(german_config, feature_extractor, model_trainer)
data = german_classifier.load_data().get_preprocessed_data()

feats = feature_extractor.extract(
    data, feats_to_scale=['similarities', 'len_diff', 'pos_diff'])
feats = feature_extractor.keep_feats([
    'similarities', 'cos_tfidf', 'ADP', 'DET', 'pos_diff', 'len_diff',
    'PRON', 'CONJ', 'X', 'PROPN', 'NOUN', 'cos', 'ADJ', 'VERB', 'jaccard',
    'PUNCT', 'noun', 'ADV', 'adjective'
])

x_trainset, x_testset = model_trainer.split_data(feats, 0.0)
test_pairs = load_clean_sentences('testpairs.pkl')
training_pairs = load_clean_sentences('trainingpairs.pkl')

# print("tp[0] = ", training_pairs[:, 0])
# print("tp[1] = ", training_pairs[:, 1])

# Set up a syllabus
syl = WeightedTaskSyllabus(data=(training_pairs[:, 0], training_pairs[:, 1]),
                           weightings=weightings,
                           validation_split=0.1,
                           difficulty_sorter=sort_data,
                           task_count=task_count,
                           pre_run=False)

# Create a model trainer with our syllabus
trainer = ModelTrainer(model, syl, verbose_level=1)


# Create a callback for our model trainer and pass it in
def preprocess_data(data, syllabus, model):
    data['x'], data['y'], data['val_x'], data['val_y'] = language_translation_encode_data(
        data['x'], data['y'], data['val_x'], data['val_y'])


trainer.on_task_start(preprocess_data)

# Use the model trainer
trainer.train()