def train(cfg_file: str, ckpt=None) -> None:
    # Load the config file
    cfg = load_config(cfg_file)

    # Set the random seed
    set_seed(seed=cfg["training"].get("random_seed", 42))

    # Load the data - Trg as (batch, # of frames, joints + 1)
    train_data, dev_data, test_data, src_vocab, trg_vocab = load_data(cfg=cfg)

    # Build the Progressive Transformer model
    model = build_model(cfg, src_vocab=src_vocab, trg_vocab=trg_vocab)

    if ckpt is not None:
        use_cuda = cfg["training"].get("use_cuda", False)
        model_checkpoint = load_checkpoint(ckpt, use_cuda=use_cuda)
        # Build model and load parameters from the checkpoint
        model.load_state_dict(model_checkpoint["model_state"])

    # For training management, e.g. early stopping and model selection
    trainer = TrainManager(model=model, config=cfg)

    # Store copy of original training config in model dir
    shutil.copy2(cfg_file, trainer.model_dir + "/config.yaml")

    # Log all entries of config
    log_cfg(cfg, trainer.logger)

    # Train the model
    trainer.train_and_validate(train_data=train_data, valid_data=dev_data)

    # Test the model with the best checkpoint
    test(cfg_file)
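# The train() function above relies on a set_seed() helper whose implementation is
# not shown here. The following is a minimal sketch of what such a helper commonly
# looks like in PyTorch projects (an assumption, not the original implementation).
import random

import numpy as np
import torch


def set_seed(seed: int) -> None:
    """Seed the Python, NumPy and PyTorch RNGs so runs are reproducible."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)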
def perform_single_test(network, train_config, seed, data_collect_freq):
    # Collect metrics at the requested frequency during training
    observer = TestObserver(freq=data_collect_freq)

    # Seed before training so each test run is reproducible
    helpers.set_seed(seed=seed)

    best_network = trainer.train_network(network=network,
                                         config=train_config,
                                         observer=observer)
    return observer.get_results(), best_network
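# A hypothetical usage sketch (build_network() and TRAIN_CONFIG are assumed names,
# not from the original source): running perform_single_test() over several seeds
# so that results reflect more than a single random initialisation.
all_results = []
for seed in range(5):
    results, best_network = perform_single_test(
        network=build_network(),    # assumed factory returning a fresh network
        train_config=TRAIN_CONFIG,  # assumed training configuration
        seed=seed,
        data_collect_freq=10,
    )
    all_results.append(results)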
def __init__(self, train_set, validation_set, batch_size, epochs, lr,
             momentum, criterion, seed=None):
    # Seed before building the DataLoaders so that shuffling is reproducible
    helpers.set_seed(seed)

    self.train_set_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True
    )
    self.validation_set_loader = torch.utils.data.DataLoader(
        validation_set, batch_size=batch_size, shuffle=True
    )

    self.epochs = epochs
    self.lr = lr
    self.momentum = momentum
    self.criterion = criterion
        outputs[t] = output

        # Decide if we are going to use teacher forcing or not
        teacher_force = random.random() < teacher_forcing_ratio

        # Get the highest predicted token from our predictions
        top1 = output.argmax(1)

        # If teacher forcing, use actual next token as next input;
        # if not, use predicted token
        input = trg[t] if teacher_force else top1

    return outputs


set_seed(42)

BATCH_SIZE = 128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator, SRC, TRG = get_data(
    BATCH_SIZE, device, reverse=True)

INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 4
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5
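# A small self-contained illustration of the teacher-forcing decision used in the
# decoding loop above: each step independently compares a uniform draw against
# teacher_forcing_ratio, so roughly that fraction of steps is fed the ground-truth
# token and the remaining steps are fed the model's own previous prediction.
import random

random.seed(0)
teacher_forcing_ratio = 0.5
steps = 10
decisions = [random.random() < teacher_forcing_ratio for _ in range(steps)]
print(f'{sum(decisions)}/{steps} steps would use teacher forcing')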
    EXPERIMENT_SEEDS = range(1)
    print('\n=======================' +
          '\n   Debugging mode:' +
          '\n=======================\n')
else:
    EXPERIMENT_SEEDS = range(N_SEEDS)

print(f'Each experiment will be run with {len(EXPERIMENT_SEEDS)} different seeds.\n')

print('These are the embedding strategies that will be tested:')
for e in ALL_EMBEDDINGS:
    print(e)
print('')

# Experiment loop:
for strategy in ALL_EMBEDDINGS:
    for seed in EXPERIMENT_SEEDS:
        # Set a seed for reproducibility
        set_seed(seed_value=seed)
        print(f'\nSeed: {seed + 1}/{len(EXPERIMENT_SEEDS)}.')

        experiment = Experiment(
            seed_value=seed,
            embedding_strategy=strategy,
            name=EXPERIMENT_NAME,
            config=deepcopy(CONFIG))

        experiment.run()