# Split the 4-D tensors into targets (channel 0) and features (remaining channels).
targets_train = y_train[:, :, :, [0]]
features_train = y_train[:, :, :, 1:]
targets_val = y_val[:, :, :, [0]]
features_val = y_val[:, :, :, 1:]
targets_test = y_test[:, :, :, [0]]
features_test = y_test[:, :, :, 1:]

lstm = LSTM(input_size, hidden_size, output_size, n_layers, dropout)
if os.path.isfile(checkpoint_file):
    print("Loading checkpoint...")
    lstm.load_state_dict(torch.load(checkpoint_file))
if use_cuda:
    lstm.cuda()

# optimizer = optim.Adam(lstm.parameters(), lr=lr)
#
# best_val_loss = 1000
# train_loss = 0
# for epoch in range(n_epochs):
#     n_batches = x_train.shape[0]
#     for i in range(n_batches):
#         lstm.hidden = None
#         input_batches = x_train[i]
#         target_batches = targets_train[i]
#         train_loss = train(input_batches, target_batches, lstm, optimizer, use_cuda)
#
#     epoch_train_loss = evaluate(x_train, targets_train, lstm, use_cuda)
#     epoch_val_loss = evaluate(x_val, targets_val, lstm, use_cuda)
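# The commented-out loop above relies on train() and evaluate() helpers that
# are not shown in this excerpt. A minimal sketch of what they might look
# like, assuming the model emits one prediction per input and MSE is the
# criterion (both are assumptions, not confirmed by the snippet):
import torch
import torch.nn.functional as F

def train(inputs, targets, model, optimizer, use_cuda):
    if use_cuda:
        inputs, targets = inputs.cuda(), targets.cuda()
    model.train()
    optimizer.zero_grad()
    loss = F.mse_loss(model(inputs), targets)
    loss.backward()
    optimizer.step()
    return loss.item()

def evaluate(x, targets, model, use_cuda):
    model.eval()
    total = 0.0
    with torch.no_grad():
        for i in range(x.shape[0]):
            inputs, tgt = x[i], targets[i]
            if use_cuda:
                inputs, tgt = inputs.cuda(), tgt.cuda()
            model.hidden = None  # reset recurrent state, as in the loop above
            total += F.mse_loss(model(inputs), tgt).item()
    return total / x.shape[0]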
def setup(config):
    # Build the task specified in the config.
    if config.task.name == 'copy':
        task = CopyTask(
            batch_size=config.task.batch_size,
            min_len=config.task.min_len,
            max_len=config.task.max_len,
            bit_width=config.task.bit_width,
            seed=config.task.seed,
        )
    elif config.task.name == 'repeat':
        task = RepeatCopyTask(
            batch_size=config.task.batch_size,
            bit_width=config.task.bit_width,
            min_len=config.task.min_len,
            max_len=config.task.max_len,
            min_rep=config.task.min_rep,
            max_rep=config.task.max_rep,
            norm_max=config.task.norm_max,
            seed=config.task.seed,
        )
    elif config.task.name == 'recall':
        task = AssociativeRecallTask(
            batch_size=config.task.batch_size,
            bit_width=config.task.bit_width,
            item_len=config.task.item_len,
            min_cnt=config.task.min_cnt,
            max_cnt=config.task.max_cnt,
            seed=config.task.seed,
        )
    else:
        logging.info('Unknown task')
        exit(0)

    # Build the model specified in the config.
    torch.manual_seed(config.model.seed)
    if config.model.name == 'lstm':
        model = LSTM(
            n_inputs=task.full_input_width,
            n_outputs=task.full_output_width,
            n_hidden=config.model.n_hidden,
            n_layers=config.model.n_layers,
        )
    elif config.model.name == 'ntm':
        model = NTM(
            input_size=task.full_input_width,
            output_size=task.full_output_width,
            mem_word_length=config.model.mem_word_length,
            mem_cells_count=config.model.mem_cells_count,
            n_writes=config.model.n_writes,
            n_reads=config.model.n_reads,
            controller_n_hidden=config.model.controller_n_hidden,
            controller_n_layers=config.model.controller_n_layers,
            clip_value=config.model.clip_value,
        )
    elif config.model.name == 'dnc':
        model = DNC(
            input_size=task.full_input_width,
            output_size=task.full_output_width,
            cell_width=config.model.cell_width,
            n_cells=config.model.n_cells,
            n_reads=config.model.n_reads,
            controller_n_hidden=config.model.controller_n_hidden,
            controller_n_layers=config.model.controller_n_layers,
            clip_value=config.model.clip_value,
            masking=config.model.masking,
            mask_min=config.model.mask_min,
            dealloc=config.model.dealloc,
            diff_alloc=config.model.diff_alloc,
            links=config.model.links,
            links_sharpening=config.model.links_sharpening,
            normalization=config.model.normalization,
            dropout=config.model.dropout,
        )
    else:
        logging.info('Unknown model')
        exit(0)

    if config.gpu and torch.cuda.is_available():
        model = model.cuda()

    # Setup optimizer (an unknown name previously fell through and left
    # `optimizer` undefined; guard against that explicitly).
    if config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=config.learning_rate,
            momentum=config.momentum,
        )
    elif config.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(
            model.parameters(),
            lr=config.learning_rate,
            momentum=config.momentum,
        )
    elif config.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        logging.info('Unknown optimizer')
        exit(0)

    step = 0
    if config.load:
        logging.info('Restoring model from checkpoint')
        model, optimizer, task, step = utils.load_checkpoint(
            model, optimizer, task, config.load,
        )
    return model, optimizer, task, step
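# A minimal sketch of a config object that satisfies setup(), built with
# types.SimpleNamespace purely for illustration. The real project likely
# loads this from a config file; the concrete field values here are
# assumptions, chosen only to exercise the 'copy' + 'lstm' path:
from types import SimpleNamespace

config = SimpleNamespace(
    task=SimpleNamespace(name='copy', batch_size=32, min_len=1, max_len=20,
                         bit_width=8, seed=1),
    model=SimpleNamespace(name='lstm', n_hidden=256, n_layers=1, seed=1),
    optimizer='rmsprop',
    learning_rate=1e-4,
    momentum=0.9,
    gpu=False,
    load=None,  # falsy, so no checkpoint is restored and step stays 0
)
model, optimizer, task, step = setup(config)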
data_loader = DataLoader(
    dset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=4,
    pin_memory=True,  # CUDA only
)

# Network definition + optimizer + scheduler
model = LSTM(
    hidden_size=n_hidden1,
    hidden_size2=n_hidden2,
    num_securities=n_stocks,
    dropout=0.2,
    n_layers=2,
    T=T,
)
if use_cuda:
    model.cuda()
optimizer = optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=0.0)
# gamma=1.0 keeps the learning rate constant
scheduler_model = lr_scheduler.StepLR(optimizer, step_size=1, gamma=1.0)

# Loss function (size_average is deprecated; the default reduction is 'mean')
criterion = nn.MSELoss()
if use_cuda:
    criterion = criterion.cuda()

# Store successive losses
losses = []
it = 0
for i in range(max_epochs):
    loss_ = 0.
    # Store current predictions and ground truth
    predicted = []
    gt = []
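    # The epoch body is truncated in this excerpt. A minimal sketch of how it
    # might continue, assuming data_loader yields (inputs, targets) batches
    # (the loader name and batch layout are assumptions, not confirmed above):
    for inputs, targets in data_loader:
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = model(inputs)             # forward pass through the LSTM
        loss = criterion(outputs, targets)  # mean squared error
        loss.backward()
        optimizer.step()
        loss_ += loss.item()
        predicted.append(outputs.detach().cpu())
        gt.append(targets.detach().cpu())
        it += 1
    losses.append(loss_ / len(data_loader))  # mean loss over the epoch
    scheduler_model.step()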
def setup_model(config):
    # Load data
    if config.task.name == 'arithmetic':
        train_data = Arithmetic(
            batch_size=config.task.batch_size,
            min_len=config.task.min_len,
            max_len=config.task.max_len,
            task=config.task.task,
            seed=config.seed,
        )
        # Fixed-length validation batches of 50 examples each.
        np.random.seed(config.seed)
        params = [20, 30, 40, 60]
        validation_data = []
        for length in params:
            example = train_data.gen_batch(
                batch_size=50,
                min_len=length,
                max_len=length,
                distribution=np.array([1]),
            )
            validation_data.append((example, length))
        loss = Arithmetic.loss
    else:
        logging.info('Unknown task')
        exit(0)

    # Setup model
    torch.manual_seed(config.seed)
    if config.model.name == 'lstm':
        model = LSTM(
            n_inputs=train_data.symbols_amount,
            n_outputs=train_data.symbols_amount,
            n_hidden=config.model.n_hidden,
            n_layers=config.model.n_layers,
        )
    elif config.model.name == 'ntm':
        model = NTM(
            input_size=train_data.symbols_amount,
            output_size=train_data.symbols_amount,
            mem_word_length=config.model.mem_word_length,
            mem_cells_count=config.model.mem_cells_count,
            n_writes=config.model.n_writes,
            n_reads=config.model.n_reads,
            controller_n_hidden=config.model.controller_n_hidden,
            controller_n_layers=config.model.controller_n_layers,
            controller=config.model.controller,
            layer_sizes=config.model.layer_sizes,
            controller_output=config.model.controller_output,
            clip_value=config.model.clip_value,
            dropout=config.model.dropout,
        )
    elif config.model.name == 'dnc':
        model = DNC(
            input_size=train_data.symbols_amount,
            output_size=train_data.symbols_amount,
            n_cells=config.model.n_cells,
            cell_width=config.model.cell_width,
            n_reads=config.model.n_reads,
            controller_n_hidden=config.model.controller_n_hidden,
            controller_n_layers=config.model.controller_n_layers,
            clip_value=config.model.clip_value,
        )
    else:
        logging.info('Unknown model')
        exit(0)

    if config.gpu and torch.cuda.is_available():
        model = model.cuda()
    logging.info('Loaded model')
    logging.info('Total number of parameters %d', model.calculate_num_params())

    # Setup optimizer (guard against unknown names, which previously left
    # `optimizer` undefined).
    if config.optimizer == 'sgd':
        optimizer = torch.optim.SGD(
            model.parameters(),
            lr=config.learning_rate,
            momentum=config.momentum,
        )
    elif config.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(
            model.parameters(),
            lr=config.learning_rate,
            momentum=config.momentum,
        )
    elif config.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        logging.info('Unknown optimizer')
        exit(0)

    # Optionally pack a plateau scheduler together with the optimizer.
    if config.scheduler is not None:
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=config.scheduler.factor,
            patience=config.scheduler.patience,
            verbose=config.scheduler.verbose,
            threshold=config.scheduler.threshold,
        )
        optimizer = (optimizer, scheduler)

    if config.load:
        model, optimizer, train_data, step = utils.load_checkpoint(
            model, optimizer, train_data, config.load,
        )
    return model, optimizer, loss, train_data, validation_data
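# setup_model() may return (optimizer, scheduler) packed in a tuple, so a
# caller has to unpack it before stepping. A minimal usage sketch; the
# train_step()/validate() helpers and the n_steps/eval_every hyperparameters
# are hypothetical stand-ins, not part of the code above:
model, optimizer, loss_fn, train_data, validation_data = setup_model(config)
scheduler = None
if isinstance(optimizer, tuple):
    optimizer, scheduler = optimizer

for step in range(n_steps):
    batch = train_data.gen_batch()  # batch layout depends on the task
    train_loss = train_step(model, optimizer, loss_fn, batch)
    if step % eval_every == 0:
        val_loss = validate(model, loss_fn, validation_data)
        if scheduler is not None:
            scheduler.step(val_loss)  # ReduceLROnPlateau steps on the metric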