def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size, settings.hidden_size,
                 settings.num_layers, settings.out_dim, settings.drop_out)

    # initialise the embedding layer with pre-trained word embeddings
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))

    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()

    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()

    best_dev_acc = 0.0
    best_test_acc = 0.0
    for i in range(dataset.size // settings.batch_size * settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criterion)
        if (i + 1) % settings.validate_freq == 0:
            print("validating...")
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir, "sa_{}.model".format(best_dev_acc)))
            epoch = i * settings.batch_size / float(dataset.size)
            log_line = ("epoch: {}, dev acc: {}, test acc: {}, batch loss: {}, "
                        "best dev acc: {}, best test acc: {}".format(
                            epoch, dev_acc, test_acc, loss.item(),
                            best_dev_acc, best_test_acc))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write(log_line + "\n")
            print(log_line)
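# The train/test helpers called above are not shown in this snippet. Below is a
# minimal sketch of what the loop appears to assume; the (inputs, labels) batch
# layout and the accuracy computation are assumptions, not the original code.
def train(model, batch_data, optimizer, criterion):
    model.train()
    inputs, labels = batch_data  # assumed batch layout
    if torch.cuda.is_available():
        inputs, labels = inputs.cuda(), labels.cuda()
    optimizer.zero_grad()
    loss = criterion(model(inputs), labels)
    loss.backward()
    optimizer.step()
    return loss

def test(model, data):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in data:  # assumed iterable of (inputs, labels)
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            pred = model(inputs).argmax(dim=1)
            correct += (pred == labels).sum().item()
            total += labels.size(0)
    return correct / total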
def main(args):
    if args.model == 'base':
        postprocessing = None
    elif args.model == 'jump':
        postprocessing = pick_fix_length(400, PAD_TOKEN)

    TEXT = data.Field(lower=True, postprocessing=postprocessing,
                      pad_token=PAD_TOKEN, include_lengths=True)
    LABEL = data.Field(sequential=False, pad_token=None, unk_token=None)
    train, test = datasets.IMDB.splits(TEXT, LABEL)
    TEXT.build_vocab(train)
    LABEL.build_vocab(train)
    train_iter, test_iter = data.BucketIterator.splits(
        (train, test), batch_sizes=(args.batch, args.batch * 4),
        device=args.gpu, repeat=False, sort_within_batch=True)

    if args.model == 'base':
        model = LSTM(len(TEXT.vocab), 300, 128, len(LABEL.vocab))
    elif args.model == 'jump':
        model = LSTMJump(len(TEXT.vocab), 300, 128, len(LABEL.vocab),
                         args.R, args.K, args.N, 80, 8)
    model.load_pretrained_embedding(
        get_word2vec(TEXT.vocab.itos, '.vector_cache/GoogleNews-vectors-negative300.bin'))
    model.cuda(args.gpu)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    max_accuracy = 0
    for i in range(args.epoch):
        print('Epoch: {}'.format(i + 1))
        sum_loss = 0
        model.train()
        for batch in train_iter:
            optimizer.zero_grad()
            xs, lengths = batch.text
            loss = model(xs, lengths, batch.label)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.)
            optimizer.step()
            sum_loss += loss.item()
        print(f'Loss: {sum_loss / len(train_iter)}')

        sum_correct = 0
        total = 0
        model.eval()
        for batch in test_iter:
            y = model.inference(*batch.text)
            sum_correct += y.eq(batch.label).sum().float()
            total += batch.label.size(0)
        accuracy = (sum_correct / total).item()
        max_accuracy = max(accuracy, max_accuracy)
        print(f'Accuracy: {accuracy}')
    print(f'Max Accuracy: {max_accuracy}')
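# pick_fix_length is not defined in this snippet. Torchtext's legacy Field calls
# postprocessing with (batch, vocab) after numericalisation, so a plausible
# sketch that truncates or pads every example to a fixed length (what the name
# suggests) is given below; the implementation details are assumptions.
def pick_fix_length(length, pad_token):
    def postprocess(batch, vocab):
        # batch is a list of index lists, already mapped through vocab.stoi
        pad_index = vocab.stoi[pad_token]
        return [ex[:length] + [pad_index] * (length - len(ex)) for ex in batch]
    return postprocess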
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    opt.n_existing_node = all_node_num

    net = LSTM(opt, hidden_state=opt.state_dim * 5)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': [i for i in range(1, len(train_loss_ls) + 1)],
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
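# EarlyStopping is imported from elsewhere in this project. Its usage above
# (called with (valid_loss, net, OutputDir), exposing .early_stop, and writing
# checkpoint.pt, which main() reloads) matches the widely used patience-based
# helper; the sketch below is an assumed minimal version, not the repo's class.
class EarlyStopping:
    def __init__(self, patience=7, verbose=False):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False

    def __call__(self, val_loss, model, output_dir):
        if self.best_loss is None or val_loss < self.best_loss:
            # validation loss improved: reset the counter and checkpoint the model
            self.best_loss = val_loss
            self.counter = 0
            if self.verbose:
                print('Validation loss improved to {:.6f}; saving model.'.format(val_loss))
            torch.save(model.state_dict(), output_dir + '/checkpoint.pt')
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True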
def create_model():
    model = LSTM(input_size=input_size, num_classes=num_classes,
                 hidden=args.hidden_unit, num_layers=args.num_layers,
                 mean_after_fc=args.mean_after_fc,
                 mask_empty_frame=args.mask_empty_frame)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    return (model, optimizer)
def load_model():
    print("==> loading existing lstm model")
    model_info = torch.load(model_path)
    model = LSTM(input_size=input_size,
                 num_classes=model_info['num_classes'],
                 hidden=model_info['hidden'],
                 num_layers=model_info['num_layers'],
                 mean_after_fc=model_info['mean_after_fc'],
                 mask_empty_frame=model_info['mask_empty_frame'])
    model.cuda()
    model.load_state_dict(model_info['state_dict'])
    best_acc = model_info['best_acc']
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    optimizer.load_state_dict(model_info['optimizer'])
    return (model, optimizer)
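# load_model expects a checkpoint dict with the keys read above. The saving
# counterpart is not shown in the source; a sketch consistent with those keys
# (an assumption about the original training script, reusing the same globals
# as create_model) would be:
def save_model(model, optimizer, best_acc):
    torch.save({
        'num_classes': num_classes,
        'hidden': args.hidden_unit,
        'num_layers': args.num_layers,
        'mean_after_fc': args.mean_after_fc,
        'mask_empty_frame': args.mask_empty_frame,
        'state_dict': model.state_dict(),
        'best_acc': best_acc,
        'optimizer': optimizer.state_dict(),
    }, model_path)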
def test(test, feature, model, hidden, layer, output, index2char, index2phone, phone_map, phone2index):
    ans = open(output, 'w')
    ans.write('id,phone_sequence\n')
    test_set = Feature_Dataset(feature, 'test')
    if feature == 'mfcc':
        feature_dim = 39
    elif feature == 'fbank':
        feature_dim = 69
    elif feature == 'all':
        feature_dim = 108

    if model == 'LSTM':
        test_model = LSTM(feature_dim, hidden, layer)
    elif model == 'BiLSTM':
        test_model = LSTM(feature_dim, hidden, layer, bi=True)
    elif model == 'C_RNN':
        group_size = 5
        test_model = C_RNN(group_size, feature_dim, hidden, layer)

    checkpoint = torch.load(test)
    test_model.load_state_dict(checkpoint['model'])
    test_model.eval()
    if USE_CUDA:
        test_model = test_model.cuda()

    for i in tqdm(range(1, len(test_set) + 1)):
        data = test_set[i - 1]
        speaker = data[0]
        test_feature = Variable(data[1].float())
        test_hidden = test_model.init_hidden()
        # named pred rather than reusing the name of the output-file argument
        pred = torch.max(test_model(test_feature, test_hidden), 1)[1]
        result = test_trim(index2char, index2phone, phone_map, phone2index,
                           pred.data.cpu().numpy())
        ans.write('{},{}\n'.format(speaker, result))
    ans.close()
def main():
    global args, best_prec1
    best_prec1 = 1e6

    args = parser.parse_args()
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.feature_size = 100
    args.lSeq = 5

    wandb.config.update(args)
    wandb.run.name = f"Default_{wandb.run.name}" if (args.task == wandb.run.name) \
        else f"{args.task}_{wandb.run.name}"

    conf = configparser.ConfigParser()
    conf.read(args.config)
    # print(conf)
    TRAIN_DIR = conf.get("lstm", "train")
    VALID_DIR = conf.get("lstm", "valid")
    TEST_DIR = conf.get("lstm", "test")
    LOG_DIR = conf.get("lstm", "log")
    create_dir_not_exist(LOG_DIR)

    # TODO: train_list to train_file
    train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)]
    val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)]

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(int(args.seed))

    model = LSTM(args.feature_size, args.feature_size, args.feature_size)
    model = model.cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 betas=(0.9, 0.999), eps=1e-08,
                                 weight_decay=args.decay)
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)

        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + " MSELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))

        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
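# adjust_learning_rate is defined elsewhere in this project. Given the
# args.steps / args.scales / args.original_lr settings above, it presumably
# follows the common step-decay pattern sketched here; this is an assumption,
# not the repo's verified implementation.
def adjust_learning_rate(optimizer, epoch):
    lr = args.original_lr
    for i, step in enumerate(args.steps):
        scale = args.scales[i] if i < len(args.scales) else 1
        if epoch >= step:
            lr = lr * scale  # apply this stage's decay factor
        else:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr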
from Parameters import Parameter
import torch.nn as nn
import torch
import torch.nn.functional as F
import torch.optim as optim
from DataProcessing import DataProcessing
from model import LSTM
import numpy as np
import time

models = LSTM().double()
models = models.cuda()
loss_function = nn.BCELoss()  # size_average/reduce are deprecated; the defaults are equivalent
optimizer = optim.Adam(models.parameters())

# See what the scores are before training.
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
DataObject = DataProcessing()

for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    print("Beginning epoch", epoch)
    StepsOfEpoch = 0
    DataMethodObject = DataObject.FetchInputsAndLabels()
    for wav, label in DataMethodObject:
        then = time.time()
        StepsOfEpoch += 1
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
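        # The remainder of this loop is truncated in the source; the lines
        # below are a minimal assumed continuation (the call signature and the
        # device/dtype handling are guesses, not the original code).
        models.zero_grad()
        prediction = models(wav.cuda().double())
        loss = loss_function(prediction, label.cuda().double())
        loss.backward()
        optimizer.step()
        print("Step {} took {:.3f}s, loss {:.4f}".format(
            StepsOfEpoch, time.time() - then, loss.item()))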
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build the data loader
    dataset, targets = load_dataset()
    print('\nThe data are loaded')

    # Build the models
    lstm = LSTM(args.input_size, args.output_size)
    print('The model is built')
    print(lstm)

    if torch.cuda.is_available():
        lstm.cuda()

    # Loss and Optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=args.learning_rate)

    # Train the Models
    total_time = 0
    sm = 50  # start saving models after 50 epochs
    for epoch in range(args.num_epochs):
        print('\nepoch ' + str(epoch) + ':')
        avg_loss = 0
        start = time.time()
        for i in range(0, len(dataset), args.batch_size):
            lstm.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = bi.view(-1, 1, 32)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = lstm(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.item()
            loss.backward()
            optimizer.step()
        epoch_avg_loss = avg_loss / (len(dataset) / args.batch_size)
        print('--average loss:', epoch_avg_loss)
        end = time.time()
        epoch_time = end - start
        total_time = total_time + epoch_time
        print('time per epoch:', epoch_time)

        # save the epoch loss into a csv
        data = [epoch_avg_loss]
        with open(args.model_path + 'lstm_loss.csv', 'a+') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(data)

        if epoch == sm:
            model_path = 'lstm_' + str(sm) + '.pkl'
            torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
            sm = sm + args.save_step

    model_path = 'lstm_final.pkl'
    torch.save(lstm.state_dict(), os.path.join(args.model_path, model_path))
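# get_input and to_var are not shown in this snippet. Sketches consistent with
# their use above (slice a batch out of dataset/targets, wrap tensors for the
# GPU) are given below; both are assumptions about the original helpers.
def get_input(i, dataset, targets, batch_size):
    bi = torch.stack([torch.as_tensor(x, dtype=torch.float32)
                      for x in dataset[i:i + batch_size]])
    bt = torch.stack([torch.as_tensor(t, dtype=torch.float32)
                      for t in targets[i:i + batch_size]])
    return bi, bt

def to_var(x):
    return x.cuda() if torch.cuda.is_available() else x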
def main():
    '''Main purpose: compute the error rate on the test data.'''
    parser = argparse.ArgumentParser()
    # Observed length of the trajectory parameter
    parser.add_argument('--obs_length', type=int, default=240,
                        help='Observed length of the trajectory')
    # Predicted length of the trajectory parameter
    # parser.add_argument('--pred_length', type=int, default=378-60-1,
    #                     help='Predicted length of the trajectory')
    parser.add_argument('--pred_length', type=int, default=240,
                        help='Predicted length of the trajectory')
    # Model to be loaded
    parser.add_argument('--epoch', type=int, default=199,
                        help='Epoch of model to be loaded')
    # cuda support
    parser.add_argument('--use_cuda', action="store_true", default=True,
                        help='Use GPU or not')
    # gru model
    parser.add_argument('--gru', action="store_true", default=False,
                        help='True: GRU cell, False: LSTM cell')
    # method selection
    parser.add_argument('--method', type=int, default=1,
                        help='Method of lstm to be used (1 = social lstm, '
                             '2 = obstacle lstm, 3 = vanilla lstm)')
    # Parse the parameters
    sample_args = parser.parse_args()

    # for drive run
    prefix = ''
    f_prefix = '.'

    method_name = "VANILLALSTM"
    model_name = "LSTM"
    save_tar_name = method_name + "_lstm_model_"
    if sample_args.gru:
        model_name = "GRU"
        save_tar_name = method_name + "_gru_model_"
    print("Selected method name: ", method_name, " model name: ", model_name)

    # Save directory
    save_directory = os.path.join(f_prefix, 'model/', method_name, model_name)
    # plot directory for plotting in the future
    plot_directory = os.path.join(f_prefix, 'plot/', method_name, model_name)
    result_directory = os.path.join(f_prefix, 'result/', method_name)
    plot_test_file_directory = 'test'

    # Define the path for the config file for saved args
    with open(os.path.join(save_directory, 'config.pkl'), 'rb') as f:
        saved_args = pickle.load(f)

    seq_length = sample_args.pred_length + sample_args.obs_length

    # Create the DataLoader object
    dataloader = DataLoader(f_prefix, 1, seq_length, forcePreProcess=True, infer=True)
    create_directories(os.path.join(result_directory, model_name),
                       dataloader.get_all_directory_namelist())
    create_directories(plot_directory, [plot_test_file_directory])
    dataloader.reset_batch_pointer(valid=False)
    dataset_pointer_ins = dataloader.dataset_pointer

    smallest_err = 100000
    smallest_err_iter_num = -1
    origin = (0, 0)
    reference_point = (0, 1)
    submission_store = []  # store submission data points (txt)
    result_store = []  # store points for plotting

    # Initialize net
    net = LSTM(saved_args, True)
    if sample_args.use_cuda:
        net = net.cuda()

    # Get the checkpoint path
    checkpoint_path = os.path.join(save_directory,
                                   save_tar_name + str(sample_args.epoch) + '.tar')
    if os.path.isfile(checkpoint_path):
        print('Loading checkpoint')
        checkpoint = torch.load(checkpoint_path)
        model_epoch = checkpoint['epoch']
        net.load_state_dict(checkpoint['state_dict'])
        print('Loaded checkpoint at epoch', model_epoch)

    results_it = []
    for iterator in range(50):
        x_seq_arr = []
        ret_x_seq_arr = []
        error_arr = []
        expected_day_arr = []
        predicted_day_arr = []
        total_error = 0
        for batch in range(dataloader.num_batches):
            # Get data
            x, y, d = dataloader.next_batch(randomUpdate=False)
            # Get the sequence
            x_seq, y_seq, d_seq = x[0], y[0], d[0]
            x_seq = np.array(x_seq)
            '''
            x_seq = dataloader.inverse_transform_MinMaxScaler(x_seq)
            print('{}/{}'.format(batch, dataloader.num_batches))
            x_seq[sample_args.obs_length:, -2] = 17
            x_seq[sample_args.obs_length:, -1] = 28
            x_seq = dataloader.fit_transform_MinMaxScaler(x_seq)
            '''
            x_seq = Variable(torch.from_numpy(x_seq).float())
            temp = x_seq[:, -2:]
            # x_seq = x_seq[:,:-2]
            if sample_args.use_cuda:
                x_seq = x_seq.cuda()
                temp = temp.cuda()

            obs_data = x_seq[:sample_args.obs_length]
            ret_x_seq = sample(sample_args, x_seq, temp, net)
            error = get_mean_error(x_seq[sample_args.obs_length:, :-2],
                                   ret_x_seq[sample_args.obs_length:, :-2], False)
            total_error += error

            # display the prediction
            # x_seq = result[0]
            x_seq = x_seq.data.cpu().numpy()
            # print(x_seq.size())
            # x_seq = np.reshape(x_seq, (x_seq.shape[0], saved_args.input_size))
            x_seq = dataloader.inverse_transform_MinMaxScaler(x_seq)
            # ret_x_seq = result[1]
            ret_x_seq = ret_x_seq.data.cpu().numpy()
            # ret_x_seq = np.reshape(ret_x_seq, (ret_x_seq.shape[0], saved_args.input_size))
            ret_x_seq = dataloader.inverse_transform_MinMaxScaler(ret_x_seq)

            gt = (x_seq[:, 0] - x_seq[:, 2]) / (x_seq[:, 1] - x_seq[:, 0])
            pred = (ret_x_seq[:, 0] - ret_x_seq[:, 2]) / (ret_x_seq[:, 1] - ret_x_seq[:, 0])
            gt2 = gt[sample_args.obs_length:]
            pred2 = pred[sample_args.obs_length:]
            expected_day = np.mean(gt2)
            predicted_day = np.mean(pred2)
            # print(expected_day, predicted_day, expected_day - predicted_day)
            # print('Error: ', error)

            x_seq_arr.append(x_seq)
            ret_x_seq_arr.append(ret_x_seq)
            error_arr.append(error.data.cpu().numpy())
            expected_day_arr.append(expected_day)
            predicted_day_arr.append(predicted_day)

            # fig, axs = plt.subplots(6, 1)
            # axs[0].plot(ret_x_seq[:,0], color='blue', label='Predict h1', linestyle='--', marker='^')
            # axs[0].plot(x_seq[:,0], color='red', label='Real h1', linestyle='-', marker='.')
            # axs[1].plot(ret_x_seq[:,1], color='blue', label='Predict h2', linestyle='--', marker='^')
            # axs[1].plot(x_seq[:,1], color='red', label='Real h2', linestyle='-', marker='.')
            # axs[2].plot(ret_x_seq[:,2], color='blue', label='Predict h3', linestyle='--', marker='^')
            # axs[2].plot(x_seq[:,2], color='red', label='Real h3', linestyle='-', marker='.')
            # axs[3].plot(pred, color='blue', label='Predict h3', linestyle='--', marker='^')
            # axs[3].plot(gt, color='red', label='Real h3', linestyle='-', marker='.')
            # axs[4].plot(ret_x_seq[:,-2], color='blue', label='Predict Tevwi', linestyle='--', marker='^')
            # axs[4].plot(x_seq[:,-2], color='red', label='Real Tevwi', linestyle='-', marker='.')
            # axs[5].plot(ret_x_seq[:,-1], color='blue', label='Predict Tcdwi', linestyle='--', marker='^')
            # axs[5].plot(x_seq[:,-1], color='red', label='Real Tcdwi', linestyle='-', marker='.')
            # for ax in axs:
            #     ax.legend()
            #     ax.grid()
            # plt.show()

        total_error = total_error / dataloader.num_batches
        if total_error < smallest_err:
            print("**********************************************************")
            print('Best iteration has been changed. Previous best iteration: ',
                  smallest_err_iter_num, 'Error: ', smallest_err)
            print('New best iteration : ', iterator, 'Error: ', total_error)
            smallest_err_iter_num = iterator
            smallest_err = total_error

        results_it.append((sample_args.pred_length, sample_args.obs_length,
                           x_seq_arr, ret_x_seq_arr, error_arr))

    dataloader.write_to_plot_file([results_it[smallest_err_iter_num]],
                                  os.path.join(plot_directory, plot_test_file_directory))
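# The snippet does not show an entry point; the usual guard (an assumption
# about the surrounding script) would be:
if __name__ == '__main__':
    main()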
def ToVariable(tmp):
    # only the return statement of this helper survives in the source;
    # the header is reconstructed from the call sites below
    return Variable(tmp)

use_gpu = torch.cuda.is_available()
# print(use_gpu)

input_size = 900
output_size = 900
hidden_dim = 2000
num_layer = 4

model = LSTM(input_size, hidden_dim, num_layer, output_size)
loss_function = nn.MSELoss()
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
optimizer = optim.RMSprop(model.parameters(), lr=0.001, alpha=0.9)
if use_gpu:
    model = model.cuda()

root_path = "dataset"
data_num = 100
time_step = 5
datalist = create_datalist(root_path)
train_data, test_data = create_dataset(data_num, datalist, time_step)
# print(len(train_data))  # 17*80
# print(len(test_data))   # 17*20

def train(epoch):
    for step, input_data in enumerate(train_data, 1):
        seq = ToVariable(input_data[0])
        outs = ToVariable(input_data[1])
        if use_gpu:
            seq, outs = seq.cuda(), outs.cuda()  # assumed; the source breaks off at this branch
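        # The rest of this training step is not present in the source; a
        # minimal continuation consistent with the model/loss/optimizer defined
        # above (an assumption, not the original code) would be:
        model.zero_grad()
        pred = model(seq)
        loss = loss_function(pred, outs)
        loss.backward()
        optimizer.step()
        if step % 20 == 0:
            print('epoch {}, step {}, loss {:.6f}'.format(epoch, step, loss.item()))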
from tqdm import tqdm
import os
import sys
import random
from collections import deque

import pandas as pd
from yahoo_fin.stock_info import get_day_gainers
# LSTM and DQN come from this project's model code; their imports are not
# shown in the source

batch_size = 3
input_sizes = [1, 6, 20]
hidden_size = 300
num_layers = 2
dropout = 0.5
output_size = 5
lr = 0.0001
seq_length = 20
epochs = 100000

model = LSTM(input_sizes, hidden_size, num_layers, dropout, output_size)
model.cuda()

csv = pd.read_csv("nasdaq.csv")
stocks = {}
num_models = 15
hidden = {}
ppo = {}
reward_list = {}
last_profit = {}
for i in range(num_models):
    hidden[i] = model.init_state(batch_size)
    stocks[i] = random.choices(csv["Symbol"], k=batch_size)
    ppo[i] = DQN(model, lr, stocks[i], output_size, hidden[i], batch_size)
    reward_list[i] = deque(maxlen=100)
    last_profit[i] = 0

rewards = {}
for e in tqdm(range(epochs)):
# train=False, debug=False)
# lafan_loader_test = DataLoader(lafan_data_test,
#                                batch_size=opt['train']['batch_size'],
#                                shuffle=True, num_workers=opt['data']['num_workers'])

## initialize model ##
state_encoder = StateEncoder(in_dim=opt['model']['state_input_dim'])
state_encoder = state_encoder.cuda()
offset_encoder = OffsetEncoder(in_dim=opt['model']['offset_input_dim'])
offset_encoder = offset_encoder.cuda()
target_encoder = TargetEncoder(in_dim=opt['model']['target_input_dim'])
target_encoder = target_encoder.cuda()
lstm = LSTM(in_dim=opt['model']['lstm_dim'], hidden_dim=opt['model']['lstm_dim'] * 2)
lstm = lstm.cuda()
decoder = Decoder(in_dim=opt['model']['lstm_dim'] * 2, out_dim=opt['model']['state_input_dim'])
decoder = decoder.cuda()

if len(opt['train']['pretrained']) > 0:
    state_encoder.load_state_dict(
        torch.load(os.path.join(opt['train']['pretrained'], 'state_encoder.pkl')))
    offset_encoder.load_state_dict(
        torch.load(os.path.join(opt['train']['pretrained'], 'offset_encoder.pkl')))
    target_encoder.load_state_dict(
        torch.load(os.path.join(opt['train']['pretrained'], 'target_encoder.pkl')))
def train(feature, label, epochs, model, layer, hidden, save, postfix,
          index2char, index2phone, phone_map, phone2index):
    dataset = Feature_Dataset(feature, 'train')
    train_size = int(0.9 * len(dataset))
    if feature == 'mfcc':
        feature_dim = 39
    elif feature == 'fbank':
        feature_dim = 69
    elif feature == 'all':
        feature_dim = 108

    print("Building model and optimizer...")
    if model == 'LSTM':
        train_model = LSTM(feature_dim, hidden, layer)
    elif model == 'C_RNN':
        group_size = 5
        train_model = C_RNN(group_size, feature_dim, hidden, layer)
    elif model == 'BiLSTM':
        train_model = LSTM(feature_dim, hidden, layer, bi=True)
    if USE_CUDA:
        train_model = train_model.cuda()

    optimizer = optim.Adam(train_model.parameters(), lr=0.005)
    # optimizer = optim.SGD(train_model.parameters(), lr=0.1)
    criterion = nn.NLLLoss()
    if USE_CUDA:
        criterion = criterion.cuda()

    for epoch in range(1, epochs + 1):
        print("Epoch {}".format(epoch))
        epoch_loss = 0
        epoch_edit = 0
        for i in tqdm(range(1, train_size + 1)):
            data = dataset[i - 1]
            speaker = data[0]
            train_model.zero_grad()
            input_hidden = train_model.init_hidden()
            train_feature = Variable(data[1].float())
            output = train_model(train_feature, input_hidden)
            output_seq = test_trim(index2char, index2phone, phone_map, phone2index,
                                   torch.max(output, 1)[1].data.cpu().numpy())
            target_seq = trim_and_map(index2char, index2phone, phone_map, phone2index,
                                      [[int(l)] for l in label[speaker]])
            target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
            target = target.cuda() if USE_CUDA else target
            loss = criterion(output, target)
            edit = editdistance.eval(output_seq, target_seq)
            epoch_loss += loss.item() / train_size
            epoch_edit += edit / train_size
            loss.backward()
            optimizer.step()
        print("Negative log-likelihood: {}".format(epoch_loss))
        print("Edit distance: {}".format(epoch_edit))

        val_loss = 0
        val_edit = 0
        for i in tqdm(range(train_size + 1, len(dataset) + 1)):
            data = dataset[i - 1]
            speaker = data[0]
            val_feature = Variable(data[1].float())
            output = train_model(val_feature, train_model.init_hidden())
            target = Variable(torch.from_numpy(np.array(label[speaker]).astype('int')))
            target = target.cuda() if USE_CUDA else target
            val_loss += criterion(output, target).item()
            output_seq = test_trim(index2char, index2phone, phone_map, phone2index,
                                   torch.max(output, 1)[1].data.cpu().numpy())
            target_seq = trim_and_map(index2char, index2phone, phone_map, phone2index,
                                      [[int(l)] for l in label[speaker]])
            val_edit += editdistance.eval(output_seq, target_seq)
        print("Validation loss: {}".format(val_loss / (len(dataset) - train_size)))
        print("Validation edit distance: {}".format(val_edit / (len(dataset) - train_size)))

        if epoch % save == 0:
            directory = os.path.join(SAVE_DIR, feature, model,
                                     '{}-{}{}'.format(layer, hidden, postfix))
            if not os.path.exists(directory):
                os.makedirs(directory)
            torch.save({
                'model': train_model.state_dict(),
                'opt': optimizer.state_dict(),
                'val_loss': val_loss / (len(dataset) - train_size),
                'val_edit': val_edit / (len(dataset) - train_size),
            }, os.path.join(directory, '{}.tar'.format(epoch)))
    print("Finish training")
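# A call consistent with the signature above; every argument value here is an
# illustrative assumption, not taken from the original script.
train(feature='fbank', label=label_dict, epochs=50, model='BiLSTM', layer=2,
      hidden=256, save=10, postfix='_run1', index2char=index2char,
      index2phone=index2phone, phone_map=phone_map, phone2index=phone2index)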
class dl_model():
    def __init__(self, mode):
        # read config file which contains parameters
        self.config_file = read_yaml()
        self.mode = mode

        arch_name = '_'.join([
            self.config_file['rnn'],
            str(self.config_file['num_layers']),
            str(self.config_file['hidden_dim'])
        ])
        self.config_file['dir']['models'] = \
            self.config_file['dir']['models'].split('/')[0] + '_' + arch_name + '/'
        self.config_file['dir']['plots'] = \
            self.config_file['dir']['plots'].split('/')[0] + '_' + arch_name + '/'
        # if not os.path.exists(self.config_file['dir']['models']):
        #     os.mkdir(self.config_file['dir']['models'])
        # if not os.path.exists(self.config_file['dir']['plots']):
        #     os.mkdir(self.config_file['dir']['plots'])

        if self.config_file['rnn'] == 'LSTM':
            from model import LSTM as Model
        elif self.config_file['rnn'] == 'GRU':
            from model import GRU as Model
        else:
            print("Model not implemented")
            exit(0)

        self.cuda = (self.config_file['cuda'] and torch.cuda.is_available())
        self.output_dim = self.config_file['num_phones']

        if mode == 'train' or mode == 'test':
            self.plots_dir = self.config_file['dir']['plots']
            # store hyperparameters
            self.total_epochs = self.config_file['train']['epochs']
            self.test_every = self.config_file['train']['test_every_epoch']
            self.test_per = self.config_file['train']['test_per_epoch']
            self.print_per = self.config_file['train']['print_per_epoch']
            self.save_every = self.config_file['train']['save_every']
            self.plot_every = self.config_file['train']['plot_every']

            # dataloader which returns batches of data
            self.train_loader = timit_loader('train', self.config_file)
            self.test_loader = timit_loader('test', self.config_file)

            self.start_epoch = 1
            self.test_acc = []
            self.train_losses, self.test_losses = [], []

            # declare model
            self.model = Model(self.config_file, weights=self.train_loader.weights)
        else:
            self.model = Model(self.config_file, weights=None)

        if self.cuda:
            self.model.cuda()

        # resume training from some stored model
        if self.mode == 'train' and self.config_file['train']['resume']:
            self.start_epoch, self.train_losses, self.test_losses, self.test_acc = \
                self.model.load_model(mode, self.config_file['rnn'],
                                      self.model.num_layers, self.model.hidden_dim)
            self.start_epoch += 1
        # load best model for testing/feature extraction
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)

        self.replacement = {
            'aa': ['ao'], 'ah': ['ax', 'ax-h'], 'er': ['axr'], 'hh': ['hv'],
            'ih': ['ix'], 'l': ['el'], 'm': ['em'], 'n': ['en', 'nx'],
            'ng': ['eng'], 'sh': ['zh'],
            'pau': ['pcl', 'tcl', 'kcl', 'bcl', 'dcl', 'gcl', 'h#', 'epi', 'q'],
            'uw': ['ux']
        }

    def train(self):
        print("Starting training at t =", datetime.datetime.now())
        print('Batches per epoch:', len(self.train_loader))
        self.model.train()

        # when to print losses during the epoch
        print_range = list(np.linspace(0, len(self.train_loader),
                                       self.print_per + 2, dtype=np.uint32)[1:-1])
        if self.test_per == 0:
            test_range = []
        else:
            test_range = list(np.linspace(0, len(self.train_loader),
                                          self.test_per + 2, dtype=np.uint32)[1:-1])

        for epoch in range(self.start_epoch, self.total_epochs + 1):
            print("Epoch:", str(epoch))
            epoch_loss = 0.0
            i = 0
            while True:
                i += 1
                inputs, labels, lens, status = self.train_loader.return_batch()
                inputs = torch.from_numpy(np.array(inputs)).float()
                labels = torch.from_numpy(np.array(labels)).long()
                lens = torch.from_numpy(np.array(lens)).long()
                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()

                # zero the parameter gradients
                self.model.optimizer.zero_grad()

                # forward + backward + optimize
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               self.config_file['grad_clip'])
                self.model.optimizer.step()

                # store loss
                epoch_loss += loss.item()

                if i in print_range:
                    try:
                        print('After %i batches, Current Loss = %.7f, Avg. Loss = %.7f' %
                              (i + 1, epoch_loss / (i + 1),
                               np.mean([x[0] for x in self.train_losses])))
                    except:
                        pass
                if i in test_range:
                    self.test(epoch)
                    self.model.train()
                if status == 1:
                    break

            self.train_losses.append((epoch_loss / len(self.train_loader), epoch))

            # test every 5 epochs in the beginning and then every fixed no of epochs
            # specified in config file; useful to see how loss stabilises early on
            if epoch % 5 == 0 and epoch < self.test_every:
                self.test(epoch)
                self.model.train()
            elif epoch % self.test_every == 0:
                self.test(epoch)
                self.model.train()
            # plot loss and accuracy
            if epoch % self.plot_every == 0:
                self.plot_loss_acc(epoch)
            # save model
            if epoch % self.save_every == 0:
                self.model.save_model(False, epoch, self.train_losses,
                                      self.test_losses, self.test_acc,
                                      self.config_file['rnn'],
                                      self.model.num_layers, self.model.hidden_dim)

    def test(self, epoch=None):
        self.model.eval()
        correct = 0
        total = 0
        correct_nopause = 0
        total_nopause = 0
        pause_id = 27
        # confusion matrix data is stored in this matrix
        matrix = np.zeros((self.output_dim, self.output_dim))
        pad_id = self.output_dim

        print("Testing...")
        print('Total batches:', len(self.test_loader))
        test_loss = 0

        with torch.no_grad():
            while True:
                # draw evaluation batches from the test loader
                # (the original called self.train_loader here, which looks like a bug)
                inputs, labels, lens, status = self.test_loader.return_batch()
                inputs = torch.from_numpy(np.array(inputs)).float()
                labels = torch.from_numpy(np.array(labels)).long()
                lens = torch.from_numpy(np.array(lens)).long()
                # print(inputs.shape, labels.shape, lens)
                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    lens = lens.cuda()

                # zero the parameter gradients (harmless here: no gradients under no_grad)
                self.model.optimizer.zero_grad()

                # forward pass
                outputs = self.model(inputs, lens)
                loss = self.model.calculate_loss(outputs, labels, lens)
                test_loss += loss.item()

                outputs = outputs.cpu().numpy()
                # remove extra padding from current batch
                labels = labels.cpu().numpy()[:, :outputs.shape[1]]
                # ignore blank token
                outputs = np.reshape(outputs[:, :, :-1], (-1, self.output_dim))
                labels = np.reshape(labels, (-1))

                total_pad_tokens = np.sum(labels == pad_id)
                argmaxed = np.argmax(outputs, 1)

                # total number of correct phone predictions
                for i in range(len(labels)):
                    if labels[i] != pause_id and labels[i] != pad_id:  # is not pause or pad
                        if argmaxed[i] == labels[i]:
                            correct_nopause += 1
                        total_nopause += 1
                correct += np.sum(argmaxed == labels)
                total += len(argmaxed) - total_pad_tokens

                # matrix[i][j] denotes the no of examples classified by model as class j
                # but have ground truth label i
                for k in range(argmaxed.shape[0]):
                    if labels[k] == pad_id:
                        continue
                    matrix[labels[k]][argmaxed[k]] += 1

                if status == 1:
                    break

        for i in range(self.output_dim):
            matrix[i] /= sum(matrix[i])

        acc_all = correct / total
        acc_nopause = correct_nopause / total_nopause
        print(acc_all, acc_nopause)
        test_loss /= len(self.test_loader)

        # plot confusion matrix
        if epoch is not None:
            filename = self.plots_dir + 'confmat_epoch_acc_' + str(epoch) + \
                '_' + str(int(100 * acc_all)) + '.png'
            plt.clf()
            plt.imshow(matrix, cmap='hot', interpolation='none')
            plt.gca().invert_yaxis()
            plt.xlabel("Predicted Label ID")
            plt.ylabel("True Label ID")
            plt.colorbar()
            plt.savefig(filename)
        print("Testing accuracy: All - %.4f, No Pause - %.4f , Loss: %.7f" %
              (acc_all, acc_nopause, test_loss))

        self.test_acc.append((acc_all, epoch))
        self.test_losses.append((test_loss, epoch))

        # if testing loss is minimum, store it as the 'best.pth' model,
        # which is used for feature extraction
        if test_loss == min([x[0] for x in self.test_losses]):
            print("Best new model found!")
            self.model.save_model(True, epoch, self.train_losses, self.test_losses,
                                  self.test_acc, self.config_file['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)
        return acc_all

    # Called during feature extraction. Takes log mel filterbank energies as input
    # and outputs the phone predictions
    def test_one(self, file_path):
        (rate, sig) = wav.read(file_path)
        assert rate == 16000
        # sig ranges from -32768 to +32768 AND NOT -1 to +1
        feat, energy = fbank(sig, samplerate=rate,
                             nfilt=self.config_file['feat_dim'], winfunc=np.hamming)
        tsteps, hidden_dim = feat.shape
        # calculate log mel filterbank energies for complete file
        feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim))
        lens = np.array([tsteps])
        inputs = torch.from_numpy(np.array(feat_log_full)).float()
        lens = torch.from_numpy(np.array(lens)).long()

        id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()}

        self.model.eval()
        with torch.no_grad():
            if self.cuda:
                inputs = inputs.cuda()
                lens = lens.cuda()
            # Pass through model
            a = time.time()
            outputs = self.model(inputs, lens).cpu().numpy()
            print(time.time() - a)
            # Since only one example per batch and ignore blank token
            outputs = outputs[0, :, :-1]
            softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:, None]
        return softmax, id_to_phone

    # Test for each wav file in the folder and also compare with ground truth
    def test_folder(self, test_folder, top_n=1, show_graphs=False):
        accs = []
        for wav_file in sorted(os.listdir(test_folder)):
            # Read input test file
            wav_path = os.path.join(test_folder, wav_file)
            dump_path = wav_path[:-4] + '_pred.txt'
            # Read only wav
            if wav_file == '.DS_Store' or wav_file.split('.')[-1] != 'wav':
                # or os.path.exists(dump_path):
                continue

            (rate, sig) = wav.read(wav_path)
            assert rate == 16000
            # sig ranges from -32768 to +32768 AND NOT -1 to +1
            feat, energy = fbank(sig, samplerate=rate,
                                 nfilt=self.config_file['feat_dim'], winfunc=np.hamming)
            tsteps, hidden_dim = feat.shape
            # calculate log mel filterbank energies for complete file
            feat_log_full = np.reshape(np.log(feat), (1, tsteps, hidden_dim))
            lens = np.array([tsteps])
            inputs = torch.from_numpy(np.array(feat_log_full)).float()
            lens = torch.from_numpy(np.array(lens)).long()

            id_to_phone = {v[0]: k for k, v in self.model.phone_to_id.items()}

            self.model.eval()
            with torch.no_grad():
                if self.cuda:
                    inputs = inputs.cuda()
                    lens = lens.cuda()
                # Pass through model
                outputs = self.model(inputs, lens).cpu().numpy()
                # Since only one example per batch and ignore blank token
                outputs = outputs[0, :, :-1]
                softmax = np.exp(outputs) / np.sum(np.exp(outputs), axis=1)[:, None]
                softmax_probs = np.max(softmax, axis=1)
                # print(softmax)
                # Take argmax to generate final string
                argmaxed = np.argmax(outputs, axis=1)
                final_str = [id_to_phone[a] for a in argmaxed]
                # Generate dumpable format of phone, start time and end time
                ans = compress_seq(final_str)
                print("Predicted:", ans)

            phone_path = wav_path[:-3] + 'PHN'
            # If .PHN file exists, report accuracy
            if os.path.exists(phone_path):
                grtuth = read_phones(phone_path, self.replacement)
                print("Ground truth:", grtuth)

                unrolled_truth = []
                for elem in grtuth:
                    unrolled_truth += [elem[0]] * (elem[2] - elem[1] + 1)

                truth_softmax = []
                top_n_softmax = [[] for x in range(top_n)]
                # Check for top-n
                correct, total = 0, 0
                for i in range(min(len(unrolled_truth), len(final_str))):
                    truth_softmax.append(
                        softmax[i][self.model.phone_to_id[unrolled_truth[i]][0]])
                    # one candidate index per phone class (the original built
                    # indices from len(final_str), which truncates the candidate
                    # list whenever there are fewer frames than classes)
                    indices = list(range(outputs.shape[1]))
                    zipped = zip(indices, outputs[i])
                    desc = sorted(zipped, key=lambda x: x[1], reverse=True)
                    cur_frame_res = [id_to_phone[x[0]] for x in desc][:top_n]
                    for k in range(top_n):
                        top_n_softmax[k].append(
                            softmax[i][self.model.phone_to_id[cur_frame_res[k]][0]])

                    if unrolled_truth[i] in cur_frame_res:
                        # print truth softmax
                        # if unrolled_truth[i] != cur_frame_res[0]:
                        #     print(i, truth_softmax[-1])
                        correct += 1
                    total += 1

                accs.append(correct / total)

                if show_graphs:
                    # Plot actual softmax and predicted softmax
                    for i in range(top_n):
                        plt.plot(top_n_softmax[i], label=str(i + 1) + ' prob.')
                    print(top_n_softmax)
                    plt.plot(truth_softmax, label='Ground Truth prob', alpha=0.6)
                    plt.xlabel("Frame number")
                    plt.ylabel("Prob")
                    plt.legend()
                    plt.show()

                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nGround Truth:\n')
                    for t in grtuth:
                        f.write(' '.join(str(s) for s in t) + '\n')
                    f.write('\nTop-' + str(top_n) + ' accuracy is ' + str(correct / total))
            else:
                with open(dump_path, 'w') as f:
                    f.write('Predicted:\n')
                    for t in ans:
                        f.write(' '.join(str(s) for s in t) + '\n')
        print(accs)

    # take train/test loss and test accuracy input and plot it over time
    def plot_loss_acc(self, epoch):
        plt.clf()
        plt.plot([x[1] for x in self.train_losses],
                 [x[0] for x in self.train_losses], c='r', label='Train')
        plt.plot([x[1] for x in self.test_losses],
                 [x[0] for x in self.test_losses], c='b', label='Test')
        plt.title("Train/Test loss")
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.legend()
        plt.grid(True)
        filename = self.plots_dir + 'loss' + '_' + str(epoch) + '.png'
        plt.savefig(filename)

        plt.clf()
        plt.plot([x[1] for x in self.test_acc],
                 [100 * x[0] for x in self.test_acc], c='r')
        plt.title("Test accuracy")
        plt.xlabel("Epochs")
        plt.ylabel("Accuracy in %")
        plt.grid(True)
        filename = self.plots_dir + 'test_acc' + '_' + str(epoch) + '.png'
        plt.savefig(filename)
        print("Saved plots")
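# Typical entry point for the class above; the mode strings match those it
# checks for, but the guard itself is an assumption about the surrounding script.
if __name__ == '__main__':
    dl_model('train').train()
    # or: dl_model('test').test()
    # or: dl_model('test_one').test_one('path/to/recording.wav')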
def train(args):
    prefix = ''
    f_prefix = '.'
    if not os.path.isdir("log/"):
        print("Directory creation script is running...")
        subprocess.call([f_prefix + '/make_directories.sh'])

    args.freq_validation = np.clip(args.freq_validation, 0, args.num_epochs)
    validation_epoch_list = list(range(args.freq_validation, args.num_epochs + 1,
                                       args.freq_validation))
    validation_epoch_list[-1] -= 1

    # Create the data loader object. This object would preprocess the data in terms of
    # batches each of size args.batch_size, of length args.seq_length
    dataloader = DataLoader(f_prefix, args.batch_size, args.seq_length,
                            args.num_validation, forcePreProcess=True)

    method_name = "VANILLALSTM"
    model_name = "LSTM"
    save_tar_name = method_name + "_lstm_model_"
    if args.gru:
        model_name = "GRU"
        save_tar_name = method_name + "_gru_model_"

    # Log directory
    log_directory = os.path.join(prefix, 'log/')
    plot_directory = os.path.join(prefix, 'plot/', method_name, model_name)
    plot_train_file_directory = 'validation'

    # Logging files
    log_file_curve = open(os.path.join(log_directory, method_name, model_name,
                                       'log_curve.txt'), 'w+')
    log_file = open(os.path.join(log_directory, method_name, model_name, 'val.txt'), 'w+')

    # model directory
    save_directory = os.path.join(f_prefix, 'model')

    # Save the arguments in the config file
    with open(os.path.join(save_directory, method_name, model_name, 'config.pkl'), 'wb') as f:
        pickle.dump(args, f)

    # Path to store the checkpoint file
    def checkpoint_path(x):
        return os.path.join(save_directory, method_name, model_name,
                            save_tar_name + str(x) + '.tar')

    # model creation
    net = LSTM(args)
    if args.use_cuda:
        net = net.cuda()

    # optimizer = torch.optim.Adagrad(net.parameters(), weight_decay=args.lambda_param)
    optimizer = torch.optim.RMSprop(net.parameters(), lr=args.learning_rate)
    loss_f = torch.nn.MSELoss()

    learning_rate = args.learning_rate
    best_val_loss = 100
    best_val_data_loss = 100
    smallest_err_val = 100000
    smallest_err_val_data = 100000
    best_epoch_val = 0
    best_epoch_val_data = 0
    best_err_epoch_val = 0
    best_err_epoch_val_data = 0
    all_epoch_results = []
    grids = []
    num_batch = 0
    validation_dataset_executed = False  # ensure defined even if no validation epoch runs

    # Training
    for epoch in range(args.num_epochs):
        print('****************Training epoch beginning******************')
        if dataloader.additional_validation and (epoch - 1) in validation_epoch_list:
            dataloader.switch_to_dataset_type(True)
        dataloader.reset_batch_pointer(valid=False)
        loss_epoch = 0

        # For each batch
        # num_batches: how many batches the data splits into, i.e. how many
        # iterations to run per epoch
        for batch in range(dataloader.num_batches):
            start = time.time()
            # print(dataloader.num_batches, dataloader.batch_size)

            # Get batch data
            x, y, d = dataloader.next_batch(randomUpdate=False)
            loss_batch = 0

            # x_cat = Variable(torch.from_numpy(np.array(x[0])).float())
            x_seq = np.array(x)
            y_seq = np.array(y)
            x_seq = Variable(torch.from_numpy(x_seq).float())
            y_seq = Variable(torch.from_numpy(y_seq).float())
            temp = x_seq[:, :, -2:]
            x_seq = x_seq[:, :, :-2]
            y_seq = y_seq[:, :, :3]

            hidden_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))
            cell_states = Variable(torch.zeros(x_seq.size()[0], args.rnn_size))

            if args.use_cuda:
                x_seq = x_seq.cuda()
                y_seq = y_seq.cuda()
                temp = temp.cuda()
                hidden_states = hidden_states.cuda()
                cell_states = cell_states.cuda()

            # Zero out gradients
            net.zero_grad()
            optimizer.zero_grad()

            outputs, _, _ = net(x_seq, temp, hidden_states, cell_states)
            loss = loss_f(outputs, y_seq)
            loss_batch = loss.detach().item()

            # Compute gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(net.parameters(), args.grad_clip)
            # Update parameters
            optimizer.step()

            end = time.time()
            loss_epoch += loss_batch
            print('{}/{} (epoch {}), train_loss = {:.3f}, time/batch = {:.3f}'.format(
                (batch + 1) * dataloader.batch_size,
                dataloader.num_batches * dataloader.batch_size,
                epoch, loss_batch, end - start))

        loss_epoch /= dataloader.num_batches
        print("Training epoch: " + str(epoch) + " loss: " + str(loss_epoch))
        # Log loss values
        log_file_curve.write("Training epoch: " + str(epoch) + " loss: " + str(loss_epoch) + '\n')

        # Validation dataset
        if dataloader.additional_validation and (epoch) in validation_epoch_list:
            dataloader.switch_to_dataset_type()
            print('****************Validation with dataset epoch beginning******************')
            dataloader.reset_batch_pointer(valid=False)
            dataset_pointer_ins = dataloader.dataset_pointer
            validation_dataset_executed = True

            loss_epoch = 0
            err_epoch = 0
            num_of_batch = 0
            smallest_err = 100000

            # results of one epoch for all validation datasets
            epoch_result = []
            # results of one validation dataset
            results = []

            # For each batch
            for batch in range(dataloader.num_batches):
                # Get batch data
                x, y, d = dataloader.next_batch(randomUpdate=False)

                # Loss for this batch
                loss_batch = 0
                err_batch = 0

                # For each sequence
                for sequence in range(len(x)):
                    # Get the sequence
                    x_seq = x[sequence]
                    y_seq = y[sequence]
                    x_seq = np.array(x_seq)
                    y_seq = np.array(y_seq)[:, :3]
                    x_seq = Variable(torch.from_numpy(x_seq).float())
                    y_seq = Variable(torch.from_numpy(y_seq).float())
                    temp = x_seq[:, -2:]
                    x_seq = x_seq[:, :-2]
                    y_seq = y_seq[:, :3]

                    if args.use_cuda:
                        x_seq = x_seq.cuda()
                        y_seq = y_seq.cuda()
                        temp = temp.cuda()

                    # will be used for error calculation
                    orig_x_seq = y_seq.clone()
                    # print(x_seq.size(), args.seq_length)

                    with torch.no_grad():
                        hidden_states = Variable(torch.zeros(1, args.rnn_size))
                        cell_states = Variable(torch.zeros(1, args.rnn_size))
                        ret_x_seq = Variable(torch.zeros(args.seq_length, net.input_size))
                        # all_outputs = Variable(torch.zeros(1, args.seq_length, net.input_size))

                        # Initialize the return data structure
                        if args.use_cuda:
                            ret_x_seq = ret_x_seq.cuda()
                            hidden_states = hidden_states.cuda()
                            cell_states = cell_states.cuda()

                        total_loss = 0
                        # For the observed part of the trajectory
                        for tstep in range(args.seq_length):
                            outputs, hidden_states, cell_states = net(
                                x_seq[tstep].view(1, 1, net.input_size),
                                temp[tstep].view(1, 1, temp.size()[-1]),
                                hidden_states, cell_states)
                            ret_x_seq[tstep, 0] = outputs[0, 0, 0]
                            ret_x_seq[tstep, 1] = outputs[0, 0, 1]
                            ret_x_seq[tstep, 2] = outputs[0, 0, 2]
                            # print(outputs.size())
                            loss = loss_f(outputs, y_seq[tstep].view(1, 1, y_seq.size()[1]))
                            total_loss += loss
                        total_loss = total_loss / args.seq_length

                    # get mean and final error
                    # print(ret_x_seq.size(), y_seq.size())
                    err = get_mean_error(ret_x_seq.data, y_seq.data, args.use_cuda)

                    loss_batch += total_loss.item()
                    err_batch += err
                    print('Current file : ', ' Batch : ', batch + 1,
                          ' Sequence: ', sequence + 1, ' Sequence mean error: ', err,
                          'valid_loss: ', total_loss.item())
                    results.append((y_seq.data.cpu().numpy(), ret_x_seq.data.cpu().numpy()))

                loss_batch = loss_batch / dataloader.batch_size
                err_batch = err_batch / dataloader.batch_size
                num_of_batch += 1
                loss_epoch += loss_batch
                err_epoch += err_batch

            epoch_result.append(results)
            all_epoch_results.append(epoch_result)

            if dataloader.num_batches != 0:
                loss_epoch = loss_epoch / dataloader.num_batches
                err_epoch = err_epoch / dataloader.num_batches
                # average_err = (err_epoch + f_err_epoch) / 2

                # Update best validation loss until now
                if loss_epoch < best_val_data_loss:
                    best_val_data_loss = loss_epoch
                    best_epoch_val_data = epoch

                if err_epoch < smallest_err_val_data:
                    # Save the model after each epoch
                    print('Saving model')
                    torch.save({
                        'epoch': epoch,
                        'state_dict': net.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict()
                    }, checkpoint_path(epoch))
                    smallest_err_val_data = err_epoch
                    best_err_epoch_val_data = epoch

                print('(epoch {}), valid_loss = {:.3f}, valid_mean_err = {:.3f}'.format(
                    epoch, loss_epoch, err_epoch))
                print('Best epoch', best_epoch_val_data,
                      'Best validation loss', best_val_data_loss,
                      'Best error epoch', best_err_epoch_val_data,
                      'Best error', smallest_err_val_data)
                log_file_curve.write("Validation dataset epoch: " + str(epoch) +
                                     " loss: " + str(loss_epoch) +
                                     " mean_err: " + str(err_epoch.data.cpu().numpy()) + '\n')

        optimizer = time_lr_scheduler(optimizer, epoch, lr_decay_epoch=args.freq_optimizer)

    if dataloader.valid_num_batches != 0:
        print('Best epoch', best_epoch_val, 'Best validation Loss', best_val_loss,
              'Best error epoch', best_err_epoch_val, 'Best error', smallest_err_val)
        # Log the best epoch and best validation loss
        log_file.write('Validation Best epoch:' + str(best_epoch_val_data) + ',' +
                       ' Best validation Loss: ' + str(best_val_data_loss))

    if dataloader.additional_validation:
        print('Best epoch according to validation dataset', best_epoch_val_data,
              'Best validation Loss', best_val_data_loss,
              'Best error epoch', best_err_epoch_val_data,
              'Best error', smallest_err_val_data)
        # write() takes a single string, so the pieces are concatenated here
        log_file.write("Validation dataset Best epoch: " + str(best_epoch_val_data) + ',' +
                       ' Best validation Loss: ' + str(best_val_data_loss) +
                       ' Best error epoch: ' + str(best_err_epoch_val_data) + '\n')
        # dataloader.write_to_plot_file(all_epoch_results[best_epoch_val_data], plot_directory)
    # elif dataloader.valid_num_batches != 0:
    #     dataloader.write_to_plot_file(all_epoch_results[best_epoch_val], plot_directory)
    # else:
    if validation_dataset_executed:
        dataloader.switch_to_dataset_type(load_data=False)
        create_directories(plot_directory, [plot_train_file_directory])
        dataloader.write_to_plot_file(all_epoch_results[len(all_epoch_results) - 1],
                                      os.path.join(plot_directory, plot_train_file_directory))

    # Close logging files
    log_file.close()
    log_file_curve.close()
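# The entry point is not shown; a minimal sketch wiring argparse to train().
# The argument set and defaults are assumptions inferred from the attributes
# used above, not the original script's parser.
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--seq_length', type=int, default=20)
    parser.add_argument('--num_epochs', type=int, default=30)
    parser.add_argument('--num_validation', type=int, default=2)
    parser.add_argument('--freq_validation', type=int, default=1)
    parser.add_argument('--freq_optimizer', type=int, default=8)
    parser.add_argument('--learning_rate', type=float, default=0.003)
    parser.add_argument('--grad_clip', type=float, default=10.)
    parser.add_argument('--rnn_size', type=int, default=128)
    parser.add_argument('--gru', action='store_true', default=False)
    parser.add_argument('--use_cuda', action='store_true', default=False)
    train(parser.parse_args())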