def evaluate(args):
    label_map = load_label_map(args.dataset)
    # Default to 50 classes; the full "include" dataset has 263.
    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    # Build the test dataset from precomputed CNN features or from raw keypoints.
    if args.use_cnn:
        dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_test_features"),
            label_map=label_map,
            mode="test",
        )
    else:
        dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_test_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="test",
            max_frame_len=169,
        )

    dataloader = data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    # Pick the sequence model.
    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)

    # Restore weights either via the pretrained loader or from a local checkpoint.
    if args.use_pretrained == "evaluate":
        model, _, _ = load_pretrained(args, n_classes, model)
        print("### Model loaded ###")
    else:
        exp_name = get_experiment_name(args)
        model_path = os.path.join(args.save_path, exp_name) + ".pth"
        ckpt = torch.load(model_path)
        model.load_state_dict(ckpt["model"])
        print("### Model loaded ###")

    test_loss, test_acc = validate(dataloader, model, device)
    print("Evaluation Results:")
    print(f"Loss: {test_loss}, Accuracy: {test_acc}")
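# Illustrative only: a minimal argparse setup covering the flags that evaluate()
# reads above (dataset, use_cnn, data_dir, batch_size, model, transformer_size,
# use_pretrained, save_path). The flag names match the attribute accesses in the
# function; the defaults and choices here are assumptions, not the project's CLI.
def _evaluate_cli_sketch():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", default="include")          # any other value -> 50 classes
    parser.add_argument("--use_cnn", action="store_true")
    parser.add_argument("--data_dir", default="data")
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--model", default="transformer", choices=["lstm", "transformer"])
    parser.add_argument("--transformer_size", default="small")
    parser.add_argument("--use_pretrained", default="evaluate")
    parser.add_argument("--save_path", default="checkpoints")
    evaluate(parser.parse_args())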
def make_lstm(self, embedd_dim: int = None, hidden_size: int = None, clone=None):
    assert ((embedd_dim is not None) and (hidden_size is not None)) or (clone is not None)
    if clone is not None:
        model = LSTM(
            clone.embedding_dim,
            clone.hidden_dim,
            vocab_size=self.vocab_size,
            tagset_size=self.vocab_size,
        )
        model.load_state_dict(clone.state_dict())
    else:
        model = LSTM(embedd_dim, hidden_size, vocab_size=self.vocab_size, tagset_size=self.vocab_size)
    model = model.to(self.device)
    return model
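# Illustrative only: the two ways make_lstm() can be called, per the assert above.
# `builder` stands in for whatever object owns make_lstm (assumed to expose
# vocab_size and device); `teacher` is a hypothetical existing LSTM to duplicate.
#
#   fresh = builder.make_lstm(embedd_dim=128, hidden_size=256)  # new, randomly initialised
#   copy = builder.make_lstm(clone=teacher)                     # same dims and weights as teacher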
def fit(args):
    exp_name = get_experiment_name(args)
    logging_path = os.path.join(args.save_path, exp_name) + ".log"
    logging.basicConfig(filename=logging_path, level=logging.INFO, format="%(message)s")
    seed_everything(args.seed)

    label_map = load_label_map(args.dataset)

    # Build train/val datasets either from precomputed CNN features or from raw keypoints.
    if args.use_cnn:
        train_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_train_features"),
            label_map=label_map,
            mode="train",
        )
        val_dataset = FeaturesDatset(
            features_dir=os.path.join(args.data_dir, f"{args.dataset}_val_features"),
            label_map=label_map,
            mode="val",
        )
    else:
        train_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_train_keypoints"),
            use_augs=args.use_augs,
            label_map=label_map,
            mode="train",
            max_frame_len=169,
        )
        val_dataset = KeypointsDataset(
            keypoints_dir=os.path.join(args.data_dir, f"{args.dataset}_val_keypoints"),
            use_augs=False,
            label_map=label_map,
            mode="val",
            max_frame_len=169,
        )

    train_dataloader = data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    val_dataloader = data.DataLoader(
        val_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=4,
        pin_memory=True,
    )

    n_classes = 50
    if args.dataset == "include":
        n_classes = 263

    if args.model == "lstm":
        config = LstmConfig()
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = LSTM(config=config, n_classes=n_classes)
    else:
        config = TransformerConfig(size=args.transformer_size)
        if args.use_cnn:
            config.input_size = CnnConfig.output_dim
        model = Transformer(config=config, n_classes=n_classes)
    model = model.to(device)

    optimizer = torch.optim.AdamW(model.parameters(), lr=args.learning_rate, weight_decay=0.01)
    # Reduce the learning rate when validation accuracy plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", factor=0.2)

    if args.use_pretrained == "resume_training":
        model, optimizer, scheduler = load_pretrained(args, n_classes, model, optimizer, scheduler)

    model_path = os.path.join(args.save_path, exp_name) + ".pth"
    es = EarlyStopping(patience=15, mode="max")

    for epoch in range(args.epochs):
        print(f"Epoch: {epoch+1}/{args.epochs}")
        train_loss, train_acc = train(train_dataloader, model, optimizer, device)
        val_loss, val_acc = validate(val_dataloader, model, device)
        logging.info(
            "Epoch: {}, train loss: {}, train acc: {}, val loss: {}, val acc: {}".format(
                epoch + 1, train_loss, train_acc, val_loss, val_acc
            )
        )
        scheduler.step(val_acc)
        # Checkpoint on improvement; stop after `patience` epochs without one.
        es(
            model_path=model_path,
            epoch_score=val_acc,
            model=model,
            optimizer=optimizer,
            scheduler=scheduler,
        )
        if es.early_stop:
            print("Early stopping")
            break

    print("### Training Complete ###")
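# A minimal sketch of the EarlyStopping helper that the loop in fit() assumes: it is
# called once per epoch with a score to maximise, saves a checkpoint whenever the
# score improves, and raises `early_stop` after `patience` epochs without improvement.
# The project's real class may store extra state or support mode="min"; the checkpoint
# dict below only mirrors the "model" key that evaluate() reads.
class EarlyStoppingSketch:
    def __init__(self, patience=15, mode="max"):
        self.patience = patience
        self.mode = mode
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, model_path, epoch_score, model, optimizer, scheduler):
        score = epoch_score if self.mode == "max" else -epoch_score
        if self.best_score is None or score > self.best_score:
            # Improvement: reset the counter and persist the current state.
            self.best_score = score
            self.counter = 0
            torch.save(
                {
                    "model": model.state_dict(),
                    "optimizer": optimizer.state_dict(),
                    "scheduler": scheduler.state_dict(),
                },
                model_path,
            )
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True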
if __name__ == "__main__":
    # Load the ECG dataset and split off a fixed validation set of 100 records.
    train_dataset_dict, test_dataset_dict = load_ECG_dataset(root_dir)
    train_dataset, validation_dataset = split_dataset(train_dataset_dict, val_num=100, seed=0)

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(validation_dataset, batch_size=16, shuffle=False)
    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

    model = LSTM(num_classes, input_size, hidden_size, num_layers, device)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate * 1e-3)
    exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9)

    trained_model = train_model(
        model,
        dataloaders_dict,
        criterion,
        optimizer,
        exp_lr_scheduler,
        device,
        num_epochs,
        stopping_epoch,
        savedirpath=out,
    )
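# Illustrative only: the module-level settings the script above relies on. Only the
# names come from the code; every value here is an assumption made for the sketch.
#
#   root_dir = "data/ecg"          # directory handed to load_ECG_dataset
#   out = "runs/ecg_lstm"          # where train_model writes checkpoints/logs
#   batch_size = 32
#   num_classes = 5                # e.g. heartbeat categories
#   input_size = 1                 # features per time step
#   hidden_size = 128
#   num_layers = 2
#   learning_rate = 1.0            # note: scaled by 1e-3 when building the optimizer
#   num_epochs = 50
#   stopping_epoch = 10
#   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")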
                    learning_rate=1e-3, epochs=5)  # TODO: nothing right now...

dataset = Dataset(batch_size=flags.batch_size,
                  embedding_size=flags.embedding_size,
                  max_batch=flags.max_batch,
                  batch_first=True)

# TODO: n_vocab === train set
model = LSTM(input_size=flags.embedding_size,
             hidden_size=flags.hidden_size,
             output_size=dataset.vocabulary.vocab_size)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# hidden state and cell state (needed for LSTM)
state_h, state_c = model.zero_state(flags.batch_size)
state_h = state_h.to(device)
state_c = state_c.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=flags.learning_rate)

epoch = 1
iterator = 0
# Single flat loop over epochs * max_batch iterations; `epoch` advances whenever
# the iteration counter crosses the next epoch boundary.
for i in range(flags.epochs * flags.max_batch):
    if iterator >= epoch * flags.max_batch:
        epoch += 1
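# A minimal sketch (not the project's code) of the training step the truncated loop
# above would typically perform with a stateful LSTM: forward with the carried hidden
# and cell state, cross-entropy over the vocabulary, then detach the states so the
# computation graph does not grow across iterations. `batch_x`/`batch_y` and the
# model's (input, state) -> (logits, state) signature are assumptions.
def train_step_sketch(model, criterion, optimizer, batch_x, batch_y, state_h, state_c):
    optimizer.zero_grad()
    logits, (state_h, state_c) = model(batch_x, (state_h, state_c))
    # CrossEntropyLoss expects (batch, vocab, seq_len) logits vs. (batch, seq_len) targets.
    loss = criterion(logits.transpose(1, 2), batch_y)
    loss.backward()
    optimizer.step()
    return loss.item(), state_h.detach(), state_c.detach()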
def main(load=False):
    # Init hps
    hps = init_hps()
    criterion = nn.CrossEntropyLoss()
    torch.manual_seed(0)

    # Read file
    if load:
        print("Loading file", data_file, "for testing")
    else:
        print("Using file", data_file, "for training")

    lines = utils.read_file(data_file)

    global data_file_size
    data_file_size = len(lines)

    start = time.time()
    unique_words, vocab_size, n = utils.create_unique_words(lines)
    print("vocab_size", vocab_size)
    print("Constructing unique words took:", (time.time() - start))

    # Construct dataloader
    dataset = utils.ReadLines(data_file)
    print("data set length:", len(dataset))

    # 60/20/20 split; grow the validation split until the three parts sum to len(dataset).
    train_set_len = int(len(dataset) * 0.6)
    test_set_len = int(len(dataset) * 0.2)
    validation_set_len = int(len(dataset) * 0.2)
    while train_set_len + test_set_len + validation_set_len != len(dataset):
        validation_set_len += 1

    train_set, test_set, validation_set = torch.utils.data.random_split(
        dataset, [train_set_len, test_set_len, validation_set_len])

    train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                               batch_size=hps.batch_size,
                                               num_workers=8,
                                               shuffle=True,
                                               collate_fn=collate_fn)
    test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                              batch_size=hps.batch_size,
                                              num_workers=8,
                                              shuffle=True,
                                              collate_fn=collate_fn)
    validation_loader = torch.utils.data.DataLoader(dataset=validation_set,
                                                    batch_size=hps.batch_size,
                                                    num_workers=8,
                                                    shuffle=True,
                                                    collate_fn=collate_fn)

    # Init model
    if not load:
        # Training path: build the vocabulary, train a fastText embedding, then train the LSTM.
        word_to_idx, idx_to_word = utils.build_index(unique_words)
        mapper = SentenceMapper(lines, word_to_idx, idx_to_word)

        vocab_info = {
            'idx_to_word': idx_to_word,
            'word_to_idx': word_to_idx,
            'vocab_size': vocab_size
        }
        with open(vocab_info_save_path(data_file_size, hps.lstm_h_dim, hps.embedding_dim), 'wb') as f:
            pickle.dump(vocab_info, f, protocol=pickle.HIGHEST_PROTOCOL)

        embedding = fasttext.train_unsupervised(data_file, model='cbow', dim=hps.embedding_dim)
        embedding.save_model(embedding_model_save_path(data_file_size, hps.lstm_h_dim, hps.embedding_dim))

        print("Training...")
        model = LSTM(hps, vocab_size)
        train_model(hps, idx_to_word, model, train_loader, validation_loader, mapper, embedding)
    else:
        # Evaluation path: restore the vocabulary, embedding, and trained weights, then
        # report perplexity on the test split and print sample predictions.
        with open(vocab_info_load_path, 'rb') as f:
            vocab_info = pickle.load(f, encoding='utf-8')

        idx_to_word = vocab_info['idx_to_word']
        word_to_idx = vocab_info['word_to_idx']
        vocab_size = vocab_info['vocab_size']

        mapper = SentenceMapper(lines, word_to_idx, idx_to_word)
        embedding = fasttext.load_model(embedding_model_save_path(data_file_size, hps.lstm_h_dim, hps.embedding_dim))

        print("Loading model...")
        model = LSTM(hps, vocab_size)
        model = nn.DataParallel(model).to(device)
        model.load_state_dict(torch.load(model_load_path, map_location=device))
        model.to(device)
        model.eval()

        counter = 0
        perplexities = []

        for _, (data, N) in enumerate(test_loader):
            padded_data = mapper.pad_sentences(data, N)
            og_inputs, targets = utils.inputs_and_targets_from_sequences(padded_data)

            inputs = mapper.map_sentences_to_padded_embedding(og_inputs,
                                                              embedding=embedding,
                                                              embedding_size=hps.embedding_dim,
                                                              N=N)
            targets = mapper.map_words_to_indices(targets, N=N)

            if cuda:
                inputs = inputs.cuda()
                targets = targets.cuda()

            outputs = model(inputs)
            loss = criterion(outputs.permute(0, 2, 1), targets)
            perplexities.append(np.exp(loss.detach().cpu().numpy()))

            # Greedy (top-1) prediction for the first sequence in the batch.
            topk = F.softmax(outputs, dim=2)[0, :, :]
            topk = torch.topk(topk, 1, dim=1)[1].squeeze(1)
            # print(topk.shape)

            # Sample one word per position from the full softmax distribution.
            outputs = F.softmax(outputs, dim=2)[0, :, :].detach().cpu().numpy()
            outs = []
            idxs = np.array(list(range(vocab_size)))
            for i in range(outputs.shape[0]):
                outs.append(np.random.choice(idxs, p=np.array(outputs[i, :])))
            output = torch.tensor(outs)

            input_sequence = og_inputs[0, :]
            predicted_sequence = [idx_to_word[c] for c in topk.detach().cpu().numpy()]
            sampled_sequence = [idx_to_word[c] for c in output.detach().cpu().numpy()]

            print('\nInput sequence')
            print(input_sequence)
            print('\nPredicted sequence:')
            print(predicted_sequence)
            print('\nSampled sequence:')
            print(sampled_sequence)

            prev_word = ""
            for i in range(1, len(predicted_sequence)):
                words = input_sequence[:i]
                predicted_next_word = predicted_sequence[i - 1]
                sampled_next_word = sampled_sequence[i - 1]

                if sampled_next_word == '</s>' and (prev_word == '</s>' or input_sequence[i] == '</s>'):
                    break

                prev_word = sampled_next_word
                print(" ".join(list(words)),
                      "[" + predicted_next_word + "|" + sampled_next_word + "]")

            print("Moving on to next prediction....\n")

        print(perplexities)
        mean_perplexity = np.mean(perplexities)
        print(f'Perplexity: {mean_perplexity}')

        with open(perplexity_test_save_path(data_file_size, hps.lstm_h_dim, hps.embedding_dim), 'a') as f:
            f.write(str(mean_perplexity) + "\n")

    return vocab_size, hps
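# Illustrative only: the relationship used above between mean token cross-entropy and
# perplexity. The loss values are made up; the point is that exp(loss) is the model's
# effective branching factor, and that averaging per-batch perplexities (what the
# script reports) differs slightly from exponentiating the mean loss.
def _perplexity_sketch():
    import numpy as np

    batch_losses = [4.2, 4.6, 4.4]                            # hypothetical per-batch CE losses
    per_batch_ppl = [float(np.exp(l)) for l in batch_losses]  # one perplexity per batch
    mean_of_ppls = float(np.mean(per_batch_ppl))              # what main() reports
    ppl_of_mean_loss = float(np.exp(np.mean(batch_losses)))   # corpus-level alternative
    print(mean_of_ppls, ppl_of_mean_loss)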