f"Number of batches: {len(train_loader)}" f"\nTotal samples test: {len(dataset_test)}, " f"Number of batches: {len(test_loader)}") # Define model n_features = len(config["data"]["features"]) n_classes = 4 if config["data"]["all_labels"]: n_classes = 9 print(f"Num classes: {n_classes}\n") model = BiLSTM(n_features, n_classes, **config["network"]).to(device) # Define loss and optimizer criterion = nn.CrossEntropyLoss() lr = config["optimizer"]["learning_rate"] optimizer = torch.optim.Adam(model.parameters(), lr=lr) if args.resume: model.load_state_dict(checkpoint["model_state_dict"]) optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) def decentralized_coordinate(coords): decentralized_coords = coords - torch.min(coords, axis=0).values return decentralized_coords def train(loader, log_interval, max_batches=None): model.train() if max_batches is None: max_batches = len(loader) history_acc_train = [] history_loss_train = []
def train():
    print("Training the model.")
    print("Split method:", SPLIT_METHOD)
    print("Sequence Length:", SEQ_LENGTH)
    model_path = get_model_path(SPLIT_METHOD, SEQ_LENGTH, FEAT_MODEL, FEAT_NUM)
    print("Model path/name:", model_path)
    n_epochs = 200

    # Load the pre-computed feature arrays for each split
    between_name = get_between_name(SPLIT_METHOD, SEQ_LENGTH, FEAT_MODEL, FEAT_NUM)
    X_tr = np.load(os.path.join(VARS_DIR, "X_" + between_name + "_train.npy"))
    Y_tr = np.load(os.path.join(VARS_DIR, "Y_" + between_name + "_train.npy"))
    X_val = np.load(os.path.join(VARS_DIR, "X_" + between_name + "_val.npy"))
    Y_val = np.load(os.path.join(VARS_DIR, "Y_" + between_name + "_val.npy"))
    X_test = np.load(os.path.join(VARS_DIR, "X_" + between_name + "_test.npy"))
    Y_test = np.load(os.path.join(VARS_DIR, "Y_" + between_name + "_test.npy"))
    print(X_tr.shape, Y_tr.shape)
    print(X_val.shape, Y_val.shape)
    print(X_test.shape, Y_test.shape)
    X_tr, X_val, X_test = normalize([X_tr, X_val, X_test])

    # Sequence inputs go through the BiLSTM; single-frame inputs use SalakhNet
    if SEQ_LENGTH > 1:
        model = BiLSTM(FEAT_NUM, 256, nb_classes=NB_CLASS).to(DEVICE)
    else:
        model = SalakhNet(input_size=FEAT_NUM, nb_class=NB_CLASS).to(DEVICE)

    # Resume from an existing checkpoint if one is available
    load = True
    if load and os.path.exists(model_path):
        model.load_state_dict(torch.load(model_path))
        print("Model Loaded")

    optimizer = optim.Adam(model.parameters(), lr=0.001)
    device = next(model.parameters()).device
    loss_fn = nn.CrossEntropyLoss()
    batch_size = get_batch_size(SEQ_LENGTH)

    # Baseline validation/test accuracy of the (possibly reloaded) model
    with torch.no_grad():
        model.eval()
        X_val = torch.Tensor(X_val).to(device)
        X_test = torch.Tensor(X_test).to(device)
        preds = model(X_val).log_softmax(dim=1).cpu().numpy().argmax(axis=1)
        best_val_acc = np.sum(preds == Y_val) / len(preds) * 100
        preds = model(X_test).log_softmax(dim=1).cpu().numpy().argmax(axis=1)
        test_acc = np.sum(preds == Y_test) / len(preds) * 100

    for epoch in range(1, n_epochs + 1):
        model.train()
        losses = []
        n_batches = math.ceil(len(Y_tr) / batch_size)
        for batch_idx in range(n_batches):
            optimizer.zero_grad()
            s = batch_idx * batch_size
            e = min(len(Y_tr), (batch_idx + 1) * batch_size)
            X_batch = torch.Tensor(X_tr[s:e]).to(device)
            Y_batch = torch.LongTensor(Y_tr[s:e]).to(device)
            preds = model(X_batch)
            loss = loss_fn(preds, Y_batch)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        # print("Train Loss:", np.mean(losses))

        # Checkpoint whenever validation accuracy improves, and record the
        # test accuracy at that point
        with torch.no_grad():
            model.eval()
            preds = model(X_val).log_softmax(dim=1).cpu().numpy().argmax(axis=1)
            val_acc = np.sum(preds == Y_val) / len(preds) * 100
            if val_acc > best_val_acc:
                best_val_acc = val_acc
                preds = model(X_test).log_softmax(dim=1).cpu().numpy().argmax(axis=1)
                test_acc = np.sum(preds == Y_test) / len(preds) * 100
                torch.save(model.state_dict(), model_path)
            print("Val ACC: %.2f" % best_val_acc, "Test Acc: %.2f" % test_acc)
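
# `normalize` is called above but not defined in this snippet. A minimal
# sketch, assuming it z-scores every split with statistics computed on the
# training split only (a common choice; the real helper may differ):
def normalize(splits):
    X_tr = splits[0]
    flat = X_tr.reshape(-1, X_tr.shape[-1])  # works for (N, F) and (N, T, F)
    mean = flat.mean(axis=0)
    std = flat.std(axis=0) + 1e-8  # avoid division by zero
    return [(X - mean) / std for X in splits]
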
train_vocab_size = len(train_word_id_lists)
dev_vocab_size = len(dev_word_id_lists)
test_vocab_size = len(test_word_id_lists)

embedding_dim = args.embedding
hidden_dim = args.hidden
num_layers = args.num_layers
dropout = args.dropout

# Output dimension: tag set size plus two (commonly for <start>/<end> tags)
out_dim = len(tag2id) + 2

model = BiLSTM(train_vocab_size, embedding_dim, num_layers, hidden_dim,
               dropout, out_dim)
optimizer = optim.Adam(model.parameters(), lr=args.lr,
                       weight_decay=args.weight_decay)
batch_size = args.batch_size

# Move the model and all id tensors to the GPU when requested
if args.cuda:
    model.cuda()
    train_word_id_lists = train_word_id_lists.cuda()
    train_tag_id_lists = train_tag_id_lists.cuda()
    dev_word_id_lists = dev_word_id_lists.cuda()
    dev_tag_id_lists = dev_tag_id_lists.cuda()
    test_word_id_lists = test_word_id_lists.cuda()
    test_tag_id_lists = test_tag_id_lists.cuda()

# Number of optimization steps per epoch (ceil division avoids an empty
# final batch when the size divides evenly)
train_total_step = (train_vocab_size + batch_size - 1) // batch_size
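
# A hypothetical sketch of the per-epoch loop that `train_total_step`
# implies: slice the id lists step by step. The forward signature, the use
# of F.cross_entropy, and the flattening to per-token labels are all
# assumptions; the actual training loop lives outside this snippet.
import torch.nn.functional as F

for step in range(train_total_step):
    s = step * batch_size
    e = min(train_vocab_size, s + batch_size)
    batch_words = train_word_id_lists[s:e]
    batch_tags = train_tag_id_lists[s:e]
    scores = model(batch_words)  # assumed shape: (batch, seq_len, out_dim)
    loss = F.cross_entropy(scores.view(-1, out_dim), batch_tags.view(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()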