# teacher
teacher = BiLSTMClassifier(n_feature=Config.n_features,
                           n_class=Config.n_classes,
                           n_hidden=Config.n_hidden_nodes,
                           num_layers=3)
teacher_save = torch.load(
    Config.teacher_tar_fmt.format(plbl=Config.part_labeled))
teacher.load_state_dict(teacher_save['state_dict'])
teacher.to(device)
# ========================================================================
# student
student = LSTMClassifier(n_feature=Config.n_features,
                         n_class=Config.n_classes,
                         n_hidden=Config.n_hidden_nodes,
                         num_layers=3)
student.to(device)
# ========================================================================
# Pre-evaluate the teacher when the loader does not shuffle: batch order is
# then fixed across epochs, so the teacher logits can be computed once here
# and reused as soft targets every epoch instead of re-running the teacher.
if not Config.shuffle:
    teacher.eval()
    target_logit_list = []
    with torch.no_grad():
        for pack_inputs, _ in tqdm(trainloader, desc="TeacherTagging"):
            pack_inputs = pack_inputs.to(device)
            target_logit = teacher(pack_inputs)
            target_logit_list.append(target_logit)
else:
    target_logit_list = None
# =======================================================================
# make optimizer and scheduler
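# =======================================================================
# A minimal sketch of how the cached teacher logits above would typically
# be consumed during student training. The source does not show the loss;
# `T`, `alpha`, and `distillation_loss` are illustrative names, not Config
# fields. This is standard Hinton-style distillation: a KL term between
# temperature-softened distributions blended with hard-label cross entropy.
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels, T=2.0, alpha=0.5):
    # Assumes logits shaped (batch, n_class) and integer labels (batch,).
    # Soft targets: KL divergence between temperature-scaled distributions,
    # rescaled by T**2 so gradient magnitudes stay comparable across T.
    soft = F.kl_div(F.log_softmax(student_logits / T, dim=-1),
                    F.softmax(teacher_logits / T, dim=-1),
                    reduction='batchmean') * (T * T)
    # Hard targets: ordinary cross entropy against the ground-truth labels.
    hard = F.cross_entropy(student_logits, labels)
    return alpha * soft + (1.0 - alpha) * hard
# =======================================================================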
import time

import torch
import torch.backends.cudnn as cudnn
import torch.optim as optim
from torch.utils.data import DataLoader

from models.lstm import LSTMClassifier
from utils import train
from utils.dataloader import TIMITDataset, pad_seqs_to_batch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
if device == 'cuda':
    cudnn.benchmark = True

if __name__ == "__main__":
    # 39 input features per frame, 48 phone classes.
    net = LSTMClassifier(39, 48, n_hidden=78, num_layers=3)
    traindata = TIMITDataset(root="./data", split="train")
    trainloader = DataLoader(dataset=traindata,
                             batch_size=100,
                             shuffle=True,
                             collate_fn=pad_seqs_to_batch)
    # ==============================
    optimizer = optim.Adam(net.parameters(), lr=0.05)
    net.to(device)
    # net.train()
    for epoch in range(30):
        train(trainloader, net, optimizer, device=device)
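# ==============================
# For reference, a collate function like `pad_seqs_to_batch` usually pads
# variable-length utterances to the longest sequence in the batch. The
# sketch below is written under that assumption and is not the actual
# utils.dataloader code; the real implementation may instead return a
# PackedSequence (the teacher script above calls its batches `pack_inputs`).
from torch.nn.utils.rnn import pad_sequence

def pad_seqs_to_batch_sketch(batch):
    # batch: list of (features, labels) pairs shaped (seq_len, n_feature)
    # and (seq_len,); sort longest-first so the batch can be packed later.
    batch = sorted(batch, key=lambda pair: pair[0].size(0), reverse=True)
    feats = pad_sequence([f for f, _ in batch], batch_first=True)
    labels = pad_sequence([l for _, l in batch], batch_first=True,
                          padding_value=-100)  # -100: cross_entropy ignore_index
    return feats, labels
# ==============================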
from collections import Counter

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Project-local names (preprocess, LSTMClassifier, conf, embedding_wts,
# labels_dict, vocab) are assumed to be imported/defined earlier.
x_train, y_train, x_val, y_val = [], [], [], []
train_pairs = preprocess.read_pairs(mode='train', config=conf)
for pair in train_pairs:
    x_train.append(pair[0])
    y_train.append(pair[1])
val_pairs = preprocess.read_pairs(mode='test', config=conf)
for pair in val_pairs:
    x_val.append(pair[0])
    y_val.append(pair[1])
print('Train and test label distributions:')
print(Counter(y_train), Counter(y_val))

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
model = LSTMClassifier(conf, embedding_wts, n_lables=len(labels_dict))
model = model.to(device)
criterion = nn.CrossEntropyLoss(reduction='sum')
optimizer = optim.Adadelta(model.parameters(), lr=conf['lr'],
                           weight_decay=1e-5)

best_f1 = 0
for e in range(conf['n_epochs']):
    losses = []
    all_train_predictions = np.array([])
    all_train_targets = np.array(y_train)
    # Iterate over the training set in mini-batches.
    for start in range(0, len(x_train), conf['batch_size']):
        input_seq, input_lengths = preprocess.btmcd(
            vocab, x_train[start:start + conf['batch_size']])
        targets = torch.tensor(y_train[start:start + conf['batch_size']])
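
# The batch loop above is cut off after building `targets`. A conventional
# continuation (hypothetical; none of these lines come from the original
# script) would run the forward/backward pass and collect predictions for
# the epoch-level F1 check implied by `best_f1`.
def train_step(model, criterion, optimizer, input_seq, input_lengths,
               targets, device):
    input_seq, targets = input_seq.to(device), targets.to(device)
    optimizer.zero_grad()
    logits = model(input_seq, input_lengths)  # assumed forward signature
    loss = criterion(logits, targets)
    loss.backward()
    optimizer.step()
    # Loss value plus hard predictions, for accumulating epoch metrics.
    return loss.item(), logits.argmax(dim=1).detach().cpu().numpy()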