import os
from os import path
import pickle as pkl


def start_exp(args):
    """Train on the selected dataset and pickle the results under results/."""
    trainer = Trainer(dataset_dict[args.dataset], args.cuda)
    results = trainer.train_model(args)

    if not path.exists('results'):
        os.mkdir('results')

    # Encode the experiment configuration in the output file name.
    out_path = ("results/" + args.dataset
                + '_teacher_emb_' + str(args.teacher_embed_size)
                + '_teacher_heads_' + str(args.teacher_n_heads)
                + '_window_' + str(args.window)
                + '_student_emb_' + str(args.student_emb)
                + '_student_heads_' + str(args.student_heads)
                + '_distillation_' + str(args.distillation) + '.pkl')
    with open(out_path, "wb") as f:
        pkl.dump(results, f)
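# A minimal, hypothetical command-line wrapper around start_exp, shown only to
# illustrate how the attributes read above would be supplied. The flag names
# mirror the fields of `args` used in start_exp; every default value below is
# an illustrative assumption, not taken from the original code.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='ptb')           # assumed key of dataset_dict
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--teacher_embed_size', type=int, default=256)  # assumed default
    parser.add_argument('--teacher_n_heads', type=int, default=8)       # assumed default
    parser.add_argument('--window', type=int, default=5)                # assumed default
    parser.add_argument('--student_emb', type=int, default=64)          # assumed default
    parser.add_argument('--student_heads', type=int, default=2)         # assumed default
    parser.add_argument('--distillation', type=int, default=1)          # assumed: 1 = distill, 0 = baseline
    args = parser.parse_args()
    start_exp(args)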
        # --- inside LBLModel: combine the context word embeddings into a prediction
        #     and score it against every vocabulary word ---
        all_gram_idx = torch.arange(self.n_gram).cuda() if use_cuda else torch.arange(self.n_gram)
        all_vocab_idx = torch.arange(self.vocab_size).cuda() if use_cuda else torch.arange(self.vocab_size)

        # One (embedding_dim x embedding_dim) position matrix C_i per context position.
        position_matrix = self.C(all_gram_idx).reshape(-1, self.embedding_dim, self.embedding_dim)
        # Contract the context word embeddings with their per-position matrices;
        # tensordot's default dims=2 contracts the trailing two axes of
        # context_word_features against the leading two axes of position_matrix.
        context_features = torch.tensordot(context_word_features, position_matrix)
        all_word = self.embeddings_word_output(all_vocab_idx)
        # Score every vocabulary word, add its bias, then log-normalise.
        decoded = torch.mm(context_features, all_word.T) + self.embedding_bias(all_vocab_idx).view(-1)
        logits = F.log_softmax(decoded, dim=1)
        return logits


# --- experiment script: build the data iterators, model and trainer ---
TEXT, train_iter, val_iter, test_iter = get_iter(args.batch_size)
model = LBLModel(TEXT=TEXT,
                 embedding_dim=args.embedding_dim,
                 batch_size=args.batch_size,
                 n_gram=args.n_gram)
if use_cuda:
    model.cuda()
trainer = Trainer(train_iter=train_iter,
                  val_iter=val_iter,
                  TEXT=TEXT,
                  lr=args.lr,
                  n_gram=args.n_gram)
trainer.train_model(model=model, num_epochs=args.num_epochs)
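# A small, standalone shape check for the tensordot contraction above, under the
# assumption (not stated in the excerpt) that context_word_features has shape
# (batch, n_gram, embedding_dim) and position_matrix has shape
# (n_gram, embedding_dim, embedding_dim). With the default dims=2, tensordot
# contracts the trailing (n_gram, embedding_dim) axes of the first tensor against
# the leading (n_gram, embedding_dim) axes of the second, i.e. it computes
# sum_i r_{w_i}^T C_i and yields one embedding_dim-sized context vector per example.
import torch

batch, n_gram, embedding_dim = 4, 5, 8  # illustrative sizes
context_word_features = torch.randn(batch, n_gram, embedding_dim)
position_matrix = torch.randn(n_gram, embedding_dim, embedding_dim)
context_features = torch.tensordot(context_word_features, position_matrix, dims=2)
assert context_features.shape == (batch, embedding_dim)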
        # (tail of a statement whose opening was cut off in this excerpt)
        position_matrix, dim=1)

        # Either use a separate output embedding table or tie the output scores
        # to the input word embeddings.
        if args.sep_output:
            all_word = self.embeddings_word_output(all_vocab_idx)
        else:
            all_word = self.embeddings_word(all_vocab_idx)
        decoded = torch.mm(context_features, all_word.T) + self.embedding_bias(all_vocab_idx).view(-1)
        logits = F.log_softmax(decoded, dim=1)
        return logits


# --- experiment script: as above, but with dataset selection and wandb output ---
TEXT, train_iter, val_iter, test_iter = get_iter(args.batch_size, args.dataset)
model = LBLModel(TEXT=TEXT,
                 embedding_dim=args.embedding_dim,
                 batch_size=args.batch_size,
                 n_gram=args.n_gram)
if use_cuda:
    model.cuda()
trainer = Trainer(train_iter=train_iter,
                  val_iter=val_iter,
                  TEXT=TEXT,
                  lr=args.lr,
                  n_gram=args.n_gram)
trainer.train_model(model=model, num_epochs=args.num_epochs, path=wandb.run.dir)
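# The train_model call above passes wandb.run.dir as its output path, which is
# only available once a run has been initialised. A minimal sketch of the setup
# that would have to precede the script block; the project name is an
# illustrative assumption, not taken from the original code.
import wandb

wandb.init(project='lbl-language-model')  # assumed project name
print('run outputs will be stored in:', wandb.run.dir)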