vectorizer = dataset.vectorizer

# Create embeddings: optionally seed the embedding layer with pretrained
# medical word2vec vectors; otherwise the model trains embeddings from scratch.
embeddings = None
if args.use_med_embeddings:
    words = vectorizer.paper_vocab.token_to_idx.keys()
    embeddings = make_embeddings_matrix(words=words)
    print("<Embeddings(words={0}, dim={1})>".format(
        np.shape(embeddings)[0], np.shape(embeddings)[1]))
    # Free the loaded word2vec model once the matrix is built — it is only
    # needed to look vectors up. NOTE(review): `med_w2v` is presumably a
    # module-level model loaded for make_embeddings_matrix; it only exists
    # when embeddings are in use, so the del belongs inside this branch.
    del med_w2v

# Initialize the CNN classifier over paper tokens.
model = PapersModel(embedding_dim=args.embedding_dim,
                    num_embeddings=len(vectorizer.paper_vocab),
                    num_input_channels=args.embedding_dim,
                    filter_sizes=args.filter_sizes,
                    num_channels=args.num_filters,
                    hidden_dim=args.hidden_dim,
                    num_classes=len(vectorizer.label_vocab),
                    dropout_p=args.dropout_p,
                    pretrained_embeddings=embeddings,
                    padding_idx=vectorizer.paper_vocab.mask_index)
# NOTE(review): this prints the bound method object, not the module tree;
# `print(model)` was likely intended — kept as-is to preserve output.
print(model.named_modules)

#%% Train
trainer = Trainer(dataset=dataset, model=model,
                  model_state_file=args.model_state_file,
                  save_dir=args.save_dir, device=args.device,
                  shuffle=args.shuffle, num_epochs=args.num_epochs,
                  batch_size=args.batch_size,
                  learning_rate=args.learning_rate,
                  early_stopping_criteria=args.early_stopping_criteria)
print("Using CUDA: {}".format(args.cuda))

#%% Using embeddings
#%% Initialization
# Build the dataset and its vectorizer from the split dataframe, then persist
# the vectorizer so inference can reload the exact same vocabularies.
dataset = PapersDataset.load_dataset_and_make_vectorizer(
    df=split_df, cutoff=args.cutoff)
dataset.save_vectorizer(args.vectorizer_file)
vectorizer = dataset.vectorizer

# Model over title tokens, predicting the category label; no pretrained
# embeddings for this configuration.
model = PapersModel(embedding_dim=args.embedding_dim,
                    num_embeddings=len(vectorizer.title_vocab),
                    num_input_channels=args.embedding_dim,
                    num_channels=args.num_filters,
                    hidden_dim=args.hidden_dim,
                    num_classes=len(vectorizer.category_vocab),
                    dropout_p=args.dropout_p,
                    pretrained_embeddings=None,
                    padding_idx=vectorizer.title_vocab.mask_index)
print (model.named_modules)

#%% Train
# Wire the training loop up and run it end to end.
trainer = Trainer(dataset=dataset, model=model,
                  model_state_file=args.model_state_file,
                  save_dir=args.save_dir, device=args.device,
                  shuffle=args.shuffle, num_epochs=args.num_epochs,
                  batch_size=args.batch_size,
                  learning_rate=args.learning_rate,
                  early_stopping_criteria=args.early_stopping_criteria)
trainer.run_train_loop()
# Create embeddings: optionally seed the embedding layer with pretrained
# medical word2vec vectors for the paper vocabulary.
embeddings = None
if args.use_med_embeddings:
    words = vectorizer.paper_vocab.token_to_idx.keys()
    embeddings = make_embeddings_matrix(words=words)
    print("<Embeddings(words={0}, dim={1})>".format(
        np.shape(embeddings)[0], np.shape(embeddings)[1]))
    # Free the loaded word2vec model once the matrix is built.
    # NOTE(review): `med_w2v` only exists when embeddings are in use,
    # so the del belongs inside this branch.
    del med_w2v

# Initialize the RNN classifier over paper tokens.
model = PapersModel(embedding_dim=args.embedding_dim,
                    num_embeddings=len(vectorizer.paper_vocab),
                    rnn_hidden_dim=args.rnn_hidden_dim,
                    hidden_dim=args.hidden_dim,
                    output_dim=len(vectorizer.label_vocab),
                    num_layers=args.num_layers,
                    bidirectional=args.bidirectional,
                    dropout_p=args.dropout_p,
                    pretrained_embeddings=embeddings,
                    padding_idx=vectorizer.paper_vocab.mask_index)
# NOTE(review): prints the bound method object, not the module tree;
# `print(model)` was likely intended — kept as-is to preserve output.
print(model.named_modules)

###### Train ######
# NOTE(review): the Trainer call was truncated in the source after
# `batch_size=...`; the final two kwargs are restored to match the
# identical Trainer calls elsewhere in this file — confirm.
trainer = Trainer(dataset=dataset, model=model,
                  model_state_file=args.model_state_file,
                  save_dir=args.save_dir, device=args.device,
                  shuffle=args.shuffle, num_epochs=args.num_epochs,
                  batch_size=args.batch_size,
                  learning_rate=args.learning_rate,
                  early_stopping_criteria=args.early_stopping_criteria)