def get_model(config, args, train_dataset, device):
    # Pass vocabulary to construct Embedding layer.
    encoder = Encoder(config["model"], train_dataset.vocabulary)
    decoder = Decoder(config["model"], train_dataset.vocabulary)
    print("Encoder: {}".format(config["model"]["encoder"]))
    print("Decoder: {}".format(config["model"]["decoder"]))

    # New: Initializing word_embed using GloVe.
    if "glove_npy" in config["dataset"]:
        encoder.word_embed.weight.data = torch.from_numpy(
            np.load(config["dataset"]["glove_npy"])
        )
        print("Loaded glove vectors from {}".format(config["dataset"]["glove_npy"]))

    # Share word embedding between encoder and decoder.
    if encoder.word_embed and decoder.word_embed:
        decoder.word_embed = encoder.word_embed

    # Wrap encoder and decoder in a model.
    model = EncoderDecoderModel(encoder, decoder).to(device)
    if -1 not in args.gpu_ids:
        model = nn.DataParallel(model, args.gpu_ids)
    return model
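# ---------------------------------------------------------------------------
# Hedged sketch (assumptions, not part of the training code above): one way
# the array referenced by config["dataset"]["glove_npy"] could be built. The
# vocabulary attribute `index2word` and the plain-text GloVe file format are
# assumed here for illustration only.
# ---------------------------------------------------------------------------
import numpy as np


def build_glove_matrix(vocabulary, glove_txt_path, embed_dim=300,
                       out_path="data/glove.npy"):
    """Stack one GloVe vector per vocabulary index; unseen words stay zero."""
    vectors = {}
    with open(glove_txt_path, encoding="utf-8") as f:
        for line in f:
            token, *values = line.rstrip().split(" ")
            vectors[token] = np.asarray(values, dtype=np.float32)

    matrix = np.zeros((len(vocabulary.index2word), embed_dim), dtype=np.float32)
    for index, word in vocabulary.index2word.items():
        if word in vectors:
            matrix[index] = vectors[word]
    np.save(out_path, matrix)
    return matrix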
# Pass vocabulary to construct Embedding layer.
encoder = Encoder(config["model"], val_dataset.vocabulary)
decoder = Decoder(config["model"], val_dataset.vocabulary)
print("Encoder: {}".format(config["model"]["encoder"]))
print("Decoder: {}".format(config["model"]["decoder"]))

# Share word embedding between encoder and decoder.
if args.load_pthpath == "":
    print('load glove')
    decoder.word_embed = encoder.word_embed
    glove = np.load('data/glove.npy')
    encoder.word_embed.weight.data = torch.tensor(glove)

# Wrap encoder and decoder in a model.
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)

# =============================================================================
# SETUP BEFORE TRAINING LOOP
# =============================================================================
start_time = datetime.datetime.strftime(
    datetime.datetime.utcnow(), '%d-%b-%Y-%H:%M:%S'
)
sparse_metrics = SparseGTMetrics()
ndcg = NDCG()

# loading checkpoint
start_epoch = 0
model_state_dict, _ = load_checkpoint(args.load_pthpath)
if isinstance(model, nn.DataParallel):
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)

# pass vocabulary to construct nn.Embedding
encoder = Encoder(config["model"], train_dataset.vocabulary)
decoder = Decoder(config["model"], train_dataset.vocabulary)
print("Encoder: {}".format(config["model"]["encoder"]))
print("Decoder: {}".format(config["model"]["decoder"]))

# share word embedding between encoder and decoder
decoder.word_embed = encoder.word_embed

# wrap encoder and decoder in a model
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config["solver"]["initial_lr"])
scheduler = lr_scheduler.StepLR(
    optimizer, step_size=1, gamma=config["solver"]["lr_gamma"]
)

# ================================================================================================
# SETUP BEFORE TRAINING LOOP
# ================================================================================================
summary_writer = SummaryWriter(log_dir=args.save_dirpath)
checkpoint_manager = CheckpointManager(
    model, optimizer, args.save_dirpath, config=config
)
sparse_metrics = SparseGTMetrics()
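# ---------------------------------------------------------------------------
# Hedged sketch: how the SparseGTMetrics object above is typically consumed
# in a validation pass. The `observe`/`retrieve` method names follow the
# visdial-challenge-starter conventions, and the batch key "ans_ind" is an
# assumption for illustration only.
# ---------------------------------------------------------------------------
import torch


def run_validation(model, val_dataloader, sparse_metrics, device):
    model.eval()
    with torch.no_grad():
        for batch in val_dataloader:
            batch = {key: value.to(device) for key, value in batch.items()}
            output = model(batch)  # per-round scores over answer options
            sparse_metrics.observe(output, batch["ans_ind"])
    model.train()
    return sparse_metrics.retrieve(reset=True)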
    sample_flag=False)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)

# Pass vocabulary to construct Embedding layer.
encoder_dict = Dict_Encoder(config_dict["model"], train_sample_dataset.vocabulary)
encoder = Encoder(config["model"], train_sample_dataset.vocabulary)
decoder = Decoder(config["model"], train_sample_dataset.vocabulary)
# Share word embedding between encoder and decoder.
decoder.word_embed = encoder.word_embed
model_dict = encoder_dict.to(device)

# Wrap encoder and decoder in a model.
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)

criterion = nn.CrossEntropyLoss()
criterion_bce = nn.BCEWithLogitsLoss()
iterations = len(train_sample_dataset) // config["solver"]["batch_size"] + 1


def lr_lambda_fun(current_iteration: int) -> float:
    """Returns a learning rate multiplier.

    Till `warmup_epochs`, learning rate linearly increases to `initial_lr`,
    and then gets multiplied by `lr_gamma` every time a milestone is crossed.
    """
    current_epoch = float(current_iteration) / iterations
encoder = Encoder(config["model"], train_dataset.vocabulary)
if word_embedding_type == 'bert':
    decoder = Decoder(
        config["model"], train_dataset.vocabulary, bert_model=encoder.word_embed.bert
    )
else:
    decoder = Decoder(config["model"], train_dataset.vocabulary)
logger.info("Encoder: {}".format(config["model"]["encoder"]))
logger.info("Decoder: {}".format(config["model"]["decoder"]))

# Share word embedding between encoder and decoder.
if word_embedding_type != 'bert':
    decoder.word_embed = encoder.word_embed

# Wrap encoder and decoder in a model.
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)

# Loss function.
if config["model"]["decoder"] == "disc":
    if config["model"]["loss"] == "ce":
        criterion = nn.CrossEntropyLoss()
    elif config["model"]["loss"] == "np":
        criterion = NpairLoss(scale=0.25)
    else:
        raise NotImplementedError
elif config["model"]["decoder"] == "gen":
    criterion = nn.CrossEntropyLoss(
        ignore_index=(
            model.module.decoder.padding_idx
            if isinstance(model, nn.DataParallel)
            else model.decoder.padding_idx
        )
    )
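# ---------------------------------------------------------------------------
# Hedged sketch of how the two cross-entropy criteria above are typically
# applied. Tensor shapes and the batch keys "ans_ind"/"ans_out" are
# assumptions based on common discriminative/generative VisDial decoders;
# the NpairLoss branch may require a different call signature.
# ---------------------------------------------------------------------------
def compute_ce_loss(config, criterion, output, batch):
    if config["model"]["decoder"] == "disc":
        # Discriminative: `output` holds scores over candidate answers per
        # round; the target is the index of the ground-truth option.
        return criterion(output.view(-1, output.size(-1)),
                         batch["ans_ind"].view(-1))
    else:
        # Generative: token-level cross-entropy over the vocabulary, with
        # padded positions skipped via the `ignore_index` set above.
        return criterion(output.view(-1, output.size(-1)),
                         batch["ans_out"].view(-1))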
config["dataset"], "data/visdial_1.0_val.json", "data/visdial_1.0_val_dense_annotations.json", return_options=True, add_boundary_toks=False if config["model"]["decoder"] == "disc" else True, ) # Pass vocabulary to construct Embedding layer. encoder = Encoder(config["model"], val_dataset.vocabulary) decoder = Decoder(config["model"], val_dataset.vocabulary) print("Encoder: {}".format(config["model"]["encoder"])) print("Decoder: {}".format(config["model"]["decoder"])) model = EncoderDecoderModel(encoder, decoder) model_state_dict, _ = load_checkpoint('checkpoints/new_features_baseline/checkpoint_10.pth') if isinstance(model, nn.DataParallel): model.module.load_state_dict(model_state_dict) else: model.load_state_dict(model_state_dict) model.eval() with open('data/val_data.pkl','rb') as file: (img_ids, caption_vectors, all_captions, all_questions, all_questions_vectors, all_answers, all_questions) = pickle.load(file) def jon(query, questions, answers, image_id): index = img_ids.index(18472.0) caption = all_captions[index]