def get_model(config, args, train_dataset, device):
    # Pass vocabulary to construct Embedding layer.
    encoder = Encoder(config["model"], train_dataset.vocabulary)
    decoder = Decoder(config["model"], train_dataset.vocabulary)
    print("Encoder: {}".format(config["model"]["encoder"]))
    print("Decoder: {}".format(config["model"]["decoder"]))

    # New: Initialize word_embed with pre-trained GloVe vectors.
    if "glove_npy" in config["dataset"]:
        encoder.word_embed.weight.data = torch.from_numpy(
            np.load(config["dataset"]["glove_npy"])
        )
        print("Loaded glove vectors from {}".format(
            config["dataset"]["glove_npy"]))

    # Share word embedding between encoder and decoder.
    if encoder.word_embed and decoder.word_embed:
        decoder.word_embed = encoder.word_embed

    # Wrap encoder and decoder in a model.
    model = EncoderDecoderModel(encoder, decoder).to(device)
    if -1 not in args.gpu_ids:
        model = nn.DataParallel(model, args.gpu_ids)
    return model
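# --- Sketch (not from the original script): one way the "glove_npy" matrix
# used above could be prepared. It assumes a standard GloVe text file (one
# word followed by its floats per line) and a list of vocabulary words in
# index order; `build_glove_matrix`, `glove_path`, and `words` are
# hypothetical names, and out-of-vocabulary words fall back to small random
# vectors.
import numpy as np

def build_glove_matrix(glove_path, words, dim=300, seed=0):
    rng = np.random.RandomState(seed)
    vectors = {}
    with open(glove_path, encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            vectors[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    # Random init, then overwrite rows for words that have a GloVe vector.
    matrix = rng.normal(scale=0.1, size=(len(words), dim)).astype(np.float32)
    for i, word in enumerate(words):
        if word in vectors:
            matrix[i] = vectors[word]
    return matrix

# Example (paths are placeholders):
# np.save(config["dataset"]["glove_npy"],
#         build_glove_matrix("glove.6B.300d.txt", words))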
    args.val_dense_json,
    overfit=args.overfit,
    in_memory=args.in_memory,
    return_options=True,
    add_boundary_toks=False,
    sample_flag=False,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
    shuffle=True,
)

# Pass vocabulary to construct Embedding layer.
encoder = Encoder(config["model"], val_dataset.vocabulary)
decoder = Decoder(config["model"], val_dataset.vocabulary)
print("Encoder: {}".format(config["model"]["encoder"]))
print("Decoder: {}".format(config["model"]["decoder"]))

# Share word embedding between encoder and decoder; when no checkpoint path
# is given, initialize it from pre-trained GloVe vectors.
if args.load_pthpath == "":
    print('load glove')
    decoder.word_embed = encoder.word_embed
    glove = np.load('data/glove.npy')
    encoder.word_embed.weight.data = torch.tensor(glove)

# Wrap encoder and decoder in a model.
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)
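# --- Sketch (assumption, not shown in the snippet above): when
# args.load_pthpath is non-empty, the weights are presumably restored from
# that checkpoint instead of GloVe. The "model" key and the handling of the
# DataParallel wrapper are assumptions about the checkpoint format.
if args.load_pthpath != "":
    components = torch.load(args.load_pthpath, map_location=device)
    state_dict = components.get("model", components)
    target = model.module if isinstance(model, nn.DataParallel) else model
    target.load_state_dict(state_dict)
    print("Loaded model weights from {}".format(args.load_pthpath))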
train_dataset = VisDialDataset(
    config["dataset"], args.train_json, overfit=args.overfit, in_memory=args.in_memory
)
train_dataloader = DataLoader(
    train_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)
val_dataset = VisDialDataset(
    config["dataset"],
    args.val_json,
    args.val_dense_json,
    overfit=args.overfit,
    in_memory=args.in_memory,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=config["solver"]["batch_size"],
    num_workers=args.cpu_workers,
)

# pass vocabulary to construct nn.Embedding
encoder = Encoder(config["model"], train_dataset.vocabulary)
decoder = Decoder(config["model"], train_dataset.vocabulary)
print("Encoder: {}".format(config["model"]["encoder"]))
print("Decoder: {}".format(config["model"]["decoder"]))

# share word embedding between encoder and decoder
decoder.word_embed = encoder.word_embed

# wrap encoder and decoder in a model
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config["solver"]["initial_lr"])
scheduler = lr_scheduler.StepLR(
    optimizer, step_size=1, gamma=config["solver"]["lr_gamma"]
)
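# --- Sketch (not part of the original file): how the criterion, optimizer,
# and scheduler above are typically wired together in the training loop.
# The batch layout (a dict of tensors with answer indices under "ans_ind")
# and the config key "num_epochs" are assumptions about this codebase.
for epoch in range(config["solver"]["num_epochs"]):
    for batch in train_dataloader:
        batch = {key: value.to(device) for key, value in batch.items()}
        optimizer.zero_grad()
        output = model(batch)                     # e.g. (batch, rounds, options)
        target = batch["ans_ind"]
        loss = criterion(output.view(-1, output.size(-1)), target.view(-1))
        loss.backward()
        optimizer.step()
    scheduler.step()                              # decay lr once per epoch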
    KAT.append(key_and_token)

elmo_token = {}
for item in KAT:
    elmo_token[item[1]] = elmo[item[0]]

elmo_list = []
for i in range(len(glovevocabulary)):
    if i in elmo_token.keys():
        elmo_list.append(elmo_token[i])
    else:
        # Words without an ELMo vector get a random 1024-d vector
        # (`random` here is numpy.random, not the stdlib module).
        randArray = random.random(size=(1, 1024)).tolist()
        elmo_list.append(randArray[0])
elmo_token = torch.Tensor(elmo_list).view(len(glovevocabulary), -1)

# Pass vocabulary to construct Embedding layer.
encoder = Encoder(config["model"], val_dataset.vocabulary, glove_token, elmo_token)
decoder = Decoder(config["model"], val_dataset.vocabulary, glove_token, elmo_token)
print("Encoder: {}".format(config["model"]["encoder"]))
print("Decoder: {}".format(config["model"]["decoder"]))

# Share word embedding between encoder and decoder.
decoder.glove_embed = encoder.glove_embed
decoder.elmo_embed = encoder.elmo_embed
decoder.embed_change = encoder.embed_change

# Wrap encoder and decoder in a model.
model = EncoderDecoderModel(encoder, decoder).to(device)
if -1 not in args.gpu_ids:
    model = nn.DataParallel(model, args.gpu_ids)
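# --- Sanity-check sketch (not in the original): both embedding matrices are
# expected to be aligned with the GloVe vocabulary, one row per word index.
# The 1024-d ELMo size follows from the code above; the 300-d GloVe size is
# an assumption about the pre-trained vectors used.
assert elmo_token.shape == (len(glovevocabulary), 1024)
assert glove_token.shape == (len(glovevocabulary), 300)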