import os

import torch
import torch.nn as nn

# NOTE: project-local names used below (config, share_embedding, Encoder,
# Decoder, Generator, NoamOpt, LabelSmoothing, ...) are assumed to be imported
# from this repo's own modules, as in the surrounding files.


def __init__(self, vocab, emo_number, model_file_path=None, is_eval=False, load_optim=False):
    super(Transformer, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, config.pretrain_emb)
    self.encoder = Encoder(config.emb_dim, config.hidden_dim,
                           num_layers=config.hop, num_heads=config.heads,
                           total_key_depth=config.depth, total_value_depth=config.depth,
                           filter_size=config.filter, universal=config.universal)
    self.decoder = Decoder(config.emb_dim, hidden_size=config.hidden_dim,
                           num_layers=config.hop, num_heads=config.heads,
                           total_key_depth=config.depth, total_value_depth=config.depth,
                           filter_size=config.filter)
    self.generator = Generator(config.hidden_dim, self.vocab_size)

    if config.weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.lut.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    if config.noam:
        self.optimizer = NoamOpt(
            config.hidden_dim, 1, 8000,
            torch.optim.Adam(self.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    if model_file_path is not None:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        self.encoder.load_state_dict(state['encoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'])
        self.generator.load_state_dict(state['generator_dict'])
        self.embedding.load_state_dict(state['embedding_dict'])
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])
        self.eval()

    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""
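# Usage sketch (illustrative, not from this file): `vocab` must expose
# `n_words` and `config` must be fully populated before construction.
#
#     model = Transformer(vocab, emo_number=32)
#     if config.USE_CUDA:
#         model = model.cuda()
#
# With config.noam set, the wrapped Adam follows the schedule from
# "Attention Is All You Need":
#     lr = hidden_dim**-0.5 * min(step**-0.5, step * 8000**-1.5)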
def __init__(self, vocab, emo_number, model_file_path=None, is_eval=False, load_optim=False):
    super(CvaeTrans, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, pretrain=False)
    self.word_encoder = WordEncoder(config.emb_dim, config.hidden_dim, config.bidirectional)
    self.encoder = Encoder(config.hidden_dim, num_layers=config.hop, num_heads=config.heads,
                           total_key_depth=config.depth, total_value_depth=config.depth,
                           filter_size=config.filter, universal=config.universal)
    # Separate recognition encoder over the response side for the CVAE posterior.
    self.r_encoder = Encoder(config.hidden_dim, num_layers=config.hop, num_heads=config.heads,
                             total_key_depth=config.depth, total_value_depth=config.depth,
                             filter_size=config.filter, universal=config.universal)
    self.decoder = VarDecoder(config.emb_dim, hidden_size=config.hidden_dim,
                              num_layers=config.hop, num_heads=config.heads,
                              total_key_depth=config.depth, total_value_depth=config.depth,
                              filter_size=config.filter, vocab_size=self.vocab_size)
    self.generator = Generator(config.hidden_dim, self.vocab_size)
    self.linear = nn.Linear(2 * config.hidden_dim, config.hidden_dim)

    if config.weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.lut.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)

    if model_file_path:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        self.encoder.load_state_dict(state['encoder_state_dict'])
        # self.r_encoder.load_state_dict(state['r_encoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'])
        self.generator.load_state_dict(state['generator_dict'])
        self.embedding.load_state_dict(state['embedding_dict'])

    if config.USE_CUDA:
        self.cuda()

    if is_eval:
        self.eval()
    else:
        self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
        if config.noam:
            self.optimizer = NoamOpt(
                config.hidden_dim, 1, 8000,
                torch.optim.Adam(self.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])
            if config.USE_CUDA:
                # Move the restored optimizer state onto the GPU as well.
                for opt_state in self.optimizer.state.values():
                    for k, v in opt_state.items():
                        if isinstance(v, torch.Tensor):
                            opt_state[k] = v.cuda()

    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""
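# Resume sketch (illustrative; the checkpoint path is hypothetical): passing
# load_optim=True together with a checkpoint restores the optimizer state and
# moves its tensors to the GPU, so training continues where it stopped.
#
#     model = CvaeTrans(vocab, emo_number=32,
#                       model_file_path="save/cvae_best.tar",
#                       is_eval=False, load_optim=True)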
def __init__(self, vocab, model_file_path=None, is_eval=False, load_optim=False):
    super(PGNet, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, config.pretrain_emb)
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.reduce_state = ReduceState()
    self.generator = Generator(config.rnn_hidden_dim, self.vocab_size)

    if config.weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.lut.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
    if config.label_smoothing:
        self.criterion = LabelSmoothing(size=self.vocab_size,
                                        padding_idx=config.PAD_idx, smoothing=0.1)
        self.criterion_ppl = nn.NLLLoss(ignore_index=config.PAD_idx)

    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    if config.noam:
        self.optimizer = NoamOpt(
            config.rnn_hidden_dim, 1, 8000,
            torch.optim.Adam(self.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    if model_file_path is not None:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        self.encoder.load_state_dict(state['encoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'])
        self.generator.load_state_dict(state['generator_dict'])
        self.embedding.load_state_dict(state['embedding_dict'])
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])
        self.eval()

    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""
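# Note (illustrative): with config.label_smoothing on, `criterion` (smoothed)
# drives the gradients while `criterion_ppl` (plain NLL) is only used to
# report perplexity, e.g.:
#
#     nll = model.criterion_ppl(log_probs.view(-1, model.vocab_size),
#                               target.view(-1))
#     ppl = math.exp(min(nll.item(), 100))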
def __init__(self, vocab, model_file_path=None, is_eval=False):
    super(SeqToSeq, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, config.pretrain_emb)
    self.encoder = Encoder(self.vocab_size, self.embedding)
    self.encoder_r = Encoder(self.vocab_size, self.embedding)
    self.decoder = Decoder(self.vocab_size, self.embedding)
    self.bow = SoftmaxOutputLayer(config.hidden_dim, self.vocab_size)
    self.latent = Latent(is_eval)
    # reduce_state = ReduceState()

    # The embedding is already shared between encoder and decoder via
    # share_embedding above.
    # decoder.embedding.weight = encoder.embedding.weight

    if model_file_path is not None:
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        self.encoder.load_state_dict(state['encoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'], strict=False)
        self.encoder_r.load_state_dict(state['encoder_r_state_dict'])
        self.latent.load_state_dict(state['latent'])
        # self.reduce_state.load_state_dict(state['reduce_state_dict'])

    if config.USE_CUDA:
        self.encoder = self.encoder.cuda()
        self.decoder = self.decoder.cuda()
        self.encoder_r = self.encoder_r.cuda()
        self.latent = self.latent.cuda()
        self.bow = self.bow.cuda()
        # reduce_state = reduce_state.cuda()

    if is_eval:
        self.encoder = self.encoder.eval()
        self.decoder = self.decoder.eval()
        self.encoder_r = self.encoder_r.eval()
        self.latent = self.latent.eval()
        # reduce_state = reduce_state.eval()

    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""
    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
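# Inference sketch (illustrative; the path is hypothetical): is_eval=True
# switches every submodule to eval mode (disabling dropout) after the
# optional checkpoint load.
#
#     model = SeqToSeq(vocab, model_file_path="save/seq2seq_best.tar",
#                      is_eval=True)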
def __init__(self, vocab, model_file_path=None, is_eval=False, load_optim=False):
    super(Seq2SPG, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, config.pretrain_emb)
    self.encoder = nn.LSTM(config.emb_dim, config.hidden_dim, config.hop,
                           bidirectional=False, batch_first=True, dropout=0.2)
    self.encoder2decoder = nn.Linear(config.hidden_dim, config.hidden_dim)
    self.decoder = LSTMAttentionDot(config.emb_dim, config.hidden_dim, batch_first=True)
    self.memory = MLP(config.hidden_dim + config.emb_dim,
                      [config.private_dim1, config.private_dim2, config.private_dim3],
                      config.hidden_dim)
    self.dec_gate = nn.Linear(config.hidden_dim, 2 * config.hidden_dim)
    self.mem_gate = nn.Linear(config.hidden_dim, 2 * config.hidden_dim)
    self.generator = Generator(config.hidden_dim, self.vocab_size)
    # Save the model structure of each task as masks over the parameters.
    self.hooks = {}

    if config.weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
    if config.label_smoothing:
        self.criterion = LabelSmoothing(size=self.vocab_size,
                                        padding_idx=config.PAD_idx, smoothing=0.1)
        self.criterion_ppl = nn.NLLLoss(ignore_index=config.PAD_idx)

    if is_eval:
        self.encoder = self.encoder.eval()
        self.encoder2decoder = self.encoder2decoder.eval()
        self.decoder = self.decoder.eval()
        self.generator = self.generator.eval()
        self.embedding = self.embedding.eval()
        self.memory = self.memory.eval()
        self.dec_gate = self.dec_gate.eval()
        self.mem_gate = self.mem_gate.eval()

    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    if config.noam:
        self.optimizer = NoamOpt(
            config.hidden_dim, 1, 4000,
            torch.optim.Adam(self.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    if config.use_sgd:
        self.optimizer = torch.optim.SGD(self.parameters(), lr=config.lr)

    if model_file_path is not None:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        print("LOSS", state['current_loss'])
        self.encoder.load_state_dict(state['encoder_state_dict'])
        self.encoder2decoder.load_state_dict(state['encoder2decoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'])
        self.generator.load_state_dict(state['generator_dict'])
        self.embedding.load_state_dict(state['embedding_dict'])
        self.memory.load_state_dict(state['memory_dict'])
        self.dec_gate.load_state_dict(state['dec_gate_dict'])
        self.mem_gate.load_state_dict(state['mem_gate_dict'])
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])

    if config.USE_CUDA:
        self.encoder = self.encoder.cuda()
        self.encoder2decoder = self.encoder2decoder.cuda()
        self.decoder = self.decoder.cuda()
        self.generator = self.generator.cuda()
        self.criterion = self.criterion.cuda()
        self.embedding = self.embedding.cuda()
        self.memory = self.memory.cuda()
        self.dec_gate = self.dec_gate.cuda()
        self.mem_gate = self.mem_gate.cuda()

    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""
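# Note: the optimizer selection above is last-one-wins. Adam is the default,
# NoamOpt (warmup 4000) replaces it when config.noam is set, and plain SGD
# replaces that when config.use_sgd is set, so use_sgd overrides noam.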
def __init__(self, vocab, decoder_number, model_file_path=None, is_eval=False, load_optim=False):
    super(Transformer_experts, self).__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, config.pretrain_emb)
    self.encoder = Encoder(config.emb_dim, config.hidden_dim,
                           num_layers=config.hop, num_heads=config.heads,
                           total_key_depth=config.depth, total_value_depth=config.depth,
                           filter_size=config.filter, universal=config.universal)
    self.decoder_number = decoder_number
    # Multiple expert decoders.
    self.decoder = MulDecoder(decoder_number, config.emb_dim, config.hidden_dim,
                              num_layers=config.hop, num_heads=config.heads,
                              total_key_depth=config.depth, total_value_depth=config.depth,
                              filter_size=config.filter)
    self.decoder_key = nn.Linear(config.hidden_dim, decoder_number, bias=False)
    self.generator = Generator(config.hidden_dim, self.vocab_size)
    self.emoji_embedding = nn.Linear(64, config.emb_dim, bias=False)

    if config.weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.lut.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
    if config.label_smoothing:
        self.criterion = LabelSmoothing(size=self.vocab_size,
                                        padding_idx=config.PAD_idx, smoothing=0.1)
        self.criterion_ppl = nn.NLLLoss(ignore_index=config.PAD_idx)

    if config.softmax:
        self.attention_activation = nn.Softmax(dim=1)
    else:
        self.attention_activation = nn.Sigmoid()  # nn.Softmax()

    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    if config.noam:
        self.optimizer = NoamOpt(
            config.hidden_dim, 1, 8000,
            torch.optim.Adam(self.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    if model_file_path is not None:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        self.encoder.load_state_dict(state['encoder_state_dict'])
        self.decoder.load_state_dict(state['decoder_state_dict'])
        self.decoder_key.load_state_dict(state['decoder_key_state_dict'])
        # self.emoji_embedding.load_state_dict(state['emoji_embedding_dict'])
        self.generator.load_state_dict(state['generator_dict'])
        self.embedding.load_state_dict(state['embedding_dict'])
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])
        self.eval()

    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""
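# Expert-routing sketch (illustrative; the tensor name q_h is hypothetical):
# `decoder_key` scores the encoded context against the `decoder_number`
# expert decoders, and `attention_activation` (softmax or sigmoid, per
# config.softmax) turns those scores into mixing weights for MulDecoder.
#
#     logit_prob = model.decoder_key(q_h)              # (batch, decoder_number)
#     weights = model.attention_activation(logit_prob)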
def __init__(self, vocab, decoder_number, model_file_path=None, is_eval=False, load_optim=False):
    super().__init__()
    self.vocab = vocab
    self.vocab_size = vocab.n_words

    self.embedding = share_embedding(self.vocab, config.pretrain_emb)
    self.encoder = Encoder(config.emb_dim, config.hidden_dim,
                           num_layers=config.hop, num_heads=config.heads,
                           total_key_depth=config.depth, total_value_depth=config.depth,
                           filter_size=config.filter, universal=config.universal)
    self.decoder_number = decoder_number
    self.decoder = DecoderContextV(config.emb_dim, config.hidden_dim,
                                   num_layers=config.hop, num_heads=config.heads,
                                   total_key_depth=config.depth, total_value_depth=config.depth,
                                   filter_size=config.filter)

    self.vae_sampler = VAESampling(config.hidden_dim, config.hidden_dim, out_dim=300)

    # Outputs m
    self.emotion_input_encoder_1 = EmotionInputEncoder(
        config.emb_dim, config.hidden_dim, num_layers=config.hop,
        num_heads=config.heads, total_key_depth=config.depth,
        total_value_depth=config.depth, filter_size=config.filter,
        universal=config.universal, emo_input=config.emo_input)
    # Outputs m~
    self.emotion_input_encoder_2 = EmotionInputEncoder(
        config.emb_dim, config.hidden_dim, num_layers=config.hop,
        num_heads=config.heads, total_key_depth=config.depth,
        total_value_depth=config.depth, filter_size=config.filter,
        universal=config.universal, emo_input=config.emo_input)

    if config.emo_combine == "att":
        self.cdecoder = ComplexResDecoder(config.emb_dim, config.hidden_dim,
                                          num_layers=config.hop, num_heads=config.heads,
                                          total_key_depth=config.depth,
                                          total_value_depth=config.depth,
                                          filter_size=config.filter,
                                          universal=config.universal)
    elif config.emo_combine == "gate":
        self.cdecoder = ComplexResGate(config.emb_dim)

    self.s_weight = nn.Linear(config.hidden_dim, config.emb_dim, bias=False)
    self.decoder_key = nn.Linear(config.hidden_dim, decoder_number, bias=False)

    # v^T tanh(W E[i] + H c + b)
    method3 = True
    if method3:
        self.e_weight = nn.Linear(config.emb_dim, config.emb_dim, bias=True)
        self.v = torch.rand(config.emb_dim, requires_grad=True)
        if config.USE_CUDA:
            self.v = self.v.cuda()

    self.generator = Generator(config.hidden_dim, self.vocab_size)
    self.emoji_embedding = nn.Embedding(32, config.emb_dim)
    if config.init_emo_emb:
        self.init_emoji_embedding_with_glove()

    if config.weight_sharing:
        # Share the weight matrix between the target word embedding
        # and the final logit dense layer.
        self.generator.proj.weight = self.embedding.lut.weight

    self.criterion = nn.NLLLoss(ignore_index=config.PAD_idx)
    if config.label_smoothing:
        self.criterion = LabelSmoothing(size=self.vocab_size,
                                        padding_idx=config.PAD_idx, smoothing=0.1)
        self.criterion_ppl = nn.NLLLoss(ignore_index=config.PAD_idx)

    if config.softmax:
        self.attention_activation = nn.Softmax(dim=1)
    else:
        self.attention_activation = nn.Sigmoid()  # nn.Softmax()

    self.optimizer = torch.optim.Adam(self.parameters(), lr=config.lr)
    if config.noam:
        self.optimizer = NoamOpt(
            config.hidden_dim, 1, 8000,
            torch.optim.Adam(self.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

    if model_file_path is not None:
        print("loading weights")
        state = torch.load(model_file_path, map_location=lambda storage, location: storage)
        self.load_state_dict(state['model'])
        if load_optim:
            self.optimizer.load_state_dict(state['optimizer'])
        self.eval()

    self.model_dir = config.save_path
    if not os.path.exists(self.model_dir):
        os.makedirs(self.model_dir)
    self.best_path = ""

    # Indices of positive and negative emotion classes.
    self.positive_emotions = [11, 16, 6, 8, 3, 1, 28, 13, 31, 17, 24, 0, 27]
    self.negative_emotions = [9, 4, 2, 22, 14, 30, 29, 25, 15, 10,
                              23, 19, 18, 21, 7, 20, 5, 26, 12]
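# Checkpoint sketch (illustrative; the file name is hypothetical): unlike the
# older models above, which store one state dict per submodule, this model
# restores itself from a single state['model'] entry, so saving mirrors that:
#
#     torch.save({'model': model.state_dict(),
#                 'optimizer': model.optimizer.state_dict()},
#                os.path.join(model.model_dir, 'model_best.tar'))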