def define_model(self):
    # Encoder / attention / decoder built from the shared config.
    self.encoder = Encoder(self.INPUT_DIM, config.ENC_EMB_DIM, config.ENC_HID_DIM,
                           config.DEC_HID_DIM, config.ENC_DROPOUT, config.USE_BIDIRECTION)
    self.attention = Attention(config.ENC_HID_DIM, config.DEC_HID_DIM)
    self.decoder = Decoder(self.OUTPUT_DIM, config.DEC_EMB_DIM, config.ENC_HID_DIM,
                           config.DEC_HID_DIM, config.DEC_DROPOUT, self.attention,
                           config.MAXOUT_SIZE)
    self.model = Seq2Seq(self.encoder, self.decoder, self.DEVICE).to(self.DEVICE)
    self.model.apply(init_weights)
    count_parameters(self.model)
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config.LEARNING_RATE)
    # Ignore <pad> positions of the target sequence when computing the loss.
    self.criterion = torch.nn.CrossEntropyLoss(
        ignore_index=self.dataloader.TRG.vocab.stoi[self.dataloader.TRG.pad_token])
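# A minimal sketch of how the objects built in define_model() could be driven
# in one training step. The [seq_len, batch] tensor layout, the
# self.model(src, trg) forward signature, and the gradient-clipping value are
# assumptions for illustration, not taken from this code.
def train_step(self, src, trg):
    self.model.train()
    self.optimizer.zero_grad()
    output = self.model(src, trg)                     # [trg_len, batch, output_dim]
    output_dim = output.shape[-1]
    # Drop the <sos> position and flatten so CrossEntropyLoss sees 2-D logits.
    loss = self.criterion(output[1:].reshape(-1, output_dim), trg[1:].reshape(-1))
    loss.backward()
    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
    self.optimizer.step()
    return loss.item()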
def __init__(self, params, encoder_embedding_matrix, decoder_embedding_matrix, use_bigru=True):
    super().__init__()
    self.params = params
    # Pass the use_bigru flag through to the encoder so the constructor
    # argument actually takes effect.
    self.encoder = Encoder(params['vocab_size'], params['embedding_dim'], params['enc_units'],
                           params['batch_size'], embedding_matrix=encoder_embedding_matrix,
                           use_bigru=use_bigru)
    self.attention = BahdanauAttention(params['attn_units'])
    self.decoder = Decoder(params['vocab_size'], params['embedding_dim'], params['dec_units'],
                           params['batch_size'], embedding_matrix=decoder_embedding_matrix)
    self.pointer = Pointer()
def __init__(self):
    ## Load the data
    input_tensor, target_tensor, self._length_inp, self._length_tar, \
        self._input_wix, self._target_wix = load_data()
    vocab_inp_size = len(self._input_wix.word2ix)
    vocab_tar_size = len(self._target_wix.word2ix)
    ## Split into training and validation data
    self._input_tensor_train, self._input_tensor_val, \
        self._target_tensor_train, self._target_tensor_val = train_test_split(
            input_tensor, target_tensor, test_size=0.2)
    ## Hyperparameters
    self._units = 512
    self._batch_sz = 32
    self._embedding_dim = 200
    self._buffer_size = len(self._input_tensor_train)
    self._n_batches = self._buffer_size // self._batch_sz
    ## Dataset
    self._dataset = tf.data.Dataset.from_tensor_slices(
        (self._input_tensor_train, self._target_tensor_train)).shuffle(self._buffer_size)
    self._dataset = self._dataset.batch(self._batch_sz, drop_remainder=True)
    ## Initialize the encoder and decoder
    self._encoder = Encoder(vocab_inp_size, self._embedding_dim, self._units, self._batch_sz)
    self._decoder = Decoder(vocab_tar_size, self._embedding_dim, self._units, self._batch_sz)
    ## Optimizer
    self._optimizer = tf.keras.optimizers.Adam()
    ## Checkpoint location
    self._checkpoint_dir = './training_checkpoints'
    self._checkpoint_prefix = os.path.join(self._checkpoint_dir, "ckpt")
    self._checkpoint = tf.train.Checkpoint(optimizer=self._optimizer,
                                           encoder=self._encoder,
                                           decoder=self._decoder)
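# Hedged sketch of a single teacher-forced training step driving the encoder,
# decoder and optimizer initialized above. The Encoder/Decoder call signatures,
# the '<start>' token key and the loss_function helper follow the standard
# TensorFlow NMT tutorial pattern and are assumptions here, not code from
# this project.
@tf.function
def _train_step(self, inp, targ, enc_hidden):
    loss = 0.0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = self._encoder(inp, enc_hidden)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims(
            [self._target_wix.word2ix['<start>']] * self._batch_sz, 1)
        # Feed the ground-truth token at step t as the decoder input at t+1.
        for t in range(1, targ.shape[1]):
            predictions, dec_hidden, _ = self._decoder(dec_input, dec_hidden, enc_output)
            loss += loss_function(targ[:, t], predictions)
            dec_input = tf.expand_dims(targ[:, t], 1)
    batch_loss = loss / int(targ.shape[1])
    variables = self._encoder.trainable_variables + self._decoder.trainable_variables
    gradients = tape.gradient(loss, variables)
    self._optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss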
    'sated_model/out_target.npy', max_length_inp, max_length_targ)
print('Preparing model')
with open('sated_model/inp_lang.pickle', 'rb') as handle, \
        open('sated_model/targ_lang.pickle', 'rb') as handle2:
    inp_lang = pickle.load(handle)
    targ_lang = pickle.load(handle2)
vocab_inp_size = len(inp_lang.word_index) + 1
vocab_tar_size = len(targ_lang.word_index) + 1
dec_input_start = targ_lang.word_index['<start>']
dec_input_end = targ_lang.word_index['<end>']
encoder = Encoder(vocab_inp_size, BATCH_SIZE)
decoder = Decoder(vocab_tar_size, BATCH_SIZE)
optimizer = tf.keras.optimizers.Adam()
checkpoint_dir = './sated_model/training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt-10")  # TODO: change this path
checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)
checkpoint.restore(checkpoint_prefix)
print('Creating attack object')
attackobj = meminf(encoder, decoder, optimizer, attack_data, UNITS,
    sample_output.shape))
print('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))
attention_layer = BahdanauAttention(128)
context_vector, attention_result = attention_layer(sample_hidden, sample_output)
print("Attention context_vector shape: (batch size, units) {}".format(context_vector.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(
    attention_result.shape))
decoder = Decoder(vocab_size=vocab.size(), embedding_dim=256, dec_units=256,
                  batch_sz=32, embedding_matrix=decoder_embedding)
sample_decoder_output, _, _ = decoder(tf.random.uniform((32, 1)),
                                      sample_hidden, sample_output, context_vector)
print('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))
pgn = PGN(params, encoder_embedding, decoder_embedding)
enc_hidden, enc_output = pgn.call_encoder(example_input_batch)
predictions, _ = pgn(enc_output, sample_hidden, example_input_batch,
                     tf.random.uniform([32, 2], minval=1, maxval=10, dtype=tf.int32),
                     tf.random.uniform((32, 1)), 6)
print("finished")
    train_test_split(tensor_input[:dataset_num], tensor_target[:dataset_num], test_size=0.2)
BUFFER_SIZE = len(input_tensor_train)
steps_per_epoch = len(input_tensor_train) // BATCH_SIZE
embedding_dim = embedding_size
vocab_inp_size = len(word_index_input) + 1
vocab_tar_size = len(word_index_target) + 1
# Build the training dataset
dataset = tf.data.Dataset.from_tensor_slices(
    (input_tensor_train, target_tensor_train)).shuffle(BUFFER_SIZE)
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)
encoder = Encoder(vocab_inp_size, embedding_dim, units, BATCH_SIZE,
                  encoder_embedding, open_bigru)
decoder = Decoder(vocab_tar_size, embedding_dim, units, BATCH_SIZE, decoder_embedding)
# optimizer = tf.keras.optimizers.Adam()
optimizer = tf.keras.optimizers.Adagrad(
    params['learning_rate'],
    initial_accumulator_value=params['adagrad_init_acc'],
    clipnorm=params['max_grad_norm'])
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)
for epoch in range(EPOCHS):
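# Hedged sketch of the masked loss that usually accompanies a
# SparseCategoricalCrossentropy with reduction='none' like loss_object above:
# per-token losses are zeroed at padding positions before averaging. The
# padding id of 0 is an assumption, and such a helper would normally be
# defined before the epoch loop; it is shown here only as an illustration.
def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))  # True where not padding
    per_token_loss = loss_object(real, pred)
    per_token_loss *= tf.cast(mask, dtype=per_token_loss.dtype)
    return tf.reduce_mean(per_token_loss)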
# if __name__ == '__main__':
#     path = 'data'
#
#     dictionary = Dictionary(path)
#     run_train(dictionary)
#
#
if __name__ == '__main__':
    path = 'data'
    dictionary = Dictionary(path)
    config = BaseConfig(len(dictionary))
    embedding = nn.Embedding(config.vocab_size, config.embedding_size)
    encoder = Encoder(embedding=embedding, config=config)
    decoder = Decoder(embedding=embedding, config=config)
    if use_cuda:
        encoder.cuda(cuda_core)
        decoder.cuda(cuda_core)
    encoder_optim = optim.Adam(encoder.parameters(), lr=config.learning_rate)
    decoder_optim = optim.Adam(decoder.parameters(), lr=config.learning_rate)
    criterion = nn.CrossEntropyLoss()
    dataset = Corpus_DataSet(dictionary)
    start_word_index = dictionary.get_index('사랑')  # '사랑' is Korean for "love"
    embedding = pickle.load(f)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
INPUT_DIM = len(embedding.vocab)
OUTPUT_DIM = len(embedding.vocab)
ENC_EMB_DIM = 300
DEC_EMB_DIM = 300
ENC_HID_DIM = 128
DEC_HID_DIM = 256
N_LAYERS = 1
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, embedding, ENC_EMB_DIM, ENC_HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, embedding, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, N_LAYERS, DEC_DROPOUT)
model = Seq2Seq(enc, dec, device).to(device)
model.load_state_dict(torch.load('./src/save/seq2seq_model.pt'))

SOS_token = 1
EOS_token = 2


def generate_pair(data):
    pairs = []
    for i in range(len(data.data)):
        item = data[i]          # equivalent to data.__getitem__(i)
        pairs.append((item['text'], item['id']))
    return pairs
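# Example usage sketch for generate_pair with a minimal stand-in dataset;
# DummyData is hypothetical and only mimics the .data attribute and
# dict-style items ('text', 'id') that generate_pair relies on.
class DummyData:
    def __init__(self, rows):
        self.data = rows

    def __getitem__(self, i):
        return self.data[i]


pairs = generate_pair(DummyData([{'text': 'hello world', 'id': 0}]))
print(pairs)  # [('hello world', 0)]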