def test_get_same(self):
    model = get_model(
        token_num=13,
        embed_dim=30,
        encoder_num=3,
        decoder_num=2,
        head_num=3,
        hidden_dim=120,
        attention_activation=None,
        feed_forward_activation='relu',
        dropout_rate=0.05,
        use_same_embed=True,
        embed_weights=np.random.random((13, 30)),
        trainable=False,
    )
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.categorical_crossentropy,
        metrics={},
    )
    model_path = os.path.join(
        tempfile.gettempdir(),
        'test_transformer_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects=get_custom_objects())
    model.summary()
    try:
        keras.utils.plot_model(model, 'transformer_same.png')
    except Exception as e:
        print(e)
    self.assertIsNotNone(model)
def predict():
    with open('./models/target_token_dict.pkl', 'rb') as f:
        target_token_dict = pickle.load(f)
    with open('./models/source_token_dict.pkl', 'rb') as f:
        source_token_dict = pickle.load(f)
    target_token_dict_inv = {v: k for k, v in target_token_dict.items()}
    source_tokens_list = [
        t.split() for t in '''He lost.
I try.
I won!
I runs.
I came.
He run.
We lost.
We runs in the park every day.
He calmed down.
See you about 8.
He get you.
She wears a wig.'''.split('\n') if t
    ]
    encode_tokens = [['<START>'] + tokens + ['<END>'] for tokens in source_tokens_list]
    encode_tokens = [
        tokens + ['<PAD>'] * (source_max_len - len(tokens))
        for tokens in encode_tokens
    ]
    encode_input = [
        list(map(lambda x: source_token_dict.get(x, source_token_dict['<UNKOWN>']), tokens))
        for tokens in encode_tokens
    ]

    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=32,
        encoder_num=2,
        decoder_num=2,
        head_num=4,
        hidden_dim=128,
        dropout_rate=0.05,
        use_same_embed=False,  # Use different embeddings for different languages
    )
    model.load_weights('./models/model.h5', by_name=True, reshape=True)

    # Predict
    decoded = decode(model,
                     encode_input,
                     start_token=target_token_dict['<START>'],
                     end_token=target_token_dict['<END>'],
                     pad_token=target_token_dict['<PAD>'],
                     max_repeat=len(encode_input),
                     max_repeat_block=len(encode_input))
    for i, source in enumerate(source_tokens_list):
        predicted = ''.join(
            map(lambda x: target_token_dict_inv[x], decoded[i][1:-1]))
        print("{}, prediction: {}".format(source, predicted))
def train(
        use_checkpoint=True,
        initial_epoch=0,
):
    if use_checkpoint:
        transformer_model = keras_transformer.get_model(
            token_num=32006,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=256,
            dropout_rate=0.1,
        )
        transformer_model.load_weights(
            'data/checkpoint/transformer_model.ckpt')
    else:
        bert_model = keras_bert.load_trained_model_from_checkpoint(
            checkpoint_file=checkpoint_file_path,
            config_file=config_file_path)
        bert_weights = bert_model.get_layer(
            name='Embedding-Token').get_weights()[0]
        transformer_model = get_transformer_on_bert_model(
            token_num=32006,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=256,
            dropout_rate=0.1,
            embed_weights=bert_weights,
        )
    transformer_model.compile(
        optimizer=keras.optimizers.Adam(beta_2=0.98),
        # optimizer=keras.optimizers.SGD(),
        # optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.mae, keras.metrics.sparse_categorical_accuracy],
    )
    transformer_model.summary()

    history = transformer_model.fit_generator(
        generator=_generator(),
        steps_per_epoch=100,
        epochs=200,
        validation_data=_generator(),
        validation_steps=20,
        callbacks=[
            keras.callbacks.ModelCheckpoint(
                './data/checkpoint/transformer_model.ckpt',
                monitor='val_loss'),
            keras.callbacks.TensorBoard(log_dir='./data/log-adam-4000-D32/'),
            keras.callbacks.LearningRateScheduler(_decay),
            # keras.callbacks.EarlyStopping(monitor='val_loss', patience=0, verbose=1, mode='auto'),
            PredictionCallback(encoder_inputs[0], 20),
        ],
        initial_epoch=initial_epoch,
    )
def create_transformer(self, params):
    transformer = get_model(
        token_num=params['dataset'].len_encoding,
        embed_dim=params['input_embedding_size'],
        encoder_num=params['encoder_num'],
        decoder_num=params['decoder_num'],
        head_num=params["num_heads"],
        hidden_dim=params["d_model"],
        attention_activation=None,
        dropout_rate=params["dropout_rate"],
        embed_weights=None
    )
    return transformer
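# For reference, a minimal sketch of the `params` dictionary that
# create_transformer() above reads. The numeric values and the `_ToyDataset`
# stand-in (with its `len_encoding` attribute) are illustrative assumptions,
# not values from any real configuration.
class _ToyDataset:
    len_encoding = 1000  # vocabulary size, used as token_num

example_params = {
    'dataset': _ToyDataset(),
    'input_embedding_size': 64,  # mapped to embed_dim
    'encoder_num': 2,
    'decoder_num': 2,
    'num_heads': 4,              # mapped to head_num
    'd_model': 128,              # mapped to hidden_dim of the feed-forward blocks
    'dropout_rate': 0.1,
}
# transformer = self.create_transformer(example_params)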
def model_transformer():
    # chars = chars + '<START>'
    m = get_model(
        token_num=len(c_table.char_indices),
        embed_dim=EMBEDDING_DIM,  # word/character embedding dim
        encoder_num=3,
        decoder_num=2,
        head_num=2,
        hidden_dim=120,
        attention_activation='relu',
        feed_forward_activation='relu',
        dropout_rate=0.05,
        embed_weights=np.random.random((len(c_table.char_indices), EMBEDDING_DIM)),
    )
    return m
def create_transformer():
    model = get_model(token_num=SEQUENCE_LENGTH,
                      embed_dim=FLAGS.embedding_size,
                      encoder_num=3,
                      decoder_num=3,
                      head_num=8,
                      hidden_dim=FLAGS.num_cells,
                      attention_activation='relu',
                      feed_forward_activation='relu',
                      dropout_rate=0.05,
                      embed_weights=np.random.random(
                          (SEQUENCE_LENGTH, FLAGS.embedding_size)))
    opt = keras.optimizers.Adam(lr=FLAGS.learning_rate)
    model.compile(optimizer=opt,
                  loss='sparse_categorical_crossentropy',
                  metrics=['acc'])
    return model
def getModel(self):
    print("Beginning to build the model.")
    model = get_model(
        token_num=max(len(self.source_token_dict), len(self.target_token_dict)),
        embed_dim=HyperParameters.EMBED_DIM,
        encoder_num=HyperParameters.ENCODER_NUM,
        decoder_num=HyperParameters.DECODER_NUM,
        head_num=HyperParameters.HEAD_NUM,
        hidden_dim=HyperParameters.HIDDEN_DIM,
        dropout_rate=HyperParameters.DROPOUT_RATE,
        use_same_embed=False,
    )
    model.compile('adam', 'sparse_categorical_crossentropy', metrics=['acc'])
    print("The model has been built successfully; summary:")
    model.summary()
    return model
def load_transformer_model(config):
    model_params = config["model"]
    source_token_dict = config["vocab"]["source_token_dict"]
    target_token_dict = config["vocab"]["target_token_dict"]

    embed_dim = model_params["embed_dim"]
    hidden_dim = model_params["hidden_dim"]
    head_num = model_params["head_num"]
    encoder_num = model_params["encoder_num"]
    decoder_num = model_params["decoder_num"]
    dropout_rate = model_params["dropout_rate"]
    use_same_embed = bool(model_params["use_same_embed"])

    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=embed_dim,
        encoder_num=encoder_num,
        decoder_num=decoder_num,
        head_num=head_num,
        hidden_dim=hidden_dim,
        dropout_rate=dropout_rate,
        use_same_embed=use_same_embed,
    )
    return model
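# For illustration only: a minimal sketch of the config structure that
# load_transformer_model() expects. The token dictionaries and all numeric
# values below are made-up placeholders, not settings from a real run.
example_config = {
    "model": {
        "embed_dim": 32,
        "hidden_dim": 128,
        "head_num": 4,
        "encoder_num": 2,
        "decoder_num": 2,
        "dropout_rate": 0.05,
        "use_same_embed": False,
    },
    "vocab": {
        "source_token_dict": {'<PAD>': 0, '<START>': 1, '<END>': 2},
        "target_token_dict": {'<PAD>': 0, '<START>': 1, '<END>': 2},
    },
}
# model = load_transformer_model(example_config)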
# print(tokens_codificador[120000])
entrada_codificador = [list(map(lambda x: diccionario_entrada[x], tokens))
                       for tokens in tokens_codificador]
entrada_decodificador = [list(map(lambda x: diccionario_salida[x], tokens))
                         for tokens in tokens_decodificador]
salida_decodificador = [list(map(lambda x: diccionario_salida[x], tokens))
                        for tokens in tokens_resultado]
# print(entrada_codificador[120000])

# Build the transformer network
modelo = get_model(
    # number of words the model uses
    token_num=max(len(diccionario_entrada), len(diccionario_salida)),
    embed_dim=32,
    encoder_num=2,
    decoder_num=2,
    head_num=4,
    hidden_dim=128,
    dropout_rate=0.05,
    use_same_embed=False,
)
modelo.compile('adam', 'sparse_categorical_crossentropy')
# modelo.summary()
modelo.load_weights('translator_preentrenado.h5')

# TRAINING:
# Array holding the two encoded inputs, Spanish and English.
x = [np.array(entrada_codificador), np.array(entrada_decodificador)]
# Target output.
y = np.array(salida_decodificador)
    decode_tokens = list(map(lambda x: token_dict[x], decode_tokens))
    output_tokens = list(map(lambda x: [token_dict[x]], output_tokens))
    encoder_inputs_no_padding.append(encode_tokens[:i + 2])
    encoder_inputs.append(encode_tokens)
    decoder_inputs.append(decode_tokens)
    decoder_outputs.append(output_tokens)
print(encoder_inputs)

# Build the model
model = get_model(
    token_num=len(token_dict),
    embed_dim=30,
    encoder_num=3,
    decoder_num=2,
    head_num=3,
    hidden_dim=120,
    attention_activation='relu',
    feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=np.random.random((13, 30)),
)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
)
model.summary()
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
print(encode_test_input[0])
# print("encode input")
# print(encode_input)
# print("decode input")
# print(decode_input)
# print("decode output")
# print(decode_output)

# Build & fit model
with tf.device("/cpu:0"):
    model_cpu = get_model(
        token_num=len(token_dict),
        embed_dim=32,
        encoder_num=6,
        decoder_num=6,
        head_num=8,
        hidden_dim=128,
        dropout_rate=0.05,
        use_same_embed=True,  # Share one embedding layer for source and target
    )
model = multi_gpu_model(model_cpu, gpus=4)
model.compile('adam', 'sparse_categorical_crossentropy')
model.summary()
model.fit(
    x=[np.array(encode_input), np.array(decode_input)],
    y=np.array(decode_output),
    epochs=10,
    batch_size=128,
)
def get_transformer_on_bert_model(
        token_num: int,
        embed_dim: int,
        encoder_num: int,
        decoder_num: int,
        head_num: int,
        hidden_dim: int,
        embed_weights,
        attention_activation=None,
        feed_forward_activation: str = 'relu',
        dropout_rate: float = 0.0,
        use_same_embed: bool = True,
        embed_trainable=True,
        trainable: bool = True) -> keras.engine.training.Model:
    """Transformer model whose inputs are feature vectors; otherwise unchanged.

    The shape of the inputs is (None, seq_len, embed_dim).

    Parameters
    ----------
    token_num
        Number of tokens (same as vocab_size).
    embed_dim
        Dimension of the feature vectors; keep it equal to the input dimension.
    encoder_num
        Number of encoder layers.
    decoder_num
        Number of decoder layers.
    head_num
        Number of heads in the multi-head attention layers.
    hidden_dim
        Dimension of the hidden (feed-forward) layers.
    embed_weights
        Initial values for the embedding weights.
    attention_activation
        Activation function of the attention layers.
    feed_forward_activation
        Activation function of the feed-forward layers.
    dropout_rate
        Dropout rate.
    use_same_embed
        Whether the encoder and decoder share the same embedding weights.
    embed_trainable
        Whether the embedding weights are trainable.
    trainable
        Whether the model is trainable.

    Returns
    -------
    model
        A Transformer model built on the feature vectors of the pretrained
        Japanese BERT.
    """
    return keras_transformer.get_model(
        token_num=token_num,
        embed_dim=embed_dim,
        encoder_num=encoder_num,
        decoder_num=decoder_num,
        head_num=head_num,
        hidden_dim=hidden_dim,
        embed_weights=embed_weights,
        attention_activation=attention_activation,
        feed_forward_activation=feed_forward_activation,
        dropout_rate=dropout_rate,
        use_same_embed=use_same_embed,
        embed_trainable=embed_trainable,
        trainable=trainable)
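# A minimal usage sketch (assumed values) showing how the pretrained BERT
# embedding can be kept frozen while the rest of the Transformer trains:
# pass the BERT token-embedding matrix as embed_weights and set
# embed_trainable=False. `bert_weights` is assumed to come from keras_bert
# (Embedding-Token weights), as in the train() functions above.
frozen_embed_model = get_transformer_on_bert_model(
    token_num=bert_weights.shape[0],
    embed_dim=bert_weights.shape[1],
    encoder_num=4,
    decoder_num=4,
    head_num=8,
    hidden_dim=256,
    dropout_rate=0.1,
    embed_weights=bert_weights,
    embed_trainable=False,  # keep the pretrained BERT embeddings fixed
)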
def test_translate(self):
    source_tokens = [
        'i need more power'.split(' '),
        'eat jujube and pill'.split(' '),
    ]
    target_tokens = [
        list('我要更多的抛瓦'),
        list('吃枣💊'),
    ]

    # Generate dictionaries
    source_token_dict = self._build_token_dict(source_tokens)
    target_token_dict = self._build_token_dict(target_tokens)
    target_token_dict_inv = {v: k for k, v in target_token_dict.items()}

    # Add special tokens
    encode_tokens = [['<START>'] + tokens + ['<END>'] for tokens in source_tokens]
    decode_tokens = [['<START>'] + tokens + ['<END>'] for tokens in target_tokens]
    output_tokens = [tokens + ['<END>', '<PAD>'] for tokens in target_tokens]

    # Padding
    source_max_len = max(map(len, encode_tokens))
    target_max_len = max(map(len, decode_tokens))
    encode_tokens = [
        tokens + ['<PAD>'] * (source_max_len - len(tokens))
        for tokens in encode_tokens
    ]
    decode_tokens = [
        tokens + ['<PAD>'] * (target_max_len - len(tokens))
        for tokens in decode_tokens
    ]
    output_tokens = [
        tokens + ['<PAD>'] * (target_max_len - len(tokens))
        for tokens in output_tokens
    ]
    encode_input = [
        list(map(lambda x: source_token_dict[x], tokens))
        for tokens in encode_tokens
    ]
    decode_input = [
        list(map(lambda x: target_token_dict[x], tokens))
        for tokens in decode_tokens
    ]
    decode_output = [
        list(map(lambda x: [target_token_dict[x]], tokens))
        for tokens in output_tokens
    ]

    # Build & fit model
    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=32,
        encoder_num=2,
        decoder_num=2,
        head_num=4,
        hidden_dim=128,
        dropout_rate=0.05,
        use_same_embed=False,  # Use different embeddings for different languages
    )
    model.compile('adam', 'sparse_categorical_crossentropy')
    model.summary()
    model.fit(
        x=[np.array(encode_input * 1024), np.array(decode_input * 1024)],
        y=np.array(decode_output * 1024),
        epochs=10,
        batch_size=32,
    )

    # Predict
    decoded = decode(
        model,
        encode_input,
        start_token=target_token_dict['<START>'],
        end_token=target_token_dict['<END>'],
        pad_token=target_token_dict['<PAD>'],
    )
    for i in range(len(encode_input)):
        predicted = ''.join(
            map(lambda x: target_token_dict_inv[x], decoded[i][1:-1]))
        self.assertEqual(''.join(target_tokens[i]), predicted)
def main(lang, input_file, output_file):
    exclude = set(string.punctuation + string.digits)
    input_token_index = config_lang_tsf[lang.lower()]['input_token_index']
    target_token_index = config_lang_tsf[lang.lower()]['target_token_index']
    max_encoder_seq_length = config_lang_tsf[lang.lower()]['max_encoder_seq_length']
    params = config_lang_tsf[lang.lower()]['params']
    target_max_len = 50
    token_num = max(len(target_token_index), len(input_token_index))

    model = get_model(token_num=token_num,
                      embed_dim=params['embed_dim'],
                      encoder_num=params['encoder_num'],
                      decoder_num=params['decoder_num'],
                      head_num=params['head_num'],
                      hidden_dim=params['hidden_dim'],
                      dropout_rate=params['dropout_rate'],
                      use_same_embed=False,
                      embed_weights=np.random.random(
                          (token_num, params['embed_dim'])))
    model_path = ('models_transformer/' + lang.lower() +
                  '_clean_28042020.csv_transformer.keras')
    model.load_weights(model_path)

    input_texts = []
    with open(input_file, 'r', encoding='utf-8') as f:
        lines = f.readlines()[:]
    for line in lines:
        for wd in line.strip().split():
            if wd not in input_texts:
                if all([ch in input_token_index
                        for ch in wd.lower() if ch not in exclude]):
                    s = ''.join(ch for ch in wd.lower() if ch not in exclude)
                    if len(s):
                        input_texts.append([x for x in s.lower().strip()])

    reverse_input_char_index = dict(
        (i, char) for char, i in input_token_index.items())
    reverse_target_char_index = dict(
        (i, char) for char, i in target_token_index.items())

    test_encode_tokens = [['<START>'] + tokens + ['<END>']
                          for tokens in input_texts]
    test_encode_tokens = [
        tokens + ['<PAD>'] * (50 - len(tokens))
        for tokens in test_encode_tokens
    ]
    test_input = [
        list(map(lambda x: input_token_index[x], tokens))
        for tokens in test_encode_tokens
    ]

    print("predicting ...")
    decoded = {}
    for i in range(len(test_input)):
        int_decoded = []
        prediction = decode(model,
                            test_input[i],
                            start_token=target_token_index['<START>'],
                            end_token=target_token_index['<END>'],
                            pad_token=target_token_index['<PAD>'],
                            max_len=token_num + 2 + 5)
        wd = ''.join(input_texts[i])
        for j in range(1, len(prediction)):
            if reverse_target_char_index[prediction[j]] in ['<PAD>', '<END>', '<START>']:
                break
            else:
                int_decoded.append(prediction[j])
        decoded[wd] = ' '.join(
            map(lambda x: reverse_target_char_index[x], int_decoded))
    print(decoded)

    with open(output_file, 'w') as fout:
        for i in range(len(lines)):
            fout.write("%s|" % lines[i].strip())
            for wd in lines[i].strip().lower().split():
                wd_strip = ''.join(ch for ch in wd.lower() if ch not in exclude)
                if wd_strip in decoded:
                    fout.write("[%s] " % decoded[wd_strip])
                else:
                    fout.write("[UNK] ")
            fout.write("\n")

    print('\n' + "*" * 20)
    print("DONE! Wrote %d lines to %s..." % (len(lines), output_file))
    print("*" * 20 + '\n')
    list(map(lambda x: target_token_dict[x], tokens))
    for tokens in decode_tokens
]
decode_output = [
    list(map(lambda x: [target_token_dict[x]], tokens))
    for tokens in output_tokens
]

if EPOCHS > 0:
    # Build & fit model
    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=EMBED_DIM,
        encoder_num=LAYERS,
        decoder_num=LAYERS,
        head_num=ATTN_HEADS,
        hidden_dim=HIDDEN_DIM,
        dropout_rate=DROPOUT,
        use_same_embed=False,  # Use different embeddings for different languages
    )
    model.compile('adam', 'sparse_categorical_crossentropy')
    model.summary()
    model.fit(x=[
        np.array(encode_input * DATA_MULTIPLIER),
        np.array(decode_input * DATA_MULTIPLIER)
    ],
        y=np.array(decode_output * DATA_MULTIPLIER),
        epochs=EPOCHS,
]
decode_input = [
    list(map(lambda x: target_token_dict[x], tokens))
    for tokens in decode_tokens
]
decode_output = [
    list(map(lambda x: [target_token_dict[x]], tokens))
    for tokens in output_tokens
]

# Build & fit model
model = get_model(
    token_num=max(len(source_token_dict), len(target_token_dict)),
    embed_dim=32,
    encoder_num=2,
    decoder_num=2,
    head_num=4,
    hidden_dim=128,
    dropout_rate=0.05,
    use_same_embed=False,  # Use different embeddings for different languages
)
model.compile('adam', 'sparse_categorical_crossentropy')
model.summary()
print(encode_input[0], " : ", decode_input[0], " : ", decode_output[0])
model.fit(
    x=[np.array(encode_input * 1024), np.array(decode_input * 1024)],
    y=np.array(decode_output * 1024),
    epochs=5,
    batch_size=32,
)
def train(self, train_file='/home/gswyhq/data/cmn-eng/cmn.txt'):
    source_tokens = [
        'i need more power'.split(' '),
        'eat jujube and pill'.split(' '),
    ]
    target_tokens = [
        list('我要更多的抛瓦'),
        list('吃枣💊'),
    ]
    with open(train_file) as f:
        for data in f.readlines():
            if '\t' in data:
                source, target = data.strip().split('\t', maxsplit=1)
                source_tokens.append(source.split(' '))
                target_tokens.append(list(target))

    # Generate dictionaries
    source_token_dict = self._build_token_dict(source_tokens)
    target_token_dict = self._build_token_dict(target_tokens)
    target_token_dict_inv = {v: k for k, v in target_token_dict.items()}

    # Add special tokens
    encode_tokens = [['<START>'] + tokens + ['<END>'] for tokens in source_tokens]
    decode_tokens = [['<START>'] + tokens + ['<END>'] for tokens in target_tokens]
    output_tokens = [tokens + ['<END>', '<PAD>'] for tokens in target_tokens]
    # print('output_tokens: {}'.format(output_tokens))

    # Padding
    # source_max_len = max(map(len, encode_tokens))
    # target_max_len = max(map(len, decode_tokens))
    print('source_max_len: {}; target_max_len: {}'.format(
        source_max_len, target_max_len))  # source_max_len: 34; target_max_len: 46
    print("len(source_token_dict): {}, len(target_token_dict): {}".format(
        len(source_token_dict), len(target_token_dict))
    )  # len(source_token_dict): 10814, len(target_token_dict): 3442

    with open('./models/target_token_dict.pkl', 'wb') as f:
        pickle.dump(target_token_dict, f)
    with open('./models/source_token_dict.pkl', 'wb') as f:
        pickle.dump(source_token_dict, f)

    encode_tokens = [
        tokens + ['<PAD>'] * (source_max_len - len(tokens))
        for tokens in encode_tokens
    ]
    decode_tokens = [
        tokens + ['<PAD>'] * (target_max_len - len(tokens))
        for tokens in decode_tokens
    ]
    output_tokens = [
        tokens + ['<PAD>'] * (target_max_len - len(tokens))
        for tokens in output_tokens
    ]
    # print('output_tokens: {}'.format(output_tokens))

    encode_input = [
        list(map(lambda x: source_token_dict[x], tokens))
        for tokens in encode_tokens
    ]
    decode_input = [
        list(map(lambda x: target_token_dict[x], tokens))
        for tokens in decode_tokens
    ]
    decode_output = [
        list(map(lambda x: [target_token_dict[x]], tokens))
        for tokens in output_tokens
    ]
    # print("decode_output: {}".format(decode_output))

    # Build & fit model
    model = get_model(
        token_num=max(len(source_token_dict), len(target_token_dict)),
        embed_dim=32,
        encoder_num=2,
        decoder_num=2,
        head_num=4,
        hidden_dim=128,
        dropout_rate=0.05,
        use_same_embed=False,  # Use different embeddings for different languages
    )
    model.compile('adam', 'sparse_categorical_crossentropy')
    model.summary()

    early_stopping = EarlyStopping(monitor='loss', patience=3)
    model_checkpoint = ModelCheckpoint(
        filepath=os.path.join('./models', 'translate-{epoch:02d}-{loss:.4f}.hdf5'),
        save_best_only=False,
        save_weights_only=False)
    model.fit(x=[np.array(encode_input * 1), np.array(decode_input * 1)],
              y=np.array(decode_output * 1),
              epochs=10,
              batch_size=32,
              callbacks=[early_stopping, model_checkpoint])
    model.save('./models/model.h5')

    # Predict
    encode_input = encode_input[:30]
    decoded = decode(model,
                     encode_input,
                     start_token=target_token_dict['<START>'],
                     end_token=target_token_dict['<END>'],
                     pad_token=target_token_dict['<PAD>'],
                     max_repeat=len(encode_input),
                     max_repeat_block=len(encode_input))
    right_count = 0
    error_count = 0
    for i in range(len(encode_input)):
        predicted = ''.join(
            map(lambda x: target_token_dict_inv[x], decoded[i][1:-1]))
        print("reference: {}, prediction: {}".format(''.join(target_tokens[i]), predicted))
        if ''.join(target_tokens[i]) == predicted:
            right_count += 1
        else:
            error_count += 1
    print("correct: {}, wrong: {}, accuracy: {}".format(
        right_count, error_count,
        right_count / (right_count + error_count + 0.001)))
def test_decode(self):
    tokens = 'all work and no play makes jack a dull boy'.split(' ')
    token_dict = {
        '<PAD>': 0,
        '<START>': 1,
        '<END>': 2,
    }
    for token in tokens:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
    model = get_model(
        token_num=len(token_dict),
        embed_dim=32,
        encoder_num=3,
        decoder_num=2,
        head_num=4,
        hidden_dim=128,
        dropout_rate=0.05,
    )
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
    )
    model.summary()

    encoder_inputs_no_padding = []
    encoder_inputs, decoder_inputs, decoder_outputs = [], [], []
    for i in range(1, len(tokens)):
        encode_tokens, decode_tokens = tokens[:i], tokens[i:]
        encode_tokens = ['<START>'] + encode_tokens + ['<END>'] + \
            ['<PAD>'] * (len(tokens) - len(encode_tokens))
        output_tokens = decode_tokens + ['<END>', '<PAD>'] + \
            ['<PAD>'] * (len(tokens) - len(decode_tokens))
        decode_tokens = ['<START>'] + decode_tokens + ['<END>'] + \
            ['<PAD>'] * (len(tokens) - len(decode_tokens))
        encode_tokens = list(map(lambda x: token_dict[x], encode_tokens))
        decode_tokens = list(map(lambda x: token_dict[x], decode_tokens))
        output_tokens = list(map(lambda x: [token_dict[x]], output_tokens))
        encoder_inputs_no_padding.append(encode_tokens[:i + 2])
        encoder_inputs.append(encode_tokens)
        decoder_inputs.append(decode_tokens)
        decoder_outputs.append(output_tokens)

    current_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_path, 'test_transformer.h5')
    if os.path.exists(model_path):
        model.load_weights(model_path, by_name=True)
    else:
        model.fit(
            x=[
                np.asarray(encoder_inputs * 2048),
                np.asarray(decoder_inputs * 2048)
            ],
            y=np.asarray(decoder_outputs * 2048),
            epochs=10,
            batch_size=128,
        )
        model.save(model_path)
    model = keras.models.load_model(model_path,
                                    custom_objects=get_custom_objects())

    decoded = decode(
        model,
        encoder_inputs_no_padding * 2,
        start_token=token_dict['<START>'],
        end_token=token_dict['<END>'],
        pad_token=token_dict['<PAD>'],
    )
    token_dict_rev = {v: k for k, v in token_dict.items()}
    for i in range(len(decoded)):
        print(' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1])))
    for i in range(len(decoded)):
        for j in range(len(decoded[i])):
            self.assertEqual(decoder_inputs[i % len(decoder_inputs)][j],
                             decoded[i][j])

    decoded = decode(
        model,
        encoder_inputs_no_padding[2] + [0] * 5,
        start_token=token_dict['<START>'],
        end_token=token_dict['<END>'],
        pad_token=token_dict['<PAD>'],
    )
    for j in range(len(decoded)):
        self.assertEqual(decoder_inputs[2][j], decoded[j], decoded)

    decoded = decode(
        model,
        encoder_inputs_no_padding,
        start_token=token_dict['<START>'],
        end_token=token_dict['<END>'],
        pad_token=token_dict['<PAD>'],
        max_len=4,
    )
    token_dict_rev = {v: k for k, v in token_dict.items()}
    for i in range(len(decoded)):
        print(' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1])))
    for i in range(len(decoded)):
        self.assertTrue(len(decoded[i]) <= 4, decoded[i])
        for j in range(len(decoded[i])):
            self.assertEqual(decoder_inputs[i][j], decoded[i][j], decoded)

    decoded_top_5 = decode(
        model,
        encoder_inputs_no_padding,
        start_token=token_dict['<START>'],
        end_token=token_dict['<END>'],
        pad_token=token_dict['<PAD>'],
        max_len=4,
        top_k=5,
        temperature=1e-10,
    )
    has_diff = False
    for i in range(len(decoded)):
        s1 = ' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1]))
        s5 = ' '.join(map(lambda x: token_dict_rev[x], decoded_top_5[i][1:-1]))
        if s1 != s5:
            has_diff = True
    self.assertFalse(has_diff)

    decoded_top_5 = decode(
        model,
        encoder_inputs_no_padding,
        start_token=token_dict['<START>'],
        end_token=token_dict['<END>'],
        pad_token=token_dict['<PAD>'],
        max_len=4,
        top_k=5,
    )
    has_diff = False
    for i in range(len(decoded)):
        s1 = ' '.join(map(lambda x: token_dict_rev[x], decoded[i][1:-1]))
        s5 = ' '.join(map(lambda x: token_dict_rev[x], decoded_top_5[i][1:-1]))
        if s1 != s5:
            has_diff = True
    self.assertTrue(has_diff)
]
print(output_decoded[120000])

### BUILD the transformer network ###
# token_num      maximum number of words across the English and Spanish dictionaries (25269)
# embed_dim      input embedding size of 32
# encoder_num    number of encoder layers
# decoder_num    number of decoder layers
# head_num       attention heads that look for relations between phrases
# hidden_dim     hidden layer of 128 neurons
# dropout_rate   randomly drop 5% of the neurons to avoid overfitting
model = get_model(
    token_num=max(len(source_token_dict), len(target_token_dict)),
    embed_dim=32,
    encoder_num=2,
    decoder_num=2,
    head_num=8,
    hidden_dim=128,
    dropout_rate=0.05,
    use_same_embed=False,
)
model.compile('adam', 'sparse_categorical_crossentropy')
# Model summary
model.summary()

# Model training
x = [np.array(encoder_input), np.array(decoder_input)]
y = np.array(output_decoded)
# Train for 30 epochs with batches of 32 sentences
# model.fit(x, y, epochs=30, batch_size=32)

# Save the model
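# A minimal sketch of the save step the comment above refers to. The file name
# 'translator.h5' is a placeholder assumption; a keras-transformer model saved
# this way can be reloaded with the library's custom objects.
model.save('translator.h5')
# from keras_transformer import get_custom_objects
# model = keras.models.load_model('translator.h5', custom_objects=get_custom_objects())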
def train(
        use_checkpoint=True,
        initial_epoch=0,
):
    if use_checkpoint:
        transformer_model = keras_transformer.get_model(
            token_num=32000,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=512,
            attention_activation='relu',
            feed_forward_activation='relu',
            dropout_rate=0.1,
        )
        transformer_model.load_weights(
            './data/checkpoint/transformer_onbert_model-Adam4000-Dall.ckpt')
    else:
        bert_model = keras_bert.load_trained_model_from_checkpoint(
            checkpoint_file=checkpoint_file_path,
            config_file=config_file_path)
        bert_weights = bert_model.get_layer(
            name='Embedding-Token').get_weights()[0]
        transformer_model = keras_transformer.get_model(
            token_num=32000,
            embed_dim=768,
            encoder_num=4,
            decoder_num=4,
            head_num=8,
            hidden_dim=512,
            attention_activation='relu',
            feed_forward_activation='relu',
            dropout_rate=0.1,
            embed_weights=bert_weights,
        )
    transformer_model.compile(
        optimizer=keras.optimizers.Adam(beta_2=0.98),
        # optimizer=keras.optimizers.SGD(),
        # optimizer='adam',
        loss=keras.losses.sparse_categorical_crossentropy,
        metrics=[keras.metrics.mae, keras.metrics.sparse_categorical_accuracy],
    )
    transformer_model.summary()

    tb = keras.callbacks.TensorBoard(log_dir='./data/log-adam-4000-Dall-onbert/')
    try:
        history = transformer_model.fit_generator(
            generator=_generator(),
            steps_per_epoch=100,
            epochs=1000,
            validation_data=_generator(),
            validation_steps=20,
            callbacks=[
                keras.callbacks.ModelCheckpoint(
                    './data/checkpoint/transformer_onbert_model-Adam4000-Dall.ckpt',
                    monitor='val_loss'),
                tb,
                keras.callbacks.LearningRateScheduler(_decay),
                PredictionCallback(generator_data[:2, 0], 30),
            ],
            initial_epoch=initial_epoch,
        )
    except KeyboardInterrupt:
        tb.writer.close()
        decoder_inputs.append(decode_tokens)
        # decoder_outputs.append(output_tokens)
    return np.asarray(encoder_inputs), np.asarray(decoder_inputs)

seq1_input, seq2_input = gen_toy_data(s1s_train, s2s_train)
seq1_input_dev, seq2_input_dev = gen_toy_data(s1s_dev, s2s_dev)
seq1_input_test, seq2_input_test = gen_toy_data(s1s_test, s2s_test)

# Build the model
model = get_model(
    token_num=len(token_dict),
    embed_dim=300,
    encoder_num=3,
    decoder_num=2,
    head_num=6,
    hidden_dim=256,
    attention_activation='relu',
    feed_forward_activation='relu',
    dropout_rate=0.05,
    embed_weights=embed_matrix,
    embed_trainable=True
)

def model_qa():
    seq1_in = model.inputs[0]
    seq2_in = model.inputs[1]
    decode_layer = model.get_layer("Decoder-2-FeedForward-Norm").output
    final_rep = TimeDistributed(Dense(2, use_bias=False))(decode_layer)
    return Model(inputs=[seq1_in, seq2_in], outputs=final_rep)

model_qa = model_qa()