def test_load_empty_memory(self):
    """Check predictions of a checkpoint-loaded model after an HDF5 round trip.

    The model is built from the ``test_checkpoint_empty`` fixture directory,
    saved to a temporary ``.h5`` file, reloaded with the custom objects, and
    its prediction is compared against the stored ``empty_output.npy``.
    """
    current_path = os.path.dirname(os.path.abspath(__file__))
    checkpoint_path = os.path.join(current_path, 'test_checkpoint_empty')
    model = load_trained_model_from_checkpoint(
        config_path=os.path.join(checkpoint_path, 'xlnet_config.json'),
        checkpoint_path=os.path.join(checkpoint_path, 'xlnet_model.ckpt'),
        batch_size=2,
        memory_len=5,
        target_len=5,
        in_train_phase=False,
        mask_index=0,
        attention_type=ATTENTION_TYPE_UNI,
    )

    model_path = os.path.join(tempfile.gettempdir(), 'test_xlnet_%f.h5' % np.random.random())
    try:
        model.save(model_path)
        model = keras.models.load_model(model_path, custom_objects=get_custom_objects())
    finally:
        # The file only exists for the save/load round trip; remove it so
        # repeated test runs do not pile up .h5 files in the temp directory.
        if os.path.exists(model_path):
            os.remove(model_path)
    model.summary()

    def _load_numpy(name):
        """Load the fixture array ``<name>.npy`` from the checkpoint directory."""
        return np.load(os.path.join(checkpoint_path, name + '.npy'))

    input_ids = _load_numpy('input_ids')
    seg_ids = _load_numpy('seg_ids')
    tune_output = _load_numpy('empty_output')

    # NOTE(review): the third input is all zeros -- presumably the current
    # memory length ("empty memory"); confirm against the model definition.
    inputs = [input_ids, seg_ids, np.zeros((2, 1))]
    output = model.predict_on_batch(inputs)
    self.assertTrue(np.allclose(tune_output, output, atol=1e-6))
def test_build_not_training(self):
    """Build a small non-training XLNet, round-trip it through HDF5 and plot it.

    The test passes as long as building, saving, reloading and summarising
    the model succeed; rendering the architecture image is best-effort.
    """
    model = build_xlnet(
        units=6,
        training=False,
        num_token=31,
        num_block=2,
        num_head=2,
        hidden_dim=12,
        batch_size=2,
        memory_len=5,
        target_len=5,
        dropout=0.1,
        attention_dropout=0.1,
        attention_type=ATTENTION_TYPE_BI,
    )

    model_path = os.path.join(tempfile.gettempdir(), 'test_xlnet_%f.h5' % np.random.random())
    try:
        model.save(model_path)
        model = keras.models.load_model(model_path, custom_objects=get_custom_objects())
    finally:
        # The file only exists for the save/load round trip; remove it so
        # repeated test runs do not pile up .h5 files in the temp directory.
        if os.path.exists(model_path):
            os.remove(model_path)
    model.summary()

    current_path = os.path.dirname(os.path.abspath(__file__))
    visual_path = os.path.join(current_path, 'test_build_not_training.jpg')
    try:
        keras.utils.vis_utils.plot_model(model, visual_path, show_shapes=True)
    except Exception:
        # Deliberately best-effort: plotting presumably depends on optional
        # rendering tools (e.g. pydot/graphviz -- TODO confirm) that may be
        # missing, and their absence must not fail the test.
        pass
def __init__(self, gpu_name, gpu_num, seq_max_len, batch_size):
    """Set up the GPU(s), the tokenizer and the fine-tuned XLNet model.

    Args:
        gpu_name: Forwarded to ``gpu_option``.
        gpu_num: Forwarded to ``gpu_option``; when it is 2 or more the
            loaded model is wrapped with ``multi_gpu_model``.
        seq_max_len: Stored on the instance as ``seq_max_len``.
        batch_size: Stored on the instance as ``batch_size``.
    """
    banner = '--' * 10
    print(banner + ' Load xlnet model start ' + banner)
    gpu_option(gpu_name, gpu_num)

    # Per the original note, this should be the same value used for training.
    self.seq_max_len = seq_max_len
    self.batch_size = batch_size
    self.tokenizer = Tokenizer('models/Xlnet/xlnet_model/spiece.model')

    # Loaded without compiling: only the architecture and weights are restored.
    fine_tuned = load_model(
        'models/Xlnet/fine_tune_model/xlnet_fine_tune.hdf5',
        custom_objects=get_custom_objects(),
        compile=False,
    )
    # A single-GPU (or CPU) setup uses the loaded model directly.
    self.par_model = multi_gpu_model(fine_tuned, gpus=gpu_num) if gpu_num >= 2 else fine_tuned
    print(banner + ' Load xlnet model end ' + banner)
def test_load_training(self):
    """Check outputs and updated memories of a train-phase checkpoint model.

    The model is built from the ``test_checkpoint_pre`` fixture directory,
    round-tripped through a temporary ``.h5`` file, primed with the stored
    memories, and then its prediction and the resulting ``Memory-0`` /
    ``Memory-1`` contents are compared against the stored reference arrays.
    """
    current_path = os.path.dirname(os.path.abspath(__file__))
    checkpoint_path = os.path.join(current_path, 'test_checkpoint_pre')
    model = load_trained_model_from_checkpoint(
        config_path=os.path.join(checkpoint_path, 'xlnet_config.json'),
        checkpoint_path=os.path.join(checkpoint_path, 'xlnet_model.ckpt'),
        batch_size=2,
        memory_len=5,
        target_len=5,
        in_train_phase=True,
        mask_index=0,
        attention_type=ATTENTION_TYPE_UNI,
    )

    model_path = os.path.join(tempfile.gettempdir(), 'test_xlnet_%f.h5' % np.random.random())
    try:
        model.save(model_path)
        model = keras.models.load_model(model_path, custom_objects=get_custom_objects())
    finally:
        # The file only exists for the save/load round trip; remove it so
        # repeated test runs do not pile up .h5 files in the temp directory.
        if os.path.exists(model_path):
            os.remove(model_path)
    model.summary()

    def _load_numpy(name):
        """Load the fixture array ``<name>.npy`` from the checkpoint directory."""
        return np.load(os.path.join(checkpoint_path, name + '.npy'))

    input_ids = _load_numpy('input_ids')
    seg_ids = _load_numpy('seg_ids')
    input_mask = _load_numpy('input_mask')
    mems_0 = _load_numpy('mems_0')
    mems_1 = _load_numpy('mems_1')
    pre_output = _load_numpy('pre_output')
    pre_new_mems_0 = _load_numpy('pre_new_mems_0')
    pre_new_mems_1 = _load_numpy('pre_new_mems_1')

    # NOTE(review): the third input is a constant 5, equal to memory_len --
    # presumably the current memory length; confirm against the model.
    inputs = [input_ids, seg_ids, np.ones((2, 1)) * 5, input_mask]

    # Memories must be primed before predicting; the prediction is expected
    # to leave the new memory contents in the same layers.
    self._update_memory(model, 'Memory-0', mems_0)
    self._update_memory(model, 'Memory-1', mems_1)
    output = model.predict_on_batch(inputs)

    self.assertTrue(np.allclose(pre_new_mems_0, self._get_memory(model, 'Memory-0', 5), atol=1e-6))
    self.assertTrue(np.allclose(pre_new_mems_1, self._get_memory(model, 'Memory-1', 5), atol=1e-6))
    self.assertTrue(np.allclose(pre_output, output, atol=1e-6))
target_len=seq_max_len, batch_size=batch_size, mask_index=0) # MODEL结构 x1_in = Input(shape=(None, )) x2_in = Input(shape=(None, )) x3_in = Input(shape=(None, )) x4_in = Input(shape=(None, )) x = xlnet_model([x1_in, x2_in, x3_in, x4_in]) x = Lambda(function=lambda x: x[:, 0])(x) # !!!!!! p = Dense(1, activation='sigmoid')(x) model = Model([x1_in, x2_in, x3_in, x4_in], p) else: model = load_model(MODEL_SAVE_PATH, custom_objects=get_custom_objects(), compile=False) par_model = multi_gpu_model(model, gpus=gpu_num) par_model.compile( loss='binary_crossentropy', optimizer=Adam(1e-5), # 用足够小的学习率 metrics=['accuracy']) train_D = data_generator(train_data, batch_size) valid_D = data_generator(valid_data, batch_size) class ParallelModelCheckpoint(ModelCheckpoint): def __init__(self, model,