コード例 #1
0
    def train(self, X):
        """Build and fit a multi-view variational autoencoder on X.

        Creates one input branch per view (``self.n`` views of widths
        ``self.shape[i]``), merges them into a shared bottleneck of size
        ``self.n_components``, samples a latent ``z`` via the
        reparameterization trick, and reconstructs every view.  Trains with
        MSE + KL losses and appends the per-epoch MSE history to
        ``./mvae.log``.

        Parameters
        ----------
        X : data accepted by ``Model.fit`` for the multi-input model
            (presumably a list of arrays, one per view — TODO confirm
            against callers).

        Returns
        -------
        None
        """
        def sampling(args):
            # Reparameterization trick: z = mu + sigma * eps, eps ~ N(0, I).
            z_mean, z_log_var = args
            batch = K.shape(z_mean)[0]
            dim = K.int_shape(z_mean)[1]
            epsilon = K.random_normal(shape=(batch, dim), seed=0)
            return z_mean + K.exp(0.5 * z_log_var) * epsilon

        encoding_dim = self.n_components
        # One input branch per view; each branch is projected onto an equal
        # share of the bottleneck width.  ("inputs" rather than "input" so the
        # builtin is not shadowed.)
        inputs = [Input(shape=(self.shape[i], )) for i in range(self.n)]
        dims = [int(encoding_dim / self.n) for _ in range(self.n)]
        denses = [Dense(dims[i])(inputs[i]) for i in range(self.n)]
        merged_dense = concatenate(denses, axis=-1) if self.n > 1 else denses[0]
        encoded = Dense(encoding_dim)(merged_dense)
        encoded = BatchNormalization()(encoded)
        encoded = Activation('relu')(encoded)
        z_mean = Dense(encoding_dim)(encoded)
        z_log_var = Dense(encoding_dim)(encoded)
        z = Lambda(sampling, output_shape=(encoding_dim, ),
                   name='z')([z_mean, z_log_var])
        model = Dense(self.n_components)(z)
        model = BatchNormalization()(model)
        model = Activation('relu')(model)
        # One reconstruction head per view.
        output = [Dense(self.shape[i])(model) for i in range(self.n)]
        vae = Model(inputs, output)
        encoder = Model(inputs, z)  # built but unused here; kept in case a subclass/caller relies on side effects — TODO confirm
        # KL divergence of q(z|x) from N(0, I), normalized by the total
        # projected input width.
        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5 / np.sum(dims)
        # Average of the per-view reconstruction MSEs.
        k_mse_loss = 0
        for i in range(self.n):
            k_mse_loss += mse(inputs[i], output[i]) / self.n
        vae.add_loss(k_mse_loss)
        vae.add_metric(k_mse_loss, name='mse_loss')
        vae.add_loss(kl_loss)
        vae.add_metric(kl_loss, name='kl_loss')
        vae.compile(optimizer=Adam())
        # summary() prints the table itself and returns None, so wrapping it
        # in print() emitted a spurious "None" line.
        vae.summary()
        h = vae.fit(X, epochs=self.epochs, verbose=2)
        # Persist the MSE learning curve; `with` guarantees the file handle
        # is closed even if a write fails.
        log_file = "./mvae.log"
        with open(log_file, 'w') as fp:
            for hi in h.history['mse_loss']:
                fp.write("%f\n" % (hi))
        return
コード例 #2
0
def fastbert(teacher, classifier, speed=speed):
    """Build FastBERT-style train/infer models with per-layer early-exit classifiers.

    A small classifier is attached after the embedding and after each hidden
    layer of the (frozen) teacher; ``SwitchTwo(speed)`` routes each example
    either to the early exit or on to the next layer.

    Parameters:
        teacher: pre-trained BERT-like model wrapper (provides ``inputs``,
            ``apply_embeddings``, ``apply_attention_layers``, ``apply``,
            ``num_hidden_layers``).
        classifier: the final (teacher) classification head; frozen here.
        speed: early-exit threshold.  NOTE(review): the default ``speed=speed``
            captures a module-level ``speed`` at definition time — confirm
            that global exists and is intended.

    Returns:
        (model_train, model_infer, model_1, model_2): distillation-training
        model, inference model, and the models exposing the 1st and 2nd
        student classifier outputs.
    """
    inputs = teacher.inputs
    # frozen layers
    for layer in teacher.model.layers:
        layer.trainable = False
    classifier.trainable = False

    x_pre = teacher.apply_embeddings(inputs)
    emb_name = 'FastBert-embedding'
    clf_pre = teacher.apply(x_pre,
                            FastbertClassifierLayer,
                            name=emb_name,
                            labels_num=num_classes)
    student_outputs = [clf_pre]
    outputs = [clf_pre, x_pre]

    for idx in range(teacher.num_hidden_layers):
        clf_pre, x_pre = outputs
        name = 'FastBert-%d' % idx
        x_next = teacher.apply_attention_layers(x_pre, idx)
        # NOTE(review): the student classifier is applied to x_pre (the layer
        # *input*), not x_next (the layer output) — confirm this matches the
        # intended FastBERT branch placement.
        clf_next = teacher.apply(x_pre,
                                 FastbertClassifierLayer,
                                 name=name,
                                 labels_num=num_classes)
        student_outputs.append(clf_next)

        # SwitchTwo gates on the first tensor (the previous classifier's
        # confidence vs. `speed`) and selects between the other two.
        x = SwitchTwo(speed)([clf_pre, x_pre, x_next])
        clf = SwitchTwo(speed)([clf_pre, clf_pre, clf_next])
        outputs = [clf, x]

    clf_prob, x = outputs
    x = classifier(x)

    # Final output: early-exit probability if confident, else teacher head.
    output = SwitchTwo(speed)([clf_prob, clf_prob, x])
    model_infer = Model(inputs, output)

    # Training model: labels come in as an extra input so losses can be
    # attached via add_loss.
    label_inputs = Input(shape=(None, ))
    model_train = Model(inputs + [label_inputs], student_outputs)

    for i, prob in enumerate(student_outputs):
        # Cross-entropy against the labels plus KL distillation against the
        # teacher's final distribution `x`.
        ce_loss = K.sparse_categorical_crossentropy(label_inputs, prob)
        kl_loss = kullback_leibler_divergence(x, prob)
        model_train.add_loss(ce_loss)
        model_train.add_metric(ce_loss, name='ce_loss-%d' % i)
        model_train.add_loss(kl_loss)
        model_train.add_metric(kl_loss, name='loss-%d' % i)

    model_1 = Model(inputs, student_outputs[1])
    model_2 = Model(inputs, student_outputs[2])

    return model_train, model_infer, model_1, model_2
コード例 #3
0
class GAHs_trans:
	"""Transformer-style encoder-decoder with customized per-role encoder masks.

	Embeddings, positional encoding, encoder, decoder and the output
	projection are built in ``__init__``; ``compile`` wires the training
	graph (masked sparse cross-entropy, perplexity, accuracy), and the
	remaining methods build and drive inference-time decoding models
	(readout RNN decode, fast step-wise greedy decode, beam search).
	"""
	def __init__(self, i_tokens, o_tokens, len_limit, d_model=256, \
			  d_inner_hid=512, n_head=4, layers=2, dropout=0.1, \
			  share_word_emb=False):
		# i_tokens / o_tokens: source / target token-index objects; they are
		# expected to provide num(), id(), token(), startid() and endid().
		self.i_tokens = i_tokens
		self.o_tokens = o_tokens
		self.len_limit = len_limit
		self.d_model = d_model
		self.decode_model = None
		self.readout_model = None
		self.layers = layers
		

		d_emb = d_model

		self.src_loc_info = True

		# Per-head key/value dims; requires d_model divisible by n_head.
		d_k = d_v = d_model // n_head
		assert d_k * n_head == d_model and d_v == d_k

		self.pos_emb = PosEncodingLayer(len_limit, d_emb) if self.src_loc_info else None

		self.emb_dropout = Dropout(dropout)

		self.i_word_emb = Embedding(i_tokens.num(), d_emb)
		if share_word_emb: 
			assert i_tokens.num() == o_tokens.num()
			# NOTE(review): bare `i_word_emb` is not defined in this scope and
			# would raise NameError — should this be `self.i_word_emb`? Confirm.
			self.o_word_emb = i_word_emb
		else: self.o_word_emb = Embedding(o_tokens.num(), d_emb)

		self.encoder = MultiLayerEncoder(d_model, d_inner_hid, n_head, layers, dropout)
		self.decoder = Decoder(d_model, d_inner_hid, n_head, layers, dropout)
		self.target_layer = TimeDistributed(Dense(o_tokens.num(), use_bias=False))


	def compile(self, optimizer='adam', active_layers=999, opt=None):
		"""Build and compile the training model.

		``opt`` must provide ``all_roles`` (one mask Input is created per
		role) and ``sample_i`` (indices of the masks actually passed to the
		encoder).  Registers perplexity and accuracy as extra metrics.
		"""
		src_seq_input = Input(shape=(None,), dtype='int32')
		tgt_seq_input = Input(shape=(None,), dtype='int32')

		# customized masks
		masks = [Input(shape=(self.len_limit,self.len_limit),dtype='float32') for i in range(len(opt.all_roles))]
		mask_comb = []
		for i in opt.sample_i:
			mask_comb.append(masks[i])

		src_seq = src_seq_input
		# Teacher forcing: decoder input drops the last token, the target
		# drops the first (shift-by-one).
		tgt_seq  = Lambda(lambda x:x[:,:-1])(tgt_seq_input)
		tgt_true = Lambda(lambda x:x[:,1:])(tgt_seq_input)

		src_emb = self.i_word_emb(src_seq)
		tgt_emb = self.o_word_emb(tgt_seq)

		if self.pos_emb: 
			src_emb = add_layer([src_emb, self.pos_emb(src_seq)])
			tgt_emb = add_layer([tgt_emb, self.pos_emb(tgt_seq)])
		src_emb = self.emb_dropout(src_emb)

		# customized masks added
		enc_output = self.encoder(src_emb, src_seq, active_layers=active_layers, masks = mask_comb)
		dec_output = self.decoder(tgt_emb, tgt_seq, src_seq, enc_output, active_layers=active_layers)	
		final_output = self.target_layer(dec_output)

		def get_loss(y_pred, y_true):
			# Sparse cross-entropy averaged over non-padding (id != 0) positions.
			y_true = tf.cast(y_true, 'int32')
			# loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
			loss = tf.losses.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
			mask = tf.cast(tf.not_equal(y_true, 0), 'float32')
			loss = tf.reduce_sum(loss * mask, -1) / tf.reduce_sum(mask, -1)
			loss = K.mean(loss)
			return loss

		def get_accu(y_pred, y_true):
			# Token-level accuracy over non-padding positions.
			mask = tf.cast(tf.not_equal(y_true, 0), 'float32')	# [1,1,1,0,0,0]
			corr = K.cast(K.equal(K.cast(y_true, 'int32'), K.cast(K.argmax(y_pred, axis=-1), 'int32')), 'float32')
			corr = K.sum(corr * mask, -1) / K.sum(mask, -1)
			return K.mean(corr)
		
		# def bleu(target, pred):
		# 	mask = tf.cast(tf.not_equal(target, 0), 'float32')	# [1,1,1,0,0,0]
		# 	predicts = K.eval(K.cast(K.argmax(pred, axis=-1), 'int32'))
		# 	reference = K.eval(K.cast(target, 'int32'))
		# 	reference = [[x] for x in reference]

		# 	score = nltk.translate.bleu_score.corpus_bleu(reference, predicts)
		# 	#score = nltk.translate.bleu_score.corpus_bleu(target, pred, smoothing_function=smoothing.method4)
		# 	return score

		loss = get_loss(final_output, tgt_true)
		

		self.ppl = K.exp(loss)
		self.accu = get_accu(final_output, tgt_true)
		# self.bleu = bleu(tgt_true, final_output)
		# calculate BLEU score
		# print('BLEU-1: %f' % corpus_bleu(tgt_true, final_output, weights=(1.0, 0, 0, 0)))
		# print('BLEU-2: %f' % corpus_bleu(tgt_true, final_output, weights=(0.5, 0.5, 0, 0)))
		# print('BLEU-3: %f' % corpus_bleu(tgt_true, final_output, weights=(0.3, 0.3, 0.3, 0)))
		# print('BLEU-4: %f' % corpus_bleu(tgt_true, final_output, weights=(0.25, 0.25, 0.25, 0.25)))

		self.model = Model([src_seq_input, tgt_seq_input]+masks, final_output)
		self.model.add_loss([loss])
		# self.model.metrics_tensors = []	# added by me
		self.model.compile(optimizer, None)
		self.model.metrics_names.append('ppl')
		# self.model.metrics_tensors.append(self.ppl)
		self.model.add_metric(self.ppl, 'ppl')
		self.model.metrics_names.append('accu')
		self.model.add_metric(self.accu,'accu')
		# self.model.add_metric(self.bleu,'bleu')
		# self.model.metrics_tensors.append(self.accu)

	def make_src_seq_matrix(self, input_seqs):
		"""Encode token sequences into a padded int32 id matrix with start/end marks."""
		if type(input_seqs[0]) == type(''): input_seqs = [input_seqs]
		maxlen = max(map(len, input_seqs))
		# +3 leaves room for the start id, the end id and padding slack —
		# presumably only +2 is strictly needed; confirm before changing.
		src_seq = np.zeros((len(input_seqs), maxlen+3), dtype='int32')
		src_seq[:,0] = self.i_tokens.startid()
		for i, seq in enumerate(input_seqs):
			for ii, z in enumerate(seq):
				src_seq[i,1+ii] = self.i_tokens.id(z)
			src_seq[i,1+len(seq)] = self.i_tokens.endid()
		return src_seq
	
	def make_readout_decode_model(self, max_output_len=32):
		"""Build a single-pass decode model driven by an InferRNN readout cell."""
		src_seq_input = Input(shape=(None,), dtype='int32')
		tgt_start_input = Input(shape=(1,), dtype='int32')
		src_seq = src_seq_input
		enc_mask = Lambda(lambda x:K.cast(K.greater(x, 0), 'float32'))(src_seq)
		src_emb = self.i_word_emb(src_seq)
		if self.pos_emb: 
			src_emb = add_layer([src_emb, self.pos_emb(src_seq)])

		src_emb = self.emb_dropout(src_emb)
		enc_output = self.encoder(src_emb, src_seq)

		tgt_emb = self.o_word_emb(tgt_start_input)
		# Repeat the start token/embedding max_output_len times so the RNN
		# has one step per output position.
		tgt_seq = Lambda(lambda x:K.repeat_elements(x, max_output_len, 1))(tgt_start_input)
		rep_input = Lambda(lambda x:K.repeat_elements(x, max_output_len, 1))(tgt_emb)
	
		cell = ReadoutDecoderCell(self.o_word_emb, self.pos_emb, self.decoder, self.target_layer)
		final_output = InferRNN(cell, return_sequences=True)(rep_input, 
				initial_state=[tgt_start_input, K.ones_like(tgt_start_input), K.zeros_like(tgt_seq)] + \
						[rep_input for _ in self.decoder.layers], 
				constants=[enc_output, enc_mask])
		final_output = Lambda(lambda x:K.squeeze(x, -1))(final_output)
		self.readout_model = Model([src_seq_input, tgt_start_input], final_output)
		
	def decode_sequence_readout_x(self, X, batch_size=32, max_output_len=64):
		"""Decode an already-encoded id matrix X; returns raw id sequences."""
		if self.readout_model is None: self.make_readout_decode_model(max_output_len)
		target_seq = np.zeros((X.shape[0], 1), dtype='int32')
		target_seq[:,0] = self.o_tokens.startid()
		ret = self.readout_model.predict([X, target_seq], batch_size=batch_size, verbose=1)
		return ret

	def generate_sentence(self, rets, delimiter=''):
		"""Convert decoded id sequences to strings, truncating at the end id."""
		sents = []
		for x in rets:
			end_pos = min([i for i, z in enumerate(x) if z == self.o_tokens.endid()]+[len(x)])
			rsent = [*map(self.o_tokens.token, x)][:end_pos]
			sents.append(delimiter.join(rsent))
		return sents

	def decode_sequence_readout(self, input_seqs, delimiter=''):
		"""Decode raw token sequences via the readout model; returns sentence(s)."""
		if self.readout_model is None: self.make_readout_decode_model()
		src_seq = self.make_src_seq_matrix(input_seqs)
		target_seq = np.zeros((src_seq.shape[0],1), dtype='int32')
		target_seq[:,0] = self.o_tokens.startid()
		rets = self.readout_model.predict([src_seq, target_seq])
		rets = self.generate_sentence(rets, delimiter)
		# A single string input yields a single string output.
		if type(input_seqs[0]) is type('') and len(rets) == 1: rets = rets[0]
		return rets

	def make_fast_decode_model(self):
		"""Build encode_model plus a per-step decode_model for incremental decoding."""
		src_seq_input = Input(shape=(None,), dtype='int32')
		src_emb = self.i_word_emb(src_seq_input)
		if self.pos_emb: src_emb = add_layer([src_emb, self.pos_emb(src_seq_input)])
		src_emb = self.emb_dropout(src_emb)
		enc_output = self.encoder(src_emb, src_seq_input)
		self.encode_model = Model(src_seq_input, enc_output)

		self.decoder_pre_step = DecoderPerStep(self.decoder)
		
		# Fresh inputs for the step-wise decoder (one target token per call,
		# plus cached per-layer decoder states).
		src_seq_input = Input(shape=(None,), dtype='int32')
		tgt_one_input = Input(shape=(1,), dtype='int32')
		enc_ret_input = Input(shape=(None, self.d_model))
		dec_ret_inputs = [Input(shape=(None, self.d_model)) for _ in self.decoder.layers]

		# Current decode position = length of the cached state so far.
		tgt_pos = Lambda(lambda x:tf.shape(x)[1])(dec_ret_inputs[0])

		tgt_one = self.o_word_emb(tgt_one_input)
		if self.pos_emb: tgt_one = add_layer([tgt_one, self.pos_emb(tgt_pos, pos_input=True)])

		dec_outputs = self.decoder_pre_step([tgt_one, src_seq_input, enc_ret_input]+dec_ret_inputs)	
		final_output = self.target_layer(dec_outputs[-1])

		self.decode_model = Model([tgt_one_input, src_seq_input, enc_ret_input]+dec_ret_inputs, 
							dec_outputs[:-1]+[final_output])
		

	def decode_sequence_fast(self, input_seqs, batch_size=32, delimiter='', verbose=0):
		"""Greedy decode in batches using the fast per-step model."""
		if self.decode_model is None: self.make_fast_decode_model()
		src_seq = self.make_src_seq_matrix(input_seqs)

		start_mark, end_mark = self.o_tokens.startid(), self.o_tokens.endid()
		max_len = self.len_limit
		encode_model = self.encode_model
		decode_model = self.decode_model

		decode_batch = lambda x: decode_batch_greedy(x, encode_model, decode_model, start_mark, end_mark, max_len)
		
		rets = []
		rng = range(0, src_seq.shape[0], batch_size)
		if verbose and src_seq.shape[0] > batch_size: rng = tqdm(rng, total=len(rng))
		for iter in rng:
			rets.extend( decode_batch(src_seq[iter:iter+batch_size]) )
			
		rets = [delimiter.join(list(map(self.o_tokens.token, ret))) for ret in rets]
		if type(input_seqs[0]) is type('') and len(rets) == 1: rets = rets[0]
		return rets

	def beam_search(self, input_seqs, topk=5, batch_size=8, length_penalty=1, delimiter='', verbose=0):
		"""Beam-search decode; returns, per input, (sentence, score) pairs sorted by score."""
		if self.decode_model is None: self.make_fast_decode_model()
		src_seq = self.make_src_seq_matrix(input_seqs)

		start_mark, end_mark = self.o_tokens.startid(), self.o_tokens.endid()
		max_len = self.len_limit
		encode_model = self.encode_model
		decode_model = self.decode_model

		decode_batch = lambda x: decode_batch_beam_search(x, topk, encode_model, decode_model,
													start_mark, end_mark, max_len)
		
		rets = {}
		rng = range(0, src_seq.shape[0], batch_size)
		if verbose and src_seq.shape[0] > batch_size: rng = tqdm(rng, total=len(rng))

		for iter in rng:
			# Scores are length-normalized by (len+1)**length_penalty.
			for i, x, y in decode_batch(src_seq[iter:iter+batch_size]):
				rets.setdefault(iter+i, []).append( (x, y/np.power(len(x)+1, length_penalty)) )
		rets = {x:sorted(ys,key=lambda x:x[-1], reverse=True) for x,ys in rets.items()}
		rets = [rets[i] for i in range(len(rets))]

		rets = [[(delimiter.join(list(map(self.o_tokens.token, x))), y) for x, y in r] for r in rets]
		if type(input_seqs[0]) is type('') and len(rets) == 1: rets = rets[0]
		return rets
コード例 #4
0
ファイル: wgan_div.py プロジェクト: withanageyasiru/gan_test
# WGAN-div style discriminator loss: score difference plus a gradient-norm
# penalty on real and fake samples (exponent p, weight k assumed defined
# earlier in the file — confirm against the full script).
d_loss = K.mean(x_real_score - x_fake_score)

real_grad = K.gradients(x_real_score, [x_real])[0]
fake_grad = K.gradients(x_fake_score, [x_fake])[0]

real_grad_norm = K.sum(real_grad**2, axis=[1, 2, 3])**(p / 2)
fake_grad_norm = K.sum(fake_grad**2, axis=[1, 2, 3])**(p / 2)
grad_loss = K.mean(real_grad_norm + fake_grad_norm) * k / 2

# Estimated Wasserstein distance, tracked as a metric only.
w_dist = K.mean(x_fake_score - x_real_score)

d_train_model.add_loss(d_loss + grad_loss)
d_train_model.compile(optimizer=Adam(2e-4, 0.5))
#d_train_model.metrics_names.append('w_dist')
#d_train_model.metrics_tensors.append(w_dist)
d_train_model.add_metric(w_dist, 'w_dist')  # custom metric

# Combined model (trains the generator)
g_model.trainable = True
d_model.trainable = False

x_fake = g_model(z_in)
x_fake_score = d_model(x_fake)

g_train_model = Model(z_in, x_fake_score)

# Generator minimizes the discriminator's score on fakes.
g_loss = K.mean(x_fake_score)
g_train_model.add_loss(g_loss)
g_train_model.compile(optimizer=Adam(2e-4, 0.5))

# Inspect the model structure
コード例 #5
0
def Vae_MNIST_NN1(input_tensor=None, train=False):
    """Build (and optionally train) a dense variational autoencoder for MNIST.

    Parameters
    ----------
    input_tensor : Keras tensor or None
        Required when ``train`` is False; used as the model input.
    train : bool
        When True, load MNIST, build a fresh input tensor, fit for 50
        epochs and save weights to ./vae_mnist_nn1.h5; otherwise load the
        saved weights.

    Returns
    -------
    The Keras VAE model (decoder outputs are [pos_mean, pos_log_var]).
    """
    np.random.seed(0)
    # MNIST dataset
    image_size = 28
    if train:
        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train = np.reshape(x_train, [-1, image_size, image_size, 1])
        x_test = np.reshape(x_test, [-1, image_size, image_size, 1])
        x_train = x_train.astype('float32') / 255
        x_test = x_test.astype('float32') / 255

        # network parameters
        input_shape = (image_size, image_size, 1)
        input_tensor = Input(shape=input_shape)

        batch_size = 128
        epochs = 50

    elif input_tensor is None:
        # Fixed grammar in the user-facing message ("proved" -> "provide").
        print('you have to provide input_tensor when testing')
        exit()

    latent_dim = 200
    intermediate_dims = np.array([400])

    # VAE model = encoder + decoder
    # build encoder model
    original_dim = image_size * image_size
    inputs = Reshape((original_dim,), name='encoder_input')(input_tensor)
    # First hidden layer, then any remaining intermediate widths.
    x = Dense(intermediate_dims[0], activation='relu')(inputs)
    for dim in intermediate_dims[1:]:
        x = Dense(dim, activation='relu')(x)
    z_mean = Dense(latent_dim, name='z_mean')(x)
    z_log_var = Dense(latent_dim, name='z_log_var')(x)

    # use reparameterization trick to push the sampling out as input
    # note that "output_shape" isn't necessary with the TensorFlow backend
    z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])

    # instantiate encoder model
    encoder = Model(input_tensor, [z_mean, z_log_var, z], name='encoder')
    #encoder.summary()

    # build decoder model (mirror of the encoder's hidden widths)
    intermediate_dims = np.flipud(intermediate_dims)
    latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
    x = Dense(intermediate_dims[0], activation='relu')(latent_inputs)
    for dim in intermediate_dims[1:]:
        x = Dense(dim, activation='relu')(x)
    pos_mean = Dense(original_dim, name='pos_mean')(x)
    pos_log_var = Dense(original_dim, name='pos_log_var')(x)

    # instantiate decoder model
    decoder = Model(latent_inputs, [pos_mean, pos_log_var], name='decoder')
    #decoder.summary()

    # instantiate VAE model
    outputs = decoder(encoder(input_tensor)[2])
    vae = Model(input_tensor, outputs, name='vae_mlp')
    #vae.summary()
    if train:
        # VAE loss = reconstruction_loss + kl_loss
        # Gaussian log-likelihood of the input under (pos_mean, pos_log_var).
        loss_a = float(np.log(2 * np.pi)) + outputs[1]
        loss_m = K.square(outputs[0] - inputs) / K.exp(outputs[1])
        reconstruction_loss = -0.5 * K.sum((loss_a + loss_m), axis=-1)

        kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
        kl_loss = K.sum(kl_loss, axis=-1)
        kl_loss *= -0.5
        vae_loss = K.mean(-reconstruction_loss + kl_loss)
        vae.add_loss(vae_loss)
        vae.compile(optimizer="adam")
        vae.summary()
        # NOTE(review): add_metric is called after compile here; some Keras
        # versions require metrics to be added before compiling — confirm
        # against the Keras version in use.
        vae.add_metric(reconstruction_loss, "reconstruct")
        vae.add_metric(kl_loss, "kl")
        vae.fit(x_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, None))
        # save model
        vae.save_weights('./vae_mnist_nn1.h5')
    else:
        vae.load_weights('./vae_mnist_nn1.h5')

    return vae
コード例 #6
0
    x = K.l2_normalize(x, 1)
    y = K.l2_normalize(y, 1)
    return K.sum(x * y, 1, keepdims=True)


# Discriminator/generator combined loss (GAN-QP-style: score differences with
# a quadratic-potential term qp_loss; "_ng" tensors presumably have stopped
# gradients — confirm against the definitions earlier in the file).
t1_loss = z_real_mean - z_fake_ng_mean
t2_loss = z_fake_mean - z_fake_ng_mean
z_corr = correlation(z_in, z_fake)
# Quadratic potential: normalizes the score gap by the pixel-space distance
# between real and (no-gradient) fake samples.
qp_loss = 0.25 * t1_loss[:, 0]**2 / K.mean(
    (x_real - x_fake_ng)**2, axis=[1, 2, 3])

train_model.add_loss(K.mean(t1_loss + t2_loss - 1. * z_corr) + K.mean(qp_loss))
train_model.compile(optimizer=RMSprop(1e-4, 0.99))
#train_model.metrics_names.append('t_loss')
#train_model.metrics_tensors.append(K.mean(t1_loss))
train_model.add_metric(K.mean(t1_loss), 't_loss')
#train_model.metrics_names.append('z_corr')
#train_model.metrics_tensors.append(K.mean(z_corr))
train_model.add_metric(K.mean(z_corr), 'z_loss')

# Inspect the model structure
train_model.summary()


class ExponentialMovingAverage:
    """Exponential moving average (EMA) of model weights.

    Usage: after ``model.compile`` and before the first training step,
    instantiate this object and then call its ``inject`` method.
    """
    def __init__(self, model, momentum=0.9999):
        # NOTE(review): `model` is accepted but never stored, and the
        # `inject` method mentioned in the docstring is not visible here —
        # this class appears truncated in this chunk; confirm against the
        # full source before relying on it.
        self.momentum = momentum
コード例 #7
0
def get_model(num_users,
              num_items,
              layers=None,
              reg_layers=None,
              fake_layers=None,
              fake_reg_layers=None,
              last_activation='sigmoid',
              fake_last_activation='sigmoid'):
    """Build an MLP recommender with a parallel "fake user" tower.

    The real tower (user x item) produces `prediction`; the fake tower
    (fake user x item) produces `fake_prediction`.  The model's loss pulls
    the two predictions together while fitting the observed rating.

    Parameters
    ----------
    num_users, num_items : embedding vocabulary sizes.
    layers / reg_layers : widths and L2 strengths of the real MLP tower
        (defaults [20, 10] / [0, 0]); must have equal length.
    fake_layers / fake_reg_layers : same for the fake tower.
    last_activation, fake_last_activation : output activations.

    Returns
    -------
    A compiled-ready Keras Model taking [fake_user, user, item, rating]
    inputs and outputting `prediction`, with loss/mae/rmse metrics attached.
    """
    if reg_layers is None:
        reg_layers = [0, 0]
    if layers is None:
        layers = [20, 10]
    if fake_reg_layers is None:
        fake_reg_layers = [0, 0]
    if fake_layers is None:
        fake_layers = [20, 10]
    assert len(layers) == len(reg_layers)
    assert len(fake_layers) == len(fake_reg_layers)
    num_layer = len(layers)  # Number of layers in the MLP
    # Bug fix: was len(layers), which indexes fake_layers out of range (or
    # truncates the fake tower) whenever the two towers differ in depth.
    fake_num_layer = len(fake_layers)

    # Input variables
    fake_user_input = Input(shape=(1, ), dtype='int32', name='fake_user_input')
    user_input = Input(shape=(1, ), dtype='int32', name='user_input')
    item_input = Input(shape=(1, ), dtype='int32', name='item_input')
    rating_output = Input(shape=(1, ), dtype='float32', name='rating_output')

    # NOTE(review): output_dim uses layers[0]//2 (not fake_layers[0]//2) so
    # the fake concat width matches the real tower's — confirm intended.
    MLP_Embedding_Fake_User = Embedding(input_dim=num_users,
                                        output_dim=layers[0] // 2,
                                        name='fake_user_embedding',
                                        embeddings_initializer='random_normal',
                                        # Bug fix: regularize the fake
                                        # embedding with its own strength,
                                        # not the real tower's.
                                        embeddings_regularizer=l2(
                                            fake_reg_layers[0]),
                                        input_length=1)
    MLP_Embedding_User = Embedding(input_dim=num_users,
                                   output_dim=layers[0] // 2,
                                   name='user_embedding',
                                   embeddings_initializer='random_normal',
                                   embeddings_regularizer=l2(reg_layers[0]),
                                   input_length=1)
    MLP_Embedding_Item = Embedding(input_dim=num_items,
                                   output_dim=layers[0] // 2,
                                   name='item_embedding',
                                   embeddings_initializer='random_normal',
                                   embeddings_regularizer=l2(reg_layers[0]),
                                   input_length=1)

    # Crucial to flatten an embedding vector!
    fake_user_latent = Flatten()(MLP_Embedding_Fake_User(fake_user_input))
    user_latent = Flatten()(MLP_Embedding_User(user_input))
    item_latent = Flatten()(MLP_Embedding_Item(item_input))

    # The 0-th layer is the concatenation of embedding layers
    # vector = merge([user_latent, item_latent], mode = 'concat')
    vector = merge.concatenate([user_latent, item_latent])

    # MLP layers
    for idx in range(1, num_layer):
        layer = Dense(layers[idx],
                      kernel_regularizer=l2(reg_layers[idx]),
                      activation='relu',
                      name='layer%d' % idx)
        vector = layer(vector)

    # Final prediction layer
    prediction = Dense(1,
                       activation=last_activation,
                       kernel_initializer='lecun_uniform',
                       name='prediction')(vector)

    # Fake tower mirrors the real tower with its own widths/regularizers.
    fake_vector = merge.concatenate([fake_user_latent, item_latent])
    for idx in range(1, fake_num_layer):
        layer = Dense(fake_layers[idx],
                      kernel_regularizer=l2(fake_reg_layers[idx]),
                      activation='relu',
                      name='fake_layer%d' % idx)
        fake_vector = layer(fake_vector)

    fake_prediction = Dense(1,
                            activation=fake_last_activation,
                            kernel_initializer='lecun_uniform',
                            name='fake_prediction')(fake_vector)

    model = Model(
        inputs=[fake_user_input, user_input, item_input, rating_output],
        outputs=prediction)

    # Loss: match the fake tower to the real one AND fit the observed rating.
    loss = K.mean(
        K.square(prediction - fake_prediction) +
        K.square(rating_output - prediction))
    model.add_loss(loss)
    model.add_metric(loss, name='loss')
    model.add_metric(K.mean(K.abs(prediction - rating_output)), name='mae')
    model.add_metric(K.sqrt(K.mean(K.square(prediction - rating_output))),
                     name='rmse')
    return model