def DIFM(linear_feature_columns, dnn_feature_columns, att_embedding_size=8,
         att_head_num=8, att_res=True, dnn_hidden_units=(256, 128, 64),
         l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
         seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
         task='binary'):
    """Instantiates the DIFM Network architecture.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param att_embedding_size: integer, the embedding size in multi-head self-attention network.
    :param att_head_num: int. The head number in multi-head self-attention network.
    :param att_res: bool. Whether or not use standard residual connections before output.
    :param dnn_hidden_units: list, list of positive integer, the layer number and units in each layer of DNN.
        Must not be empty.
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer, to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    :raises ValueError: if ``dnn_hidden_units`` is empty or ``None``, or if the
        deep feature columns yield no sparse embeddings.
    """
    # Validate before touching any feature column so that a bad configuration
    # fails fast with a ValueError (None used to surface as a TypeError).
    if dnn_hidden_units is None or len(dnn_hidden_units) == 0:
        raise ValueError("dnn_hidden_units is null!")

    features = build_input_features(
        linear_feature_columns + dnn_feature_columns)

    # Number of (fixed or variable length) sparse fields; this is the width of
    # the input-aware factor produced by both branches below.
    sparse_feat_num = sum(
        isinstance(column, (SparseFeat, VarLenSparseFeat))
        for column in dnn_feature_columns)

    inputs_list = list(features.values())

    sparse_embedding_list, _ = input_from_feature_columns(
        features, dnn_feature_columns, l2_reg_embedding, seed)

    if not len(sparse_embedding_list) > 0:
        raise ValueError("there are no sparse features")

    # Branch 1 (m_vec): multi-head self-attention over the stacked embeddings.
    att_input = concat_func(sparse_embedding_list, axis=1)
    att_out = InteractingLayer(att_embedding_size, att_head_num, att_res,
                               scaling=True)(att_input)
    att_out = Flatten()(att_out)
    m_vec = Dense(sparse_feat_num, use_bias=False)(att_out)

    # Branch 2 (m_bit): plain DNN over the flattened embeddings.
    dnn_input = combined_dnn_input(sparse_embedding_list, [])
    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed=seed)(dnn_input)
    m_bit = Dense(sparse_feat_num, use_bias=False)(dnn_output)

    # the complete input-aware factor m_x
    input_aware_factor = add_func([m_vec, m_bit])

    # The same factor re-weights both the linear term and the FM embeddings.
    linear_logit = get_linear_logit(
        features, linear_feature_columns, seed=seed, prefix='linear',
        l2_reg=l2_reg_linear, sparse_feat_refine_weight=input_aware_factor)

    fm_input = concat_func(sparse_embedding_list, axis=1)
    refined_fm_input = Lambda(
        lambda x: x[0] * tf.expand_dims(x[1], axis=-1))(
        [fm_input, input_aware_factor])
    fm_logit = FM()(refined_fm_input)

    final_logit = add_func([linear_logit, fm_logit])

    output = PredictionLayer(task)(final_logit)
    model = Model(inputs=inputs_list, outputs=output)
    return model
def build_3d_cnn(w, h, d, s, num_outputs):
    """Assemble a Sequential 3D-CNN over a stack of frames.

    Credit: https://github.com/jessecha/DNRacing/blob/master/3D_CNN_Model/model.py

    w : width
    h : height
    d : depth
    s : n_stacked
    num_outputs : number of units in the final Dense layer

    The network input shape is ``(s, h, w, d)``. The returned model is not
    compiled and its last layer has no activation.
    """
    net = Sequential()

    # Crop 50 entries from the start and 10 from the end of the second
    # (height) axis; the other two cropped axes are left untouched.
    net.add(Cropping3D(cropping=((0, 0), (50, 10), (0, 0)),
                       input_shape=(s, h, w, d)))

    # (filters, strides) for each convolutional stage. Only the first stage
    # strides spatially; every stage is followed by ReLU and a pooling layer
    # that never pools along the first axis.
    conv_stages = (
        (16, (1, 3, 3)),
        (32, (1, 1, 1)),
        (64, (1, 1, 1)),
        (128, (1, 1, 1)),
    )
    for n_filters, conv_strides in conv_stages:
        net.add(Conv3D(filters=n_filters,
                       kernel_size=(3, 3, 3),
                       strides=conv_strides,
                       data_format='channels_last',
                       padding='same'))
        net.add(Activation('relu'))
        net.add(MaxPooling3D(pool_size=(1, 2, 2),
                             strides=(1, 2, 2),
                             padding='valid',
                             data_format=None))

    # Fully connected head: two identical Dense/BN/ReLU/Dropout stages.
    net.add(Flatten())
    for _ in range(2):
        net.add(Dense(256))
        net.add(BatchNormalization())
        net.add(Activation('relu'))
        net.add(Dropout(0.5))

    # Linear output (a tanh activation is intentionally not applied).
    net.add(Dense(num_outputs))
    return net
# Remaining layers, appended in order:
#   * second convolutional layer with ReLU-activation, then max-pooling;
#   * Flatten, turning the 4-rank convolutional output into the 2-rank input
#     a fully-connected / dense layer expects;
#   * first dense layer with ReLU-activation;
#   * last dense layer with softmax-activation for classification.
for _layer in (
        Conv2D(kernel_size=5, strides=1, filters=36, padding='same',
               activation='relu', name='layer_conv2'),
        MaxPooling2D(pool_size=2, strides=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
):
    model.add(_layer)

optimizer = Adam(lr=1e-3)
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Train for a single epoch, then report every metric on the test split.
model.fit(x=data.train.images,
          y=data.train.labels,
          batch_size=128,
          epochs=1)

result = model.evaluate(x=data.test.images, y=data.test.labels)
for name, value in zip(model.metrics_names, result):
    print(name, value)
from tensorflow.python.keras.datasets import imdb
from tensorflow.python.keras import preprocessing
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Flatten, Dense, Embedding

# Vocabulary size kept by imdb.load_data and number of tokens kept per review.
max_features = 1000
maxlen = 20

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = preprocessing.sequence.pad_sequences(x_test, maxlen=maxlen)

# Define model
model = Sequential()
# The embedding table is sized from max_features (it used to be a hard-coded
# 10000): load_data(num_words=max_features) only emits indices below
# max_features, so a larger table just adds rows that can never be looked up.
model.add(Embedding(max_features, 8, input_length=maxlen))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.summary()

history = model.fit(x_train,
                    y_train,
                    epochs=10,
                    batch_size=32,
                    validation_split=0.2)
print(history.history)
# 'val_acc' matches the metric name 'acc' passed to compile() above.
print('val accuracy: {}'.format(history.history['val_acc']))
def default_categorical(input_shape=(120, 160, 3), roi_crop=(0, 0)):
    """Build the categorical CNN with separate angle and throttle heads.

    :param input_shape: shape of one input image; ``input_shape[0]`` also
        selects how many strided convolutions are used.
    :param roi_crop: pair passed to ``Cropping2D`` as the crop of the first
        image axis (top, bottom); the second axis is never cropped.
    :return: an uncompiled Keras ``Model`` with input ``img_in`` and outputs
        ``angle_out`` (15-way softmax) and ``throttle_out`` (20-way softmax).
    """
    drop = 0.2  # dropout rate shared by every Dropout layer below

    # Input layer
    img_in = Input(shape=input_shape, name='img_in')
    x = img_in
    # Trim the configured number of pixels off top and bottom.
    x = Cropping2D(cropping=(roi_crop, (0, 0)))(x)
    # x = Lambda(lambda x: x/127.5 - 1.)(x)  # normalize and re-center
    x = BatchNormalization()(x)

    # 24 filters, 5x5 kernel, 2x2 stride, relu activation
    x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu',
                      name="conv2d_1")(x)
    x = Dropout(drop)(x)  # randomly drop 20% of the activations
    # 32 filters, 5x5 kernel, 2x2 stride, relu activation
    x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu',
                      name="conv2d_2")(x)
    x = Dropout(drop)(x)

    # Smaller inputs get smaller kernels / unit strides so the feature map is
    # not reduced as aggressively. The test uses the *uncropped* input_shape.
    if input_shape[0] > 32:
        # 64 filters, 5x5 kernel, 2x2 stride, relu
        x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu',
                          name="conv2d_3")(x)
    else:
        # 64 filters, 3x3 kernel, 1x1 stride, relu
        x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu',
                          name="conv2d_3")(x)

    # conv2d_4 is skipped entirely when input_shape[0] <= 32.
    if input_shape[0] > 64:
        # 64 filters, 3x3 kernel, 2x2 stride, relu
        x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu',
                          name="conv2d_4")(x)
    elif input_shape[0] > 32:
        # 64 filters, 3x3 kernel, 1x1 stride, relu
        x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu',
                          name="conv2d_4")(x)
    x = Dropout(drop)(x)

    # 64 filters, 3x3 kernel, 1x1 stride, relu
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu',
                      name="conv2d_5")(x)
    x = Dropout(drop)(x)

    # Possibly add MaxPooling (will make it less sensitive to position in
    # image). Camera angle fixed, so may not be needed.

    x = Flatten(name='flattened')(x)  # flatten to 1D for the dense layers
    x = Dense(100, activation='relu', name="fc_1")(x)
    x = Dropout(drop)(x)
    x = Dense(50, activation='relu', name="fc_2")(x)
    x = Dropout(drop)(x)

    # Categorical output of the angle: softmax over 15 bins.
    angle_out = Dense(15, activation='softmax', name='angle_out')(x)

    # Categorical output of the throttle: softmax over 20 bins.
    throttle_out = Dense(20, activation='softmax', name='throttle_out')(x)

    model = Model(inputs=[img_in], outputs=[angle_out, throttle_out])
    return model
# First convolutional stage; it fixes the input shape from the data itself.
model.add(Conv2D(layer_size, (3, 3), input_shape=x.shape[1:]))
model.add(Activation("relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.2))

# Remaining convolutional stages: two conv+relu pairs, then pool and dropout.
for _ in range(conv_layer - 1):
    model.add(Conv2D(layer_size, (3, 3)))
    model.add(Activation("relu"))
    model.add(Conv2D(layer_size, (3, 3)))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.2))

model.add(Flatten())

# Fully connected stages.
for _ in range(dense_layer):
    model.add(Dense(layer_size))
    model.add(Activation("relu"))
    model.add(Dropout(0.2))

# Single sigmoid unit for binary classification.
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss="binary_crossentropy",
              optimizer="adam",
              metrics=['accuracy'])

history = model.fit(x, y, batch_size=32, epochs=epochs, validation_split=0.1)

# Depending on the Keras version the 'accuracy' metric is logged either as
# 'acc' or as 'accuracy'; pick whichever key is present instead of assuming
# 'acc' and failing with a KeyError.
_acc_key = 'acc' if 'acc' in history.history else 'accuracy'
acc = history.history[_acc_key]
val_acc = history.history['val_' + _acc_key]
loss = history.history['loss']
val_loss = history.history['val_loss']
1), ytest.reshape(-1, 1)
# NOTE(review): the line above is the tail of a statement that starts before
# this fragment; it is reproduced unchanged.
print(xtrain.shape, xval.shape, xtest.shape, sep=sp)
print(ytrain.shape, yval.shape, ytest.shape, sep=sp)

# build the model: two stacked LSTMs (50 units returning sequences, then 5
# units) followed by a single linear output unit, trained with MSE loss.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(len(xtrain[0]), 1)))
# model.add(Dropout(0.2))
# model.add(LSTM(150, return_sequences=True))
# model.add(Dropout(0.2))
model.add(LSTM(5))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='Adam')

# shuffle=False keeps the training samples in their given order.
model_history = model.fit(xtrain,
                          ytrain,
                          epochs=30,
                          batch_size=100,
                          verbose=1,
                          validation_data=(xval, yval),
                          shuffle=False)

# model.history is read here rather than model_history; presumably they are
# the same History object returned by fit() -- TODO confirm.
lossValues = pd.DataFrame(model.history.history)
# NOTE(review): compile() above passes no metrics, so a 'val_acc' column is
# presumably never present; the rename mapping continues past this fragment.
lossValues = lossValues.rename(
    {
        'val_loss': 'ValidationLoss',
        'val_acc': 'Val_Accuray',
# Fill the embedding matrix row by row from the word2vec model, counting how
# many vocabulary words were found and how many were missing.
not_in_model = 0
in_model = 0
for word, i in word_index.items():
    w2v_key = unicode(word)
    if w2v_key not in w2v_model:
        not_in_model += 1
        continue
    in_model += 1
    embedding_matrix[i] = np.asarray(w2v_model[w2v_key], dtype='float32')
print('{} words not in w2v model'.format(not_in_model))

# Frozen embedding layer initialised from the matrix built above.
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

print('(5) training model...')
# Embedding -> LSTM -> Dropout -> softmax over the label columns.
model = Sequential([
    embedding_layer,
    LSTM(200, dropout=0.2, recurrent_dropout=0.2),
    Dropout(0.2),
    Dense(labels.shape[1], activation='softmax'),
])
model.summary()

model_png = os.path.join(ckpt_path, 'word_vector_lstm_model.png')
plot_model(model, to_file=model_png, show_shapes=True)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])
print(model.metrics_names)

model.fit(x_train,
          y_train,
          validation_data=(x_val, y_val),
          epochs=2,
          batch_size=128)
model.save(os.path.join(ckpt_path, 'word_vector_lstm.h5'))

print('(6) testing model...')
print(model.evaluate(x_test, y_test))
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False,
         dnn_hidden_units=(200, 80), dnn_activation='relu',
         att_hidden_units=(64, 16), att_activation="dice",
         att_weight_normalization=True, l2_reg_dnn=0, l2_reg_embedding=1e-5,
         dnn_dropout=0, init_std=0.0001, seed=1024, task='binary'):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param gru_type: str, can be GRU AIGRU AUGRU AGRU
    :param use_negsampling: bool, whether or not use negative sampling
    :param alpha: float, weight of auxiliary_loss
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param dnn_hidden_units: list, list of positive integer or empty list, the layer number and units in each layer of DNN
    :param dnn_activation: Activation function to use in DNN
    :param att_hidden_units: list, list of positive integer, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :return: A Keras model instance.
    """
    check_feature_config_dict(feature_dim_dict)

    (sparse_input, dense_input,
     user_behavior_input, user_behavior_length) = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding layer per sparse feature, keyed by feature name.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(
                mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name)

    # Query: sequence features of the candidate. Keys: the same features over
    # the behaviour history. Deep input: every sparse feature.
    query_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict["sparse"],
        return_feat_list=seq_feature_list)
    keys_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, user_behavior_input,
        feature_dim_dict['sparse'], return_feat_list=seq_feature_list)
    deep_input_emb_list = get_embedding_vec_list(
        sparse_embedding_dict, sparse_input, feature_dim_dict['sparse'])

    query_emb = concat_fun(query_emb_list)
    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(deep_input_emb_list)

    # Negative-sampled behaviour sequences are extra model inputs and only
    # exist when the auxiliary loss is requested.
    neg_concat_behavior = None
    if use_negsampling:
        neg_user_behavior_input = OrderedDict()
        for idx, feat in enumerate(seq_feature_list):
            neg_user_behavior_input[feat] = Input(
                shape=(hist_len_max,), name='neg_seq_' + str(idx) + '-' + feat)

        neg_uiseq_embed_list = get_embedding_vec_list(
            sparse_embedding_dict, neg_user_behavior_input,
            feature_dim_dict["sparse"], seq_feature_list)
        neg_concat_behavior = concat_fun(neg_uiseq_embed_list)

    hist, aux_loss_1 = interest_evolution(
        keys_emb,
        query_emb,
        user_behavior_length,
        gru_type=gru_type,
        use_neg=use_negsampling,
        neg_concat_behavior=neg_concat_behavior,
        embedding_size=embedding_size,
        att_hidden_size=att_hidden_units,
        att_activation=att_activation,
        att_weight_normalization=att_weight_normalization)

    # Deep part: feature embeddings + evolved interest (+ dense inputs).
    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    dnn_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                  dnn_dropout, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(dnn_out)
    output = PredictionLayer(task)(final_logit)

    # Input order: regular inputs, optional negative sequences, then length.
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list += list(neg_user_behavior_input.values())
    model_input_list += [user_behavior_length]

    model = tf.keras.models.Model(inputs=model_input_list, outputs=output)

    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)

    # Initialise all variables in the current Keras session before returning.
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
return_state=True, dropout=0.4, recurrent_dropout=0.2)
# NOTE(review): the line above closes a call that starts before this
# fragment (presumably the decoder LSTM constructor); reproduced unchanged.
# The decoder starts from the encoder's final states (state_h, state_c).
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(
    dec_emb, initial_state=[state_h, state_c])

# Attention layer
attn_layer = AttentionLayer(name='attention_layer')
attn_out, attn_states = attn_layer([encoder_outputs, decoder_outputs])

# Concat attention input and decoder LSTM output
decoder_concat_input = Concatenate(
    axis=-1, name='concat_layer')([decoder_outputs, attn_out])

# dense layer: a softmax over the target vocabulary at every time step
decoder_dense = TimeDistributed(Dense(y_vocab, activation='softmax'))
decoder_outputs = decoder_dense(decoder_concat_input)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.summary()

model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy')
# Stop once val_loss has not improved for 2 consecutive epochs.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=2)

# Decoder input drops the last token (y[:, :-1]); the target is the same
# sequence with the first token dropped (y[:, 1:]), with a trailing axis of
# size 1 added by the reshape.
history = model.fit([x_train, y_train[:, :-1]],
                    y_train.reshape(y_train.shape[0], y_train.shape[1],
                                    1)[:, 1:],
                    epochs=50,
                    callbacks=[es],
def _build_model(self, features, classes):
    """Build the (uncompiled) four-block CNN classifier.

    :param features: number of filters in the first block; later blocks and
        the dense head use 2x, 4x and 8x this value.
    :param classes: number of units in the final softmax layer.
    :return: a Keras ``Sequential`` model.
    """
    net = Sequential()

    # 1st convolution block. Its first conv carries the input shape and an L2
    # kernel regulariser, uses the default padding, and has no batch norm of
    # its own -- which is why it is written out instead of joining the loop.
    # NOTE(review): the shape is given as (width, height, channels) -- confirm
    # this matches the axis order of the training images.
    net.add(Conv2D(features,
                   kernel_size=(3, 3),
                   activation='relu',
                   input_shape=(self.IMAGE_WIDTH, self.IMAGE_HEIGHT,
                                self.COLOR_CHANNELS),
                   data_format='channels_last',
                   kernel_regularizer=l2(0.01)))
    net.add(Conv2D(features, kernel_size=(3, 3), activation='relu',
                   padding='same'))
    net.add(BatchNormalization())
    net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    net.add(Dropout(0.5))

    # 2nd, 3rd and 4th convolution blocks: two conv+BN pairs, pool, dropout.
    for width_mult in (2, 4, 8):
        for _ in range(2):
            net.add(Conv2D(width_mult * features,
                           kernel_size=(3, 3),
                           activation='relu',
                           padding='same'))
            net.add(BatchNormalization())
        net.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
        net.add(Dropout(0.5))

    net.add(Flatten())

    # classification block: (width multiplier, dropout rate) per dense layer
    for width_mult, rate in ((8, 0.4), (4, 0.4), (2, 0.5)):
        net.add(Dense(width_mult * features, activation='relu'))
        net.add(Dropout(rate))

    net.add(Dense(classes, activation='softmax'))
    return net
model.add(MaxPooling2D(pool_size=2, strides=2))

# Third convolutional layer (80 filters, 5x5, no padding) and its pooling.
model.add(
    Conv2D(kernel_size=5,
           strides=1,
           filters=80,
           padding="valid",
           activation="relu",
           name="conv_layer_3"))
model.add(MaxPooling2D(pool_size=2, strides=2))

# Classifier head.
model.add(Flatten())
model.add(Dense(128, activation="relu"))
model.add(Dense(num_classes, activation="softmax"))

# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# Import kept at its original position in the script.
from tensorflow.python.keras.optimizers import Adam

optimizer = Adam(lr=1e-3)
model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=["accuracy"])
model.fit(x=train_data, y=train_labels, epochs=50, batch_size=256)
# Problem definition: sums of n_numbers integers, each at most `largest`,
# encoded character by character over `alphabet`.
n_samples = 1000
n_numbers = 2
largest = 10
alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', ' ']
n_chars = len(alphabet)
# Input length: digits of every operand plus the '+' signs between them.
n_in_seq_length = n_numbers * ceil(log10(largest + 1)) + n_numbers - 1
# Output length: digits of the largest possible sum.
n_out_seq_length = ceil(log10(n_numbers * (largest + 1)))

# define LSTM configuration
n_batch = 10
n_epoch = 30

# create LSTM: encoder LSTM -> repeat the encoding once per output step ->
# decoder LSTM -> per-step softmax over the alphabet
model = Sequential()
model.add(LSTM(100, input_shape=(n_in_seq_length, n_chars)))
model.add(RepeatVector(n_out_seq_length))
model.add(LSTM(50, return_sequences=True))
model.add(TimeDistributed(Dense(n_chars, activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

# train LSTM: a fresh batch of random problems is generated for every epoch
for i in range(n_epoch):
    X, y = generate_data(n_samples, n_numbers, largest, alphabet)
    print(i)
    model.fit(X, y, epochs=1, batch_size=n_batch)

# evaluate on some new patterns
X, y = generate_data(n_samples, n_numbers, largest, alphabet)
result = model.predict(X, batch_size=n_batch, verbose=0)

# calculate error
expected = [invert(x, alphabet) for x in y]
activation='relu', input_shape=(128, 128, 1)))
# NOTE(review): the line above is the tail of a model.add(...) call that
# starts before this fragment; reproduced unchanged.
model.add(MaxPooling2D(pool_size=(3, 3)))

# layer 2
model.add(Conv2D(256, (7, 7), activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))

# flattening layer
model.add(Flatten())

# Dense layer
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))

# Dense output layer
model.add(Dense(num_classes, activation='softmax'))

# compiling the model with appropriate metrics, optimizer and loss function
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# training the model; the last 20% of the training data is held out for
# validation via validation_split
model.fit(train_x, train_y, batch_size=5, epochs=50, validation_split=0.2)

# saving the trained model
def ffnthree(xtrain, ytrain, xtest, ytest, input_shape, num_classes,
             batch_size, epochs, callbacks, ismodelsaved=False, tl=False):
    """Train, or load from disk, the three-hidden-layer feed-forward network.

    When ``ismodelsaved`` equals ``False`` a new network is built, trained on
    ``(xtrain, ytrain)`` with ``(xtest, ytest)`` as validation data, evaluated
    on the test data and its learning curves are shown. Otherwise a saved
    model is loaded; which one depends on ``input_shape`` (92 selects the
    4x23 model) and, for any other input size, on ``tl``.

    :return: the trained or loaded Keras model.
    """
    if ismodelsaved == False:
        # model definition
        ffn3 = Sequential([
            Dense(100, input_dim=input_shape,
                  kernel_initializer="lecun_uniform", activation="relu"),
            BatchNormalization(),
            Dense(50, activation="relu", kernel_initializer="uniform"),
            Dropout(0.5),
            Dense(10, activation="relu", kernel_initializer="uniform"),
            Dense(num_classes, activation='softmax'),
        ])

        ffn3.compile(loss=binary_crossentropy,
                     optimizer=tf.keras.optimizers.RMSprop(0.001, rho=0.9),
                     metrics=['accuracy'])

        historyffn3 = ffn3.fit(xtrain, ytrain,
                               batch_size=batch_size,
                               epochs=epochs,
                               verbose=0,
                               validation_data=(xtest, ytest),
                               callbacks=callbacks)

        test_loss, test_accuracy = ffn3.evaluate(xtest, ytest, verbose=0)[:2]
        p('Test loss:', test_loss)
        p('Test accuracy:', test_accuracy)

        # display learning curves
        curves = historyffn3.history
        plt.figure()
        plt.plot(curves['loss'], label='train loss')
        plt.plot(curves['val_loss'], label='test loss')
        plt.title('Learning Curves')
        plt.xlabel('epochs')
        plt.ylabel('loss')
        plt.legend()
        plt.show()
    else:
        # Pick the saved model that matches the encoding / training source.
        if input_shape == 92:
            saved_model = 'saved_model_4x23/ffn3_4x23'
        elif tl:
            saved_model = 'saved_model_guideseq_8x23/ffn3_8x23'
        else:
            saved_model = 'saved_model_crispr_8x23/ffn3crispr_8x23'
        ffn3 = tf.keras.models.load_model(flpath + saved_model)

    p("FFN3: Done")
    return ffn3
# Layer stack, in order:
#   * the first layer of the model is the (frozen, pre-trained) embedding;
#   * a bidirectional LSTM that returns the full sequence
#     (CuDNN alternative: Bidirectional(CuDNNLSTM(units=32, return_sequences=True)));
#   * an LSTM that returns only its last output
#     (CuDNN alternative: CuDNNLSTM(units=16, return_sequences=False));
#   * a 5-way softmax output.
model = Sequential([
    Embedding(num_words,
              embedding_dim,
              weights=[embedding_matrix],
              input_length=max_tokens,
              trainable=False),
    Bidirectional(LSTM(units=32, return_sequences=True)),
    LSTM(units=16, return_sequences=False),
    Dense(5, activation='softmax'),
])

# Optimize with Adam at a learning rate of 0.001.
optimizer = Adam(lr=1e-3)
model.compile(optimizer=optimizer,
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

# Start training.
model.fit(X_train, y_train, batch_size=128, epochs=50, validation_split=0.1)
# callbacks: a list whose elements are keras.callbacks.Callback objects. The
# callbacks in this list are invoked at the appropriate points during
# training; see the callbacks documentation.

# **Conclusion**
# First, predicting on the test samples gave a reasonably satisfactory
# accuracy.
# Then a prediction function is defined to predict the polarity of input
# text; the model judges negated sentences and some simple logical
# structures accurately.
# Tail of the preceding convolutional stage.
for _layer in (Activation('relu'),
               MaxPooling2D(pool_size=(2, 2)),
               Dropout(0.2),
               BatchNormalization()):
    model.add(_layer)

# Last convolutional stage (128 filters, no pooling).
for _layer in (Conv2D(128, (3, 3), padding='same'),
               Activation('relu'),
               Dropout(0.2),
               BatchNormalization()):
    model.add(_layer)

# Flatting
model.add(Flatten())
model.add(Dropout(0.2))

# Classification: two max-norm constrained dense stages, then the softmax.
for _units in (256, 128):
    model.add(Dense(_units, kernel_constraint=maxnorm(3)))
    model.add(Activation('relu'))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

model.add(Dense(class_num))
model.add(Activation('softmax'))

# Set the epochs and optimizer
epochs = 25
optimizer = 'adam'
we set the weights to be 'ImageNet' to specify that we use the pre-traind model on ImageNet pooling equals average says that if we had extra channels in our tensor at the end of this step we want to collapse them to 1d tensor by taking an average across channels now we have a pre-trained model that creates the layer before the last layer that we saw in the slides """
# NOTE(review): the line above is the end of a triple-quoted string that opens
# before this fragment; all string text in this block is left untouched.
# Pre-trained MobileNet without its classification top, average-pooled.
my_new_model.add(
    MobileNet(weights='imagenet', include_top=False, pooling='avg'))
""" we add a dense layer to make predictions, we specify the number of nodes in this layer which in this case is the number of classes, then we want to apply the softmax function to turn it into probabilities """
# New classification head: one softmax unit per class.
my_new_model.add(Dense(
    num_classes,
    activation='softmax',
))
""" we tell tensor flow not to train the first layer which is the pre-trained model because that's the model that was already pre-trained with the ImageNet data """
# Freeze layer 0 (the MobileNet base added above).
my_new_model.layers[0].trainable = False
""" the compile command tells tensorflow how to update the relationships in the dense connections when we're doing the training with our data, we have a measure of loss or inaccuracy we want to minimize we specify as categorical cross entropy (log loss function) we use The Adam optimization algorithm which is an extension to stochastic gradient descent to minimize the categorical cross entropy, we ask it to report the accuracy metric that is what fraction of predictions were correct this is easier to interpret than categorical cross entropy scores, so it would prints out how the model is doing """
model.add( Conv2D(filters=n_filter, kernel_size=filter_size, padding=padding, input_shape=x_train.shape[1:], activation=cnn_activation)) # maxpooling model.add(MaxPooling2D(pool_size=pool_size)) # connect to DNN model.add(Flatten()) model.add(Dropout(flatten_dropout)) for n_nodes in dnn_nodes: model.add(Dense(n_nodes, activation='relu')) model.add(Dropout(dnn_dropout)) # output model.add(Dense(10, activation='softmax')) print(model.summary()) #plot_model(model,to_file='model.png') # train model ===================================== model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) train_history = model.fit(x=x_train, y=y_train,
def DIEN(feature_dim_dict, seq_feature_list, embedding_size=8, hist_len_max=16,
         gru_type="GRU", use_negsampling=False, alpha=1.0, use_bn=False,
         hidden_size=(200, 80), activation='sigmoid', att_hidden_size=(64, 16),
         att_activation=Dice, att_weight_normalization=True, l2_reg_deep=0,
         l2_reg_embedding=1e-5, final_activation='sigmoid', keep_prob=1,
         init_std=0.0001, seed=1024, ):
    """Instantiates the Deep Interest Evolution Network architecture.

    :param feature_dim_dict: dict, to indicate sparse field (**now only support sparse feature**) like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':[]}
    :param seq_feature_list: list, to indicate sequence sparse field (**now only support sparse feature**), must be a subset of ``feature_dim_dict["sparse"]``
    :param embedding_size: positive integer, sparse feature embedding_size.
    :param hist_len_max: positive int, to indicate the max length of seq input
    :param gru_type: str, can be GRU AIGRU AUGRU AGRU
    :param use_negsampling: bool, whether or not use negative sampling
    :param alpha: float, weight of auxiliary_loss
    :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
    :param hidden_size: list, list of positive integer or empty list, the layer number and units in each layer of deep net
    :param activation: Activation function to use in deep net
    :param att_hidden_size: list, list of positive integer, the layer number and units in each layer of attention net
    :param att_activation: Activation function to use in attention net
    :param att_weight_normalization: bool. Whether normalize the attention score of local activation unit.
    :param l2_reg_deep: float. L2 regularizer strength applied to deep net
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'``
    :param keep_prob: float in (0,1]. keep_prob used in deep net
    :param init_std: float, to use as the initialize std of embedding vector
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.
    """

    def _merge(tensors):
        # A single tensor is passed through as-is; several are concatenated.
        if len(tensors) > 1:
            return Concatenate()(tensors)
        return tensors[0]

    check_feature_config_dict(feature_dim_dict)

    (sparse_input, dense_input,
     user_behavior_input, user_behavior_length) = get_input(
        feature_dim_dict, seq_feature_list, hist_len_max)

    # One embedding layer per sparse feature, keyed by feature name.
    sparse_embedding_dict = {}
    for idx, feat in enumerate(feature_dim_dict["sparse"]):
        sparse_embedding_dict[feat.name] = Embedding(
            feat.dimension,
            embedding_size,
            embeddings_initializer=RandomNormal(
                mean=0.0, stddev=init_std, seed=seed),
            embeddings_regularizer=l2(l2_reg_embedding),
            name='sparse_emb_' + str(idx) + '-' + feat.name)

    # Query: sequence features of the candidate. Keys: the same features over
    # the behaviour history. Deep input: every sparse feature.
    query_emb_list = []
    for feat in seq_feature_list:
        query_emb_list.append(sparse_embedding_dict[feat](sparse_input[feat]))
    keys_emb_list = []
    for feat in seq_feature_list:
        keys_emb_list.append(
            sparse_embedding_dict[feat](user_behavior_input[feat]))
    deep_input_emb_list = []
    for feat in feature_dim_dict["sparse"]:
        deep_input_emb_list.append(
            sparse_embedding_dict[feat.name](sparse_input[feat.name]))

    query_emb = _merge(query_emb_list)
    keys_emb = _merge(keys_emb_list)
    deep_input_emb = _merge(deep_input_emb_list)

    # Negative-sampled behaviour sequences are extra model inputs and only
    # exist when the auxiliary loss is requested.
    neg_concat_behavior = None
    if use_negsampling:
        neg_user_behavior_input = {}
        for idx, feat in enumerate(seq_feature_list):
            neg_user_behavior_input[feat] = Input(
                shape=(hist_len_max,), name='neg_seq_' + str(idx) + '-' + feat)

        neg_uiseq_embed_list = []
        for feat in seq_feature_list:
            neg_uiseq_embed_list.append(
                sparse_embedding_dict[feat](neg_user_behavior_input[feat]))
        neg_concat_behavior = _merge(neg_uiseq_embed_list)

    hist, aux_loss_1 = interest_evolution(
        keys_emb,
        query_emb,
        user_behavior_length,
        gru_type=gru_type,
        use_neg=use_negsampling,
        neg_concat_behavior=neg_concat_behavior,
        embedding_size=embedding_size,
        att_hidden_size=att_hidden_size,
        att_activation=att_activation,
        att_weight_normalization=att_weight_normalization)

    # Deep part: feature embeddings + evolved interest (+ dense inputs).
    deep_input_emb = Concatenate()([deep_input_emb, hist])
    deep_input_emb = tf.keras.layers.Flatten()(deep_input_emb)
    if len(dense_input) > 0:
        deep_input_emb = Concatenate()(
            [deep_input_emb] + list(dense_input.values()))

    mlp_out = MLP(hidden_size, activation, l2_reg_deep,
                  keep_prob, use_bn, seed)(deep_input_emb)
    final_logit = Dense(1, use_bias=False)(mlp_out)
    output = PredictionLayer(final_activation)(final_logit)

    # Input order: regular inputs, optional negative sequences, then length.
    model_input_list = get_inputs_list(
        [sparse_input, dense_input, user_behavior_input])
    if use_negsampling:
        model_input_list += list(neg_user_behavior_input.values())
    model_input_list += [user_behavior_length]

    model = Model(inputs=model_input_list, outputs=output)

    if use_negsampling:
        model.add_loss(alpha * aux_loss_1)

    # Initialise all variables in the current Keras session before returning.
    tf.keras.backend.get_session().run(tf.global_variables_initializer())
    return model
def define_nmt(hidden_size, batch_size, en_timesteps, en_vsize, sp_timesteps, sp_vsize):
    """Build a GRU encoder-decoder with attention, plus its inference models.

    The training model is compiled with the ``adam`` optimizer and
    ``categorical_crossentropy`` loss, and its summary is printed. The two
    inference models reuse the very same layer objects as the training model
    and are always declared with a batch size of 1.

    :param hidden_size: number of units of the encoder and decoder GRU layers.
    :param batch_size: when truthy, the training inputs are declared with a
        fixed ``batch_shape``; otherwise only ``shape`` is given.
    :param en_timesteps: number of time steps of the encoder input.
    :param en_vsize: size of the last axis of the encoder input
        (presumably the source vocabulary size - confirm with the caller).
    :param sp_timesteps: the training decoder input has ``sp_timesteps - 1``
        time steps.
    :param sp_vsize: size of the last axis of the decoder input and number of
        units of the softmax output layer.
    :return: tuple ``(full_model, encoder_model, decoder_model)``.
    """
    enc_dims = (en_timesteps, en_vsize)
    dec_dims = (sp_timesteps - 1, sp_vsize)

    # Training inputs: pin the batch dimension only when a batch size is given.
    if batch_size:
        encoder_inputs = Input(batch_shape=(batch_size,) + enc_dims, name='encoder_inputs')
        decoder_inputs = Input(batch_shape=(batch_size,) + dec_dims, name='decoder_inputs')
    else:
        encoder_inputs = Input(shape=enc_dims, name='encoder_inputs')
        decoder_inputs = Input(shape=dec_dims, name='decoder_inputs')

    # Encoder: keep both the full output sequence and the final state.
    encoder_gru = GRU(hidden_size, return_sequences=True, return_state=True, name='encoder_gru')
    enc_seq, enc_last_state = encoder_gru(encoder_inputs)

    # Decoder: starts from the encoder's final state.
    decoder_gru = GRU(hidden_size, return_sequences=True, return_state=True, name='decoder_gru')
    dec_seq, _ = decoder_gru(decoder_inputs, initial_state=enc_last_state)

    # Attention over the encoder sequence, driven by the decoder sequence.
    attn_layer = AttentionLayer(name='attention_layer')
    attn_context, _ = attn_layer([enc_seq, dec_seq])

    # Decoder output and attention output are joined on the last axis.
    merged = Concatenate(axis=-1, name='concat_layer')([dec_seq, attn_context])

    # The same softmax layer is applied at every time step.
    dense = Dense(sp_vsize, activation='softmax', name='softmax_layer')
    decoder_pred = TimeDistributed(dense, name='time_distributed_layer')(merged)

    full_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_pred)
    full_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    full_model.summary()

    # ---- Inference models (batch size fixed to 1) ----
    inf_batch = 1

    # Encoder (inference) model.
    encoder_inf_inputs = Input(batch_shape=(inf_batch,) + enc_dims, name='encoder_inf_inputs')
    enc_inf_seq, enc_inf_state = encoder_gru(encoder_inf_inputs)
    encoder_model = Model(inputs=encoder_inf_inputs, outputs=[enc_inf_seq, enc_inf_state])

    # Decoder (inference) model: consumes one decoder step per call.
    decoder_inf_inputs = Input(batch_shape=(inf_batch, 1, sp_vsize), name='decoder_word_inputs')
    encoder_inf_states = Input(batch_shape=(inf_batch, en_timesteps, hidden_size),
                               name='encoder_inf_states')
    decoder_init_state = Input(batch_shape=(inf_batch, hidden_size), name='decoder_init')

    dec_inf_seq, dec_inf_state = decoder_gru(decoder_inf_inputs, initial_state=decoder_init_state)
    attn_inf_context, attn_inf_states = attn_layer([encoder_inf_states, dec_inf_seq])
    inf_merged = Concatenate(axis=-1, name='concat')([dec_inf_seq, attn_inf_context])
    decoder_inf_pred = TimeDistributed(dense)(inf_merged)

    decoder_model = Model(
        inputs=[encoder_inf_states, decoder_init_state, decoder_inf_inputs],
        outputs=[decoder_inf_pred, attn_inf_states, dec_inf_state])

    return full_model, encoder_model, decoder_model
def assemble_layers(self):
    """Flatten ``self.inputs``, apply a 3-unit dense layer and a softmax.

    :return: the output of the ``Activation`` layer named ``'softmax'``.
    """
    # Imports are kept local to the method, as in the original.
    from tensorflow.python.keras.layers import Activation, Dense
    import tensorflow as tf

    flat = tf.keras.layers.Flatten()(self.inputs)
    logits = Dense(2 + 1)(flat)
    return Activation('softmax', name='softmax')(logits)
print(y_train) # 원 핫 인코딩을 적용 하기 이전 # np_utils : 원핫 인코딩을 수행해 준다. from keras.utils import np_utils y_train = np_utils.to_categorical(y_train, num_classes=NB_CLASSES, dtype='float32') print(y_train) # 원 핫 인코딩 적용한 이후의 값 model = Sequential() model.add( Dense(units=NB_CLASSES, input_shape=(x_column, ), activation='softmax')) # 모델의 간략한 정보를 출력해 준다. model.summary() # 'sgd' : 확률적 경사 하강법 # 산 정상에서 지면으로 가고자 할 때, 경사가 가장 급한 곳으로 이동하는 기법 model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) history = model.fit(x_train, y_train, epochs=1000, verbose=0) print(history) print('-' * 30) for idx in range(len(x_test)):
print(" Get Labels...")
# One-hot encode the integer class ids reported by each generator.
# `num_classes` is recomputed for the validation generator, so it ends up
# holding the validation class count.
num_classes = len(train_generator.class_indices)
train_classes = train_generator.classes
train_labels = utils.to_categorical(train_classes, num_classes=num_classes)
num_classes = len(validate_generator.class_indices)
validate_classes = validate_generator.classes
validate_labels = utils.to_categorical(validate_classes, num_classes=num_classes)

print("Create top layer model...")
# Linear stack that sits on top of the bottleneck features.
model = tf.keras.Sequential([
    # Block 1: flatten to 1D -> hidden fully connected layer -> ReLU -> dropout
    Flatten(input_shape=bottleneck_train_features.shape[1:]),
    Dense(FCL_SIZE),
    Activation('relu'),
    Dropout(DROPOUT_2),
    # Block 2: final fully connected layer -> softmax
    Dense(CLASSES),
    Activation('softmax'),
])

# Configure the training process for multi-class classification:
# RMSprop optimizer, cross-entropy loss, accuracy reported as metric.
optimizer = optimizers.RMSprop(lr=LEARNING_RATE, decay=DECAY_RATE)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'])
def default_loc(num_outputs, num_locations, input_shape):
    """Build a convolutional model with categorical angle, throttle and location heads.

    The original author notes that this model depends on ``concatenate``,
    which failed on keras < 2.0.8.

    :param num_outputs: not referenced by this function.
    :param num_locations: number of units of the ``loc`` softmax head.
    :param input_shape: shape passed to the ``img_in`` input layer.
    :return: a Keras ``Model`` with outputs ``[angle, throttle, loc]``.
    """
    drop = 0.5

    # (layer name, filters, kernel size, strides) for each convolution.
    conv_stack = (
        ("conv2d_1", 24, (5, 5), (2, 2)),
        ("conv2d_2", 32, (5, 5), (2, 2)),
        ("conv2d_3", 64, (5, 5), (2, 2)),
        ("conv2d_4", 64, (3, 3), (2, 2)),
        ("conv2d_5", 64, (3, 3), (1, 1)),
    )

    img_in = Input(shape=input_shape, name='img_in')

    # Cropping and manual rescaling were disabled by the original author;
    # the input goes straight into batch normalization.
    features = BatchNormalization()(img_in)

    # Every convolution is followed by dropout.
    for layer_name, filters, kernel, step in conv_stack:
        features = Convolution2D(filters, kernel, strides=step, activation='relu',
                                 name=layer_name)(features)
        features = Dropout(drop)(features)

    features = Flatten(name='flattened')(features)
    features = Dense(100, activation='relu')(features)
    features = Dropout(drop)(features)

    shared = Dense(50, activation='relu')(features)
    shared = Dropout(.1)(shared)

    # Categorical heads on top of the shared representation.
    angle_out = Dense(15, activation='softmax', name='angle')(shared)
    throttle_out = Dense(20, activation='softmax', name='throttle')(shared)
    loc_out = Dense(num_locations, activation='softmax', name='loc')(shared)
    # The lane head is created but deliberately left out of the model outputs,
    # exactly as in the original.
    lane_out = Dense(2, activation='softmax', name='lane')(shared)

    return Model(inputs=[img_in], outputs=[angle_out, throttle_out, loc_out])
def cifar10_pyramid_ensemble(input_shape=None, input_tensor=None, n_classes=None,
                             weights_path: Union[None, str] = None) -> Model:
    """
    Defines a cifar10 pyramid ensemble network.

    One "strong" submodel emits ten class scores. Two "weak" submodels emit
    five scores each; each of them is averaged with a ``Crop`` of the strong
    scores (``Crop(1, 0, 5)`` and ``Crop(1, 5, 10)`` respectively). The two
    averaged halves are concatenated and passed through a softmax.

    :param input_shape: the input shape of the network. Can be omitted if input_tensor is used.
    :param input_tensor: the input tensor of the network. Can be omitted if input_shape is used.
    :param n_classes: not used here; kept to be compatible with the main script.
    :param weights_path: a path to a trained custom network's weights.
    :return: Keras functional API Model.
    """
    conv_opts = {'padding': 'same', 'activation': 'elu'}
    kernel = (3, 3)
    window = (2, 2)

    net_input = create_inputs(input_shape, input_tensor)

    # ---- Strong submodel ----
    # Block 1.
    strong = Conv2D(32, kernel, name='submodel_strong_block1_conv1', **conv_opts)(net_input)
    strong = Conv2D(32, kernel, name='submodel_strong_block1_conv2', **conv_opts)(strong)
    strong = MaxPooling2D(pool_size=window, name='submodel_strong_block1_pool')(strong)
    # Block 2.
    strong = Conv2D(64, kernel, name='submodel_strong_block2_conv1', **conv_opts)(strong)
    strong = Conv2D(64, kernel, name='submodel_strong_block2_conv2', **conv_opts)(strong)
    strong = MaxPooling2D(pool_size=window, name='submodel_strong_block2_pool')(strong)
    # Block 3.
    strong = BatchNormalization(name='submodel_strong_block3_batch-norm')(strong)
    strong = Conv2D(128, kernel, name='submodel_strong_block3_conv', **conv_opts)(strong)
    strong = Dropout(0.5, name='submodel_strong_block3_dropout', seed=0)(strong)
    # Top layers.
    strong = Flatten(name='submodel_strong_flatten')(strong)
    strong_scores = Dense(10, name='submodel_strong_output')(strong)

    # ---- Weak submodel 1 ----
    weak_1 = Conv2D(64, kernel, name='submodel_weak_1_block1_conv1', **conv_opts)(net_input)
    weak_1 = Conv2D(64, kernel, name='submodel_weak_1_block1_conv2', **conv_opts)(weak_1)
    weak_1 = MaxPooling2D(pool_size=window, name='submodel_weak_1_block1_pool')(weak_1)
    weak_1 = Flatten(name='submodel_weak_1_flatten')(weak_1)
    weak_1_scores = Dense(5, name='submodel_weak_1_output')(weak_1)

    # Average the predictions for the first five classes.
    first_half = Average(name='averaged_first_half_classes')(
        [Crop(1, 0, 5)(strong_scores), weak_1_scores])

    # ---- Weak submodel 2 ----
    weak_2 = Conv2D(64, kernel, name='submodel_weak_2_block1_conv1', **conv_opts)(net_input)
    weak_2 = Conv2D(64, kernel, name='submodel_weak_2_block1_conv2', **conv_opts)(weak_2)
    weak_2 = MaxPooling2D(pool_size=window, name='submodel_weak_2_block1_pool')(weak_2)
    weak_2 = Flatten(name='submodel_weak_2_flatten')(weak_2)
    weak_2_scores = Dense(5, name='submodel_weak_2_output')(weak_2)

    # Average the predictions for the last five classes.
    last_half = Average(name='averaged_last_half_classes')(
        [Crop(1, 5, 10)(strong_scores), weak_2_scores])

    # Concatenate all class predictions together and normalize them.
    merged = Concatenate(name='output')([first_half, last_half])
    probabilities = Softmax(name='output_softmax')(merged)

    ensemble = Model(net_input, probabilities, name='cifar10_pyramid_ensemble')
    # Load weights, if they exist.
    load_weights(weights_path, ensemble)
    return ensemble
def default_latent(num_outputs, input_shape):
    """Build a convolutional model with an image-reconstruction branch and scalar heads.

    The encoder ends in a 10-filter ``latent`` convolution. From there a stack
    of transposed convolutions produces the ``img_out`` output, while a dense
    stack on the flattened latent tensor feeds ``num_outputs`` linear heads.

    :param num_outputs: number of single-unit linear heads
        (named ``n_outputs0``, ``n_outputs1``, ...).
    :param input_shape: shape passed to the ``img_in`` input layer.
    :return: a Keras ``Model`` whose outputs are ``img_out`` followed by the
        linear heads.
    """
    drop = 0.2

    # (layer name, filters, kernel size, strides); each is followed by dropout.
    encoder_stack = (
        ("conv2d_1", 24, (5, 5), (2, 2)),
        ("conv2d_2", 32, (5, 5), (2, 2)),
        ("conv2d_3", 32, (5, 5), (2, 2)),
        ("conv2d_4", 32, (3, 3), (1, 1)),
        ("conv2d_5", 32, (3, 3), (1, 1)),
        ("conv2d_6", 64, (3, 3), (2, 2)),
        ("conv2d_7", 64, (3, 3), (2, 2)),
    )
    # (layer name, filters) for the transposed convolutions.
    decoder_stack = (
        ("deconv2d_1", 64),
        ("deconv2d_2", 64),
        ("deconv2d_3", 32),
        ("deconv2d_4", 32),
        ("deconv2d_5", 32),
        ("img_out", 1),
    )

    img_in = Input(shape=input_shape, name='img_in')
    encoded = Lambda(lambda x: x / 255.)(img_in)  # normalize

    for layer_name, filters, kernel, step in encoder_stack:
        encoded = Convolution2D(filters, kernel, strides=step, activation='relu',
                                name=layer_name)(encoded)
        encoded = Dropout(drop)(encoded)
    # The latent convolution is not followed by dropout.
    encoded = Convolution2D(10, (1, 1), strides=(2, 2), activation='relu', name="latent")(encoded)

    # Reconstruction branch.
    decoded = encoded
    for layer_name, filters in decoder_stack:
        decoded = Conv2DTranspose(filters=filters, kernel_size=(3, 3), strides=2,
                                  name=layer_name)(decoded)

    # Dense branch on the flattened latent tensor.
    hidden = Flatten(name='flattened')(encoded)
    for units in (256, 100, 50):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(drop)(hidden)

    heads = [Dense(1, activation='linear', name='n_outputs' + str(i))(hidden)
             for i in range(num_outputs)]

    return Model(inputs=[img_in], outputs=[decoded] + heads)
conv_2 = Conv1D(64, 3, name='conv_2', padding='same', activation='relu')(dropout_1) # pooling layer pool_layer = AveragePooling1D()(conv_2) #print(pool_layer.shape) flattened_layer = Flatten()(pool_layer) # dropout layer dropout_2 = Dropout(dropout_rate, name='dropout_2')(flattened_layer) dense_1 = Dense(64, activation='relu', name='fc_1')(dropout_2) # and a softmax layer equal to num_classes predictions = Dense(num_classes, activation='softmax', name='pred')(dense_1) optimizer = Adam(lr=learning_rate) # finally create Model with correct input and output layers model = Model(inputs=temporal_input_layer, outputs=predictions) model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy']) print(model.summary())
df = pd.get_dummies(df, columns=['Class'], prefix=["Class"]) x = df[df.columns[0:4]].values y = df[df.columns[4:7]].values return (x, y) x, y = preprocess() train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.2) model = Sequential() model.add(Dense(10, activation='relu', input_dim=4)) model.add(Dense(10, activation='relu')) model.add(Dense(3, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy']) model.fit(train_x, train_y, epochs=500, batch_size=1, verbose=2, validation_split=0.1) score = model.evaluate(test_x, test_y)
def DeepFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, use_fm=True, only_dnn=False,
           dnn_hidden_units=(128, 128), l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0,
           init_std=0.0001, seed=1024, dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False,
           task='binary'):
    """Instantiates a two-task (finish / like) DeepFM variant with MMoE towers.

    The linear and FM logits are shared by both tasks. The deep part feeds the
    combined DNN input through an ``MMoE`` layer, concatenates its outputs and
    passes them, together with the DNN input, to an ``MMoEdiffGate`` layer
    configured for two tasks. Each task output then goes through its own
    two-layer tower before being added to the shared logits.

    NOTE: ``use_fm``, ``only_dnn``, ``dnn_hidden_units``, ``dnn_dropout``,
    ``dnn_use_bn`` and ``task`` are accepted but not referenced by the active
    code; both heads are always built with ``PredictionLayer('binary', ...)``.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param embedding_size: positive integer,sparse feature embedding_size
    :param use_fm: bool,use FM part or not (currently unused, see note)
    :param only_dnn: bool (currently unused, see note)
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units
        in each layer of DNN (currently unused, see note)
    :param l2_reg_linear: float. L2 regularizer strength applied to linear part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to the tower hidden layers
    :param init_std: float,to use as the initialize std of embedding vector
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate
        (currently unused, see note)
    :param dnn_activation: Activation function to use in the tower hidden layers
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in DNN
        (currently unused, see note)
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
        (currently unused, see note)
    :return: A Keras model instance with outputs ``[finish_output, like_output]``.
    """

    def _tower(tower_input):
        """Two regularized 128-unit hidden layers followed by a bias-free 1-unit logit."""
        hidden = Dense(128, dnn_activation, kernel_regularizer=l2(l2_reg_dnn))(tower_input)
        hidden = Dense(128, dnn_activation, kernel_regularizer=l2(l2_reg_dnn))(hidden)
        return tf.keras.layers.Dense(1, use_bias=False, activation=None)(hidden)

    # One input per feature, keyed by feature name.
    features = build_input_features(linear_feature_columns + dnn_feature_columns)
    inputs_list = list(features.values())

    # Embeddings of the sparse features and raw values of the dense features.
    sparse_embedding_list, dense_value_list = input_from_feature_columns(
        features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)

    # Shared linear term (no activation function).
    linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear,
                                    init_std=init_std, seed=seed, prefix='linear')

    # Shared FM second-order term, computed on the concatenated sparse
    # embeddings; it contains neither the first-order term nor the bias.
    fm_logit = FM()(concat_fun(sparse_embedding_list, axis=1))

    # Deep part: experts -> concatenation -> per-task gating.
    dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
    expert_outputs = MMoE(units=16, num_experts=8, num_tasks=8)(dnn_input)
    expert_concat = concat_fun(expert_outputs)
    finish_in, like_in = MMoEdiffGate(units=16, num_experts=8, num_tasks=2)(
        [expert_concat, dnn_input])

    # Task-specific towers; the finish tower is built first, then the like tower.
    finish_tower_logit = _tower(finish_in)
    like_tower_logit = _tower(like_in)

    # Each task logit = linear + FM + its own tower.
    shared_logits = [linear_logit, fm_logit]
    finish_logit = tf.keras.layers.add(shared_logits + [finish_tower_logit])
    like_logit = tf.keras.layers.add(shared_logits + [like_tower_logit])

    output_finish = PredictionLayer('binary', name='finish_output')(finish_logit)
    output_like = PredictionLayer('binary', name='like_output')(like_logit)

    return tf.keras.models.Model(inputs=inputs_list, outputs=[output_finish, output_like])