# Assumed imports for the Keras seq2seq attention models in this file.
from tensorflow.keras.layers import (Input, Embedding, LSTM, Bidirectional, Dense, Activation,
                                     TimeDistributed, dot, concatenate)
from tensorflow.keras.initializers import Constant
from tensorflow.keras.models import Model


def seq_2_seq_att_LSTM(X_embedding, MAX_LEN, num_words, EMBEDDING_DIM, LSTM_units, LSTM_dropout):
    # Encoder
    # Encoder input shape is (batch size, max length)
    encoder_inputs = Input(shape=(MAX_LEN,))
    encoder_embedding = Embedding(input_dim=num_words, output_dim=EMBEDDING_DIM, input_length=MAX_LEN,
                                  embeddings_initializer=Constant(X_embedding), trainable=False)(encoder_inputs)
    # LSTM
    encoder_lstm = LSTM(units=LSTM_units, return_state=True, return_sequences=True,
                        recurrent_dropout=LSTM_dropout, dropout=LSTM_dropout)
    encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
    encoder_states = [state_h, state_c]

    # Decoder
    decoder_inputs = Input(shape=(None,))
    decoder_embedding_layer = Embedding(input_dim=num_words, output_dim=EMBEDDING_DIM, trainable=True)
    decoder_embedding = decoder_embedding_layer(decoder_inputs)
    decoder_lstm = LSTM(units=LSTM_units, return_state=True, return_sequences=True,
                        recurrent_dropout=LSTM_dropout, dropout=LSTM_dropout)
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

    # Attention
    attention_weight = dot([decoder_outputs, encoder_outputs], axes=[2, 2], normalize=True)  # cosine similarity
    attention = Activation('softmax')(attention_weight)
    context = dot([attention, encoder_outputs], axes=[2, 1])
    decoder_combined_context = concatenate([context, decoder_outputs])
    att_output = TimeDistributed(Dense(64, activation="tanh"))(decoder_combined_context)
    output = TimeDistributed(Dense(num_words, activation="softmax"))(att_output)

    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=output)
    return model

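# A minimal usage sketch for seq_2_seq_att_LSTM. The vocabulary size, sequence
# length, LSTM size and the random embedding matrix below are made-up
# placeholder values, not taken from the original project.
import numpy as np

num_words, MAX_LEN, EMBEDDING_DIM = 10000, 100, 300
X_embedding = np.random.uniform(-0.05, 0.05, (num_words, EMBEDDING_DIM))

att_model = seq_2_seq_att_LSTM(X_embedding, MAX_LEN, num_words, EMBEDDING_DIM,
                               LSTM_units=150, LSTM_dropout=0.2)
att_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
att_model.summary()
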
def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
        l2_reg_linear=1e-5, seed=1024, task='binary', bias_feature_columns=None):
    """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model.

    :param region_feature_columns: An iterable containing all the features used by the region part of the model.
    :param base_feature_columns: An iterable containing all the features used by the base part of the model.
    :param region_num: integer > 1, indicates the piece number.
    :param l2_reg_linear: float. L2 regularizer strength applied to weights.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param bias_feature_columns: An iterable containing all the features used by the bias part of the model.
    :return: A Keras model instance.
    """
    # build_input_features, get_region_score and get_learner_score are helpers
    # from the surrounding DeepCTR-style codebase.
    if region_num <= 1:
        raise ValueError("region_num must > 1")

    if base_feature_columns is None or len(base_feature_columns) == 0:
        base_feature_columns = region_feature_columns

    if bias_feature_columns is None:
        bias_feature_columns = []

    features = build_input_features(region_feature_columns + base_feature_columns + bias_feature_columns)
    inputs_list = list(features.values())

    region_score = get_region_score(features, region_feature_columns, region_num, l2_reg_linear, seed)
    learner_score = get_learner_score(features, base_feature_columns, region_num, l2_reg_linear, seed, task=task)

    final_logit = dot([region_score, learner_score], axes=-1)

    if bias_feature_columns is not None and len(bias_feature_columns) > 0:
        bias_score = get_learner_score(features, bias_feature_columns, 1, l2_reg_linear, seed,
                                       prefix='bias_', task='binary')
        final_logit = dot([final_logit, bias_score], axes=-1)

    model = Model(inputs=inputs_list, outputs=final_logit)
    return model

def _create_attention_mechanism(decoder_outputs, encoder_outputs, step):
    # Dot-product attention between decoder and encoder states, followed by a
    # batch-normalized context vector and a time-distributed projection.
    attention = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
    attention = Activation('softmax')(attention)
    context = dot([attention, encoder_outputs], axes=[2, 1])
    context = BatchNormalization(momentum=step.hyperparams['momentum'])(context)
    decoder_combined_context = concatenate([context, decoder_outputs])
    projection = TimeDistributed(Dense(units=step.hyperparams['output_dim']))
    decoder_outputs = projection(decoder_combined_context)
    return decoder_outputs

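# Illustration of the `step` argument expected by _create_attention_mechanism:
# any object exposing a `hyperparams` mapping with 'momentum' and 'output_dim'
# keys will do. The _Step class and the 256/300 sizes below are hypothetical;
# the layer imports used inside the function are assumed to be in scope.
from tensorflow.keras.layers import Input


class _Step:
    def __init__(self, hyperparams):
        self.hyperparams = hyperparams


step = _Step({'momentum': 0.99, 'output_dim': 300})
decoder_states = Input(shape=(None, 256))  # [batch, decoder timesteps, units]
encoder_states = Input(shape=(None, 256))  # [batch, encoder timesteps, units]
projected = _create_attention_mechanism(decoder_states, encoder_states, step)
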
def seq_2_seq_biLSTM_att(X_embedding, MAX_LEN, num_words, EMBEDDING_DIM, LSTM_units, LSTM_dropout):
    # Encoder
    # [?, 100]
    encoder_inputs = Input(shape=(MAX_LEN,))
    # [?, 100, 300]
    encoder_embedding = Embedding(input_dim=num_words, output_dim=EMBEDDING_DIM, input_length=MAX_LEN,
                                  embeddings_initializer=Constant(X_embedding), trainable=False)(encoder_inputs)
    # Bidirectional LSTM
    encoder_lstm = Bidirectional(LSTM(units=LSTM_units, return_state=True, return_sequences=True,
                                      recurrent_dropout=LSTM_dropout, dropout=LSTM_dropout))
    # [?, 100, 300]
    encoder_outputs, forward_h, forward_c, backward_h, backward_c = encoder_lstm(encoder_embedding)
    # [?, 300]
    state_h = concatenate([forward_h, backward_h])
    state_c = concatenate([forward_c, backward_c])
    encoder_states = [state_h, state_c]

    # Decoder
    # [?, 30]
    decoder_inputs = Input(shape=(None,))
    decoder_embedding_layer = Embedding(input_dim=num_words, output_dim=EMBEDDING_DIM, trainable=True)
    # [?, 30, 300]
    decoder_embedding = decoder_embedding_layer(decoder_inputs)
    decoder_lstm = LSTM(units=2 * LSTM_units, return_state=True, return_sequences=True,
                        recurrent_dropout=LSTM_dropout, dropout=LSTM_dropout)
    # [?, 30, 300]
    decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)

    # Attention
    # [?, 30, 100]
    attention_weight = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
    attention = Activation('softmax')(attention_weight)
    # [?, 30, 300] = dot([?, 30, 100], [?, 100, 300])
    context = dot([attention, encoder_outputs], axes=[2, 1])
    # [?, 30, 600]
    decoder_combined_context = concatenate([context, decoder_outputs])
    # [?, 30, 128]
    att_output = TimeDistributed(Dense(128, activation="tanh"))(decoder_combined_context)
    # [?, 30, 39093]
    output = TimeDistributed(Dense(num_words, activation="softmax"))(att_output)

    model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=output)
    return model

def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
        l2_reg_linear=1e-5, init_std=0.0001, seed=1024, task='binary', bias_feature_columns=None):
    """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model.

    :param region_feature_columns: An iterable containing all the features used by the region part of the model.
    :param base_feature_columns: An iterable containing all the features used by the base part of the model.
    :param region_num: integer > 1, indicates the piece number.
    :param l2_reg_linear: float. L2 regularizer strength applied to weights.
    :param init_std: float, to use as the initialization std of the embedding vectors.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :param bias_feature_columns: An iterable containing all the features used by the bias part of the model.
    :return: A Keras model instance.
    """
    # TODO: not yet updated
    if region_num <= 1:
        raise ValueError("region_num must > 1")

    # if not isinstance(region_feature_columns,
    #                   dict) or "sparse" not in region_feature_columns or "dense" not in region_feature_columns:
    #     raise ValueError(
    #         "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

    if base_feature_columns is None or len(base_feature_columns) == 0:
        base_feature_columns = region_feature_columns

    if bias_feature_columns is None:
        bias_feature_columns = []

    features = build_input_features(region_feature_columns + base_feature_columns + bias_feature_columns)
    inputs_list = list(features.values())

    region_score = get_region_score(features, region_feature_columns, region_num, l2_reg_linear, init_std, seed)
    learner_score = get_learner_score(features, base_feature_columns, region_num, l2_reg_linear, init_std, seed,
                                      task=task)

    final_logit = dot([region_score, learner_score], axes=-1)

    if bias_feature_columns is not None and len(bias_feature_columns) > 0:
        bias_score = get_learner_score(features, bias_feature_columns, 1, l2_reg_linear, init_std, seed,
                                       prefix='bias_', task='binary')
        final_logit = dot([final_logit, bias_score], axes=-1)

    model = Model(inputs=inputs_list, outputs=final_logit)
    return model

def create_mem_network():
    # End-to-end memory network over (story, question) pairs; expects the
    # module-level globals vocab_size, story_maxlen and query_maxlen to be set.
    input_story = layers.Input(shape=(story_maxlen,))
    input_m_encoded = layers.Embedding(input_dim=vocab_size, output_dim=64)(input_story)
    input_m_encoded = layers.Dropout(0.3)(input_m_encoded)
    input_c_encoded = layers.Embedding(input_dim=vocab_size, output_dim=query_maxlen)(input_story)
    input_c_encoded = layers.Dropout(0.3)(input_c_encoded)

    input_ques = layers.Input(shape=(query_maxlen,))
    ques_encoded = layers.Embedding(input_dim=vocab_size, output_dim=64,
                                    input_length=query_maxlen)(input_ques)
    ques_encoded = layers.Dropout(0.3)(ques_encoded)

    # (samples, story_maxlen, query_maxlen)
    match = layers.dot([input_m_encoded, ques_encoded], axes=(2, 2))
    match = layers.Activation('softmax')(match)

    response = layers.add([match, input_c_encoded])  # (samples, story_maxlen, query_maxlen)
    response = layers.Permute((2, 1))(response)

    answer = layers.concatenate([response, ques_encoded])
    answer = layers.LSTM(32)(answer)
    answer = layers.Dropout(0.3)(answer)
    # Logits over the vocabulary (no softmax applied here).
    answer = layers.Dense(vocab_size, activation=None)(answer)

    return models.Model(inputs=[input_story, input_ques], outputs=answer)

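# A minimal usage sketch for create_mem_network. vocab_size, story_maxlen and
# query_maxlen are module-level globals read by the function, so they are set
# here with made-up values; the random training data is placeholder only.
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

vocab_size, story_maxlen, query_maxlen = 50, 68, 4
mem_model = create_mem_network()
# The final Dense emits logits (no softmax), hence from_logits=True.
mem_model.compile(optimizer='rmsprop',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
stories = np.random.randint(1, vocab_size, (32, story_maxlen))
questions = np.random.randint(1, vocab_size, (32, query_maxlen))
answers = np.random.randint(1, vocab_size, (32,))
mem_model.fit([stories, questions], answers, epochs=1, verbose=0)
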
def inference(encoder_inputs, encoder_outputs, encoder_last):
    # Builds the inference-time encoder/decoder pair. Relies on the globals
    # latent_size, vocabulary_size and embedding_matrix from the training setup.
    encoder_model = Model(inputs=encoder_inputs, outputs=[encoder_outputs, encoder_last])

    # Decoder-Init-1 carries the full encoder output sequence (for attention);
    # Decoder-Init-2 carries the last encoder state, used as both initial h and c,
    # mirroring the training graph in seq2seq_architecture.
    decoder_init_1 = Input(shape=(None, latent_size), name='Decoder-Init-1')
    decoder_init_2 = Input(shape=(latent_size,), name='Decoder-Init-2')
    decoder_inputs = Input(shape=(None,), name='Decoder-Input')

    decoder_embeddings = Embedding(vocabulary_size, 300, weights=[embedding_matrix],
                                   trainable=False, mask_zero=True, name='Decoder-Word-Embedding')
    norm_decoder_embeddings = BatchNormalization(name='Decoder-Batch-Normalization-1')
    decoder_lstm_1 = LSTM(latent_size, name='Decoder-LSTM-1', return_sequences=True,
                          dropout=0.2, recurrent_dropout=0.2)
    norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2')
    attention_activation = Activation('softmax', name='Attention')
    dense_intermediate = TimeDistributed(Dense(64, activation="tanh", name="Intermediate-Output-Dense"))
    dense_final = TimeDistributed(Dense(vocabulary_size, activation="softmax", name="Final-Output-Dense"))

    embedding_inf = decoder_embeddings(decoder_inputs)
    embedding_inf = norm_decoder_embeddings(embedding_inf)
    lstm_inf = decoder_lstm_1(embedding_inf, initial_state=[decoder_init_2, decoder_init_2])
    lstm_inf = norm_decoder(lstm_inf)

    attention_inf = dot([lstm_inf, decoder_init_1], axes=[2, 2])
    attention_inf = attention_activation(attention_inf)
    context_inf = dot([attention_inf, decoder_init_1], axes=[2, 1])

    decoder_combined_context_inf = concatenate([context_inf, lstm_inf])
    outputs_inf = dense_intermediate(decoder_combined_context_inf)
    decoder_last_inf = dense_final(outputs_inf)

    decoder_model = Model([decoder_inputs, decoder_init_1, decoder_init_2],
                          [decoder_last_inf, lstm_inf])

    return encoder_model, decoder_model

def compute_cov(args):
    i, Xcopy, ycopy = args
    # Hypothesis: equal number of samples (Ni) for each class
    Xg = Xcopy[ycopy == i]                                   # [None, d]
    Xg_bar = Xg - tf.reduce_mean(Xg, axis=0, keepdims=True)  # [None, d]
    m = tf.cast(tf.shape(Xg_bar)[0], tf.float32)             # []
    Xg_bar_dummy_batch = tf.expand_dims(Xg_bar, axis=0)      # [1, None, d]
    return (1. / (m - 1)) * tf.squeeze(
        dot([Xg_bar_dummy_batch, Xg_bar_dummy_batch], axes=1), axis=0)  # [d, d]

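# A quick sanity check of compute_cov against numpy's covariance estimate.
# The data is random placeholder data; `dot` is tf.keras.layers.dot as above.
import numpy as np
import tensorflow as tf

rng = np.random.default_rng(0)
X_check = tf.constant(rng.normal(size=(8, 3)), dtype=tf.float32)
y_check = tf.constant([0, 0, 0, 0, 1, 1, 1, 1], dtype=tf.int32)

cov_class0 = compute_cov((tf.constant(0), X_check, y_check))
np_cov_class0 = np.cov(X_check.numpy()[:4], rowvar=False)
print(np.allclose(cov_class0.numpy(), np_cov_class0, atol=1e-4))  # expected: True
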
def pnn_model(sparse_columns, dense_columns, train, test, lmbda=0.05):
    # Assumed imports in scope: pandas as pd, tensorflow as tf, and the Keras
    # layers used below. NOTE: the `lmbda` argument is unused as written; the
    # embedding L2 strength is hard-coded to 0.5.
    # ----------- sparse features ------------
    print('----------- sparse features ------------')
    sparse_input = []
    fm_embedding = []
    for col in sparse_columns:
        _input = Input(shape=(1,))
        sparse_input.append(_input)
        # fm_embedding
        nums = pd.concat((train[col], test[col])).nunique() + 1
        embed = Embedding(nums, 10, input_length=1,
                          embeddings_regularizer=tf.keras.regularizers.l2(0.5))(_input)
        reshape = Reshape((10,))(embed)
        fm_embedding.append(reshape)
    fst_order_sparse_layer = concatenate(fm_embedding)
    print('sparse input: ', len(sparse_input), sparse_input)
    print('sparse emb: ', len(fm_embedding), fm_embedding)
    print('sparse layer: ', fst_order_sparse_layer)

    # ----------- dense features ------------
    print('----------- dense features ------------')
    dense_input = []
    for col in dense_columns:
        _input = Input(shape=(1,))
        dense_input.append(_input)
    concat_dense_input = concatenate(dense_input)
    fst_order_dense_layer = Dense(10, activation='relu')(concat_dense_input)
    print('dense input: ', len(dense_input), dense_input)
    print('final dense input: ', concat_dense_input)
    print('dense layer: ', fst_order_dense_layer)

    # ----------- embedding features ------------
    print('----------- embedding features ------------')
    fm_embedding.append(fst_order_dense_layer)
    print('embedding layer: ', len(fm_embedding), fm_embedding)

    # ----------- PNN (pairwise inner-product) layer ------------
    print('----------- pnn layer ------------')
    field_cnt = len(fm_embedding)
    product_list = []
    for i in range(field_cnt):
        for j in range(i + 1, field_cnt):
            tmp = dot([fm_embedding[i], fm_embedding[j]], axes=1)
            product_list.append(tmp)
    inner_product = concatenate(fm_embedding + product_list)
    print('pnn product layer: ', inner_product)

    fc_layer = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(Dense(64)(inner_product))))
    print('fully connected layer 1: ', fc_layer)
    fc_layer = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(Dense(32)(fc_layer))))
    print('fully connected layer 2: ', fc_layer)

    output_layer = Dense(1, activation='sigmoid')(fc_layer)
    print('output layer: ', output_layer)

    print('----------- model ------------')
    model = Model(inputs=sparse_input + dense_input, outputs=output_layer)
    print('input: ', len(sparse_input + dense_input), sparse_input + dense_input)
    print('output: ', output_layer)
    print('model: ', model)
    return model

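# A minimal usage sketch for pnn_model with made-up, label-encoded toy data.
# The column names, sizes and the single training step are illustrative only.
import numpy as np
import pandas as pd

toy_train = pd.DataFrame({'user_id': [0, 1, 2, 3], 'item_id': [0, 1, 0, 2],
                          'price': [0.2, 0.5, 0.1, 0.9], 'age': [23, 31, 45, 19]})
toy_test = pd.DataFrame({'user_id': [1, 4], 'item_id': [2, 3],
                         'price': [0.4, 0.7], 'age': [28, 52]})

pnn = pnn_model(sparse_columns=['user_id', 'item_id'],
                dense_columns=['price', 'age'],
                train=toy_train, test=toy_test)
pnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
pnn.fit([toy_train[c].values for c in ['user_id', 'item_id', 'price', 'age']],
        np.array([0, 1, 0, 1]), epochs=1, verbose=0)
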
def MLR(region_feature_dim_dict, base_feature_dim_dict={"sparse": {}, "dense": []}, region_num=4,
        l2_reg_linear=1e-5, init_std=0.0001, seed=1024, final_activation='sigmoid',
        bias_feature_dim_dict={"sparse": {}, "dense": []}):
    """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model.

    :param region_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param base_feature_dim_dict: dict or None, to indicate sparse field and dense field of the base learner. If None, it is the same as region_feature_dim_dict
    :param region_num: integer > 1, indicates the piece number
    :param l2_reg_linear: float. L2 regularizer strength applied to weights
    :param init_std: float, to use as the initialization std of the embedding vectors
    :param seed: integer, to use as random seed
    :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'``
    :param bias_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :return: A Keras model instance.
    """
    if region_num <= 1:
        raise ValueError("region_num must > 1")
    if not isinstance(region_feature_dim_dict,
                      dict) or "sparse" not in region_feature_dim_dict or "dense" not in region_feature_dim_dict:
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

    same_flag = False
    if base_feature_dim_dict == {"sparse": {}, "dense": []}:
        base_feature_dim_dict = region_feature_dim_dict
        same_flag = True

    region_sparse_input, region_dense_input, base_sparse_input, base_dense_input, bias_sparse_input, bias_dense_input = get_input(
        region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, same_flag)
    region_embeddings, base_embeddings, bias_embedding = get_embedding(
        region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed, l2_reg_linear)

    if same_flag:
        base_dense_input_ = region_dense_input
        base_sparse_input_ = region_sparse_input
    else:
        base_dense_input_ = base_dense_input
        base_sparse_input_ = base_sparse_input

    region_dense_feature_num = len(region_feature_dim_dict['dense'])
    region_sparse_feature_num = len(region_feature_dim_dict['sparse'])
    base_dense_feature_num = len(base_feature_dim_dict['dense'])
    base_sparse_feature_num = len(base_feature_dim_dict['sparse'])
    bias_dense_feature_num = len(bias_feature_dim_dict['dense'])
    bias_sparse_feature_num = len(bias_feature_dim_dict['sparse'])

    if region_dense_feature_num > 1:
        region_dense_logits_ = [Dense(1, )(Concatenate()(region_dense_input)) for _ in range(region_num)]
    elif region_dense_feature_num == 1:
        region_dense_logits_ = [Dense(1, )(region_dense_input[0]) for _ in range(region_num)]

    if base_dense_feature_num > 1:
        base_dense_logits = [Dense(1, )(Concatenate()(base_dense_input_)) for _ in range(region_num)]
    elif base_dense_feature_num == 1:
        base_dense_logits = [Dense(1, )(base_dense_input_[0]) for _ in range(region_num)]

    if region_dense_feature_num > 0 and region_sparse_feature_num == 0:
        region_logits = Concatenate()(region_dense_logits_)
    elif region_dense_feature_num == 0 and region_sparse_feature_num > 0:
        region_sparse_logits = [
            add([region_embeddings[j][i](region_sparse_input[i]) for i in range(region_sparse_feature_num)])
            if region_sparse_feature_num > 1 else region_embeddings[j][0](region_sparse_input[0])
            for j in range(region_num)]
        region_logits = Concatenate()(region_sparse_logits)
    else:
        region_sparse_logits = [
            add([region_embeddings[j][i](region_sparse_input[i]) for i in range(region_sparse_feature_num)])
            for j in range(region_num)]
        region_logits = Concatenate()(
            [add([region_sparse_logits[i], region_dense_logits_[i]]) for i in range(region_num)])

    if base_dense_feature_num > 0 and base_sparse_feature_num == 0:
        base_logits = base_dense_logits
    elif base_dense_feature_num == 0 and base_sparse_feature_num > 0:
        base_sparse_logits = [add(
            [base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)])
            if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0])
            for j in range(region_num)]
        base_logits = base_sparse_logits
    else:
        base_sparse_logits = [add(
            [base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)])
            if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0])
            for j in range(region_num)]
        base_logits = [add([base_sparse_logits[i], base_dense_logits[i]]) for i in range(region_num)]

    region_weights = Activation("softmax")(region_logits)  # Dense(self.region_num, activation='softmax')(final_logit)
    learner_score = Concatenate()(
        [Activation(final_activation, name='learner' + str(i))(base_logits[i]) for i in range(region_num)])
    final_logit = dot([region_weights, learner_score], axes=-1)

    if bias_dense_feature_num + bias_sparse_feature_num > 0:
        if bias_dense_feature_num > 1:
            bias_dense_logits = Dense(1,)(Concatenate()(bias_dense_input))
        elif bias_dense_feature_num == 1:
            bias_dense_logits = Dense(1,)(bias_dense_input[0])
        else:
            pass

        if bias_sparse_feature_num > 1:
            bias_cate_logits = add([bias_embedding[i](bias_sparse_input[i])
                                    for i, feat in enumerate(bias_feature_dim_dict['sparse'])])
        elif bias_sparse_feature_num == 1:
            bias_cate_logits = bias_embedding[0](bias_sparse_input[0])
        else:
            pass

        if bias_dense_feature_num > 0 and bias_sparse_feature_num > 0:
            bias_logits = add([bias_dense_logits, bias_cate_logits])
        elif bias_dense_feature_num > 0:
            bias_logits = bias_dense_logits
        else:
            bias_logits = bias_cate_logits

        bias_prob = Activation('sigmoid')(bias_logits)
        final_logit = dot([final_logit, bias_prob], axes=-1)

    output = Reshape([1])(final_logit)
    model = Model(inputs=region_sparse_input + region_dense_input + base_sparse_input + base_dense_input +
                         bias_sparse_input + bias_dense_input, outputs=output)
    return model

def seq2seq_architecture(latent_size, vocabulary_size, max_len_article, embedding_matrix, batch_size, epochs,
                         train_article, train_summary, train_target):
    # encoder
    encoder_inputs = Input(shape=(None,), name='Encoder-Input')
    encoder_embeddings = Embedding(vocabulary_size, 300, weights=[embedding_matrix], trainable=False,
                                   mask_zero=True, name='Encoder-Word-Embedding')
    norm_encoder_embeddings = BatchNormalization(name='Encoder-Batch-Normalization')
    encoder_lstm_1 = LSTM(latent_size, name='Encoder-LSTM-1', return_sequences=True,
                          dropout=0.2, recurrent_dropout=0.2)

    e = encoder_embeddings(encoder_inputs)
    e = norm_encoder_embeddings(e)
    encoder_outputs = encoder_lstm_1(e)
    encoder_last = encoder_outputs[:, -1, :]

    # decoder
    decoder_inputs = Input(shape=(None,), name='Decoder-Input')
    decoder_embeddings = Embedding(vocabulary_size, 300, weights=[embedding_matrix], trainable=False,
                                   mask_zero=True, name='Decoder-Word-Embedding')
    norm_decoder_embeddings = BatchNormalization(name='Decoder-Batch-Normalization-1')
    decoder_lstm_1 = LSTM(latent_size, name='Decoder-LSTM-1', return_sequences=True,
                          dropout=0.2, recurrent_dropout=0.2)
    norm_decoder = BatchNormalization(name='Decoder-Batch-Normalization-2')
    attention_activation = Activation('softmax', name='Attention')
    dense_intermediate = TimeDistributed(Dense(64, activation="tanh", name="Intermediate-Output-Dense"))
    dense_final = TimeDistributed(Dense(vocabulary_size, activation="softmax", name="Final-Output-Dense"))

    d = decoder_embeddings(decoder_inputs)
    d = norm_decoder_embeddings(d)
    decoder_outputs = decoder_lstm_1(d, initial_state=[encoder_last, encoder_last])
    decoder_outputs = norm_decoder(decoder_outputs)

    # attention over the encoder outputs
    attention = dot([decoder_outputs, encoder_outputs], axes=[2, 2])
    attention = attention_activation(attention)
    context = dot([attention, encoder_outputs], axes=[2, 1])
    decoder_combined_context = concatenate([context, decoder_outputs])
    outputs = dense_intermediate(decoder_combined_context)
    decoder_last = dense_final(outputs)

    seq2seq_model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_last)
    seq2seq_model.compile(optimizer="rmsprop", loss='sparse_categorical_crossentropy',
                          metrics=['sparse_categorical_accuracy'])
    seq2seq_model.summary()

    classes = [item for sublist in train_summary.tolist() for item in sublist]
    class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(classes), y=classes)
    # Keras expects class_weight as a dict mapping class index to weight.
    class_weights = dict(zip(np.unique(classes), class_weights))

    e_stopping = EarlyStopping(monitor='val_loss', patience=4, verbose=1, mode='min',
                               restore_best_weights=True)
    history = seq2seq_model.fit(x=[train_article, train_summary], y=np.expand_dims(train_target, -1),
                                batch_size=batch_size, epochs=epochs, validation_split=0.1,
                                class_weight=class_weights, callbacks=[e_stopping])

    f = open("data/models/results.txt", "w", encoding="utf-8")
    f.write("Attention LSTM \n layers: 1 \n latent size: " + str(latent_size) +
            "\n vocab size: " + str(vocabulary_size) + "\n")
    f.close()

    history_dict = history.history
    plot_loss(history_dict)

    return seq2seq_model

def linear_discriminative_eigvals(y, X, lambda_val=1e-3, ret_vecs=False):
    """Compute the linear discriminative eigenvalues.

    Usage:
    >>> y = [0, 0, 1, 1]
    >>> X = [[1, -2], [-3, 2], [1, 1.4], [-3.5, 1]]
    >>> eigvals = linear_discriminative_eigvals(y, X, 2)
    >>> eigvals.numpy()
    [-0.33328852 -0.17815116]

    Parameters
    ----------
    y: tf.Tensor, np.ndarray
        Ground truth values, with shape [N, 1]
    X: tf.Tensor, np.ndarray
        The predicted values (i.e., features), with shape [N, d].
    lambda_val: float
        Lambda for stabilizing the right-hand-side matrix of the generalized eigenvalue problem
    ret_vecs: bool
        Return eigenvectors or not.
        **Notice:** If False, only eigenvalues are returned and this function supports
        backpropagation (used for training); if True, both eigenvalues and eigenvectors
        are returned but backpropagation is undefined (used for validation).

    Returns
    -------
    eigvals: tf.Tensor
        Linear discriminative eigenvalues, with shape [cls]

    References:
        Dorfer M, Kelz R, Widmer G. Deep linear discriminant analysis[J].
        arXiv preprint arXiv:1511.04707, 2015.
    """
    X = tf.convert_to_tensor(X, tf.float32)                     # [N, d]
    y = tf.squeeze(tf.cast(tf.convert_to_tensor(y), tf.int32))  # [N]
    y.set_shape(X.shape[:-1])                                   # [N]

    classes = tf.sort(tf.unique(y).y)
    num_classes = tf.shape(classes)[0]

    def compute_cov(args):
        i, Xcopy, ycopy = args
        # Hypothesis: equal number of samples (Ni) for each class
        Xg = Xcopy[ycopy == i]                                   # [None, d]
        Xg_bar = Xg - tf.reduce_mean(Xg, axis=0, keepdims=True)  # [None, d]
        m = tf.cast(tf.shape(Xg_bar)[0], tf.float32)             # []
        Xg_bar_dummy_batch = tf.expand_dims(Xg_bar, axis=0)      # [1, None, d]
        return (1. / (m - 1)) * tf.squeeze(
            dot([Xg_bar_dummy_batch, Xg_bar_dummy_batch], axes=1), axis=0)  # [d, d]

    # Covariance matrices for all the classes
    covs_t = tf.map_fn(
        compute_cov,
        (classes,
         tf.repeat(tf.expand_dims(X, 0), num_classes, axis=0),
         tf.repeat(tf.expand_dims(y, 0), num_classes, axis=0)),
        dtype=tf.float32)  # [cls, d, d]

    # Within-class scatter matrix
    Sw = tf.reduce_mean(covs_t, axis=0)  # [d, d]

    # Total scatter matrix
    X_bar = X - tf.reduce_mean(X, axis=0, keepdims=True)  # [N, d]
    m = tf.cast(X_bar.shape[0], tf.float32)               # []
    X_bar_dummy_batch = tf.expand_dims(X_bar, axis=0)     # [1, N, d]
    St = (1. / (m - 1)) * tf.squeeze(
        dot([X_bar_dummy_batch, X_bar_dummy_batch], axes=1), axis=0)  # [d, d]

    # Between-class scatter matrix
    Sb = St - Sw  # [d, d]

    # Force Sw to be positive-definite (for numerical stability)
    Sw = Sw + tf.eye(Sw.shape[0]) * lambda_val  # [d, d]

    # Solve the generalized eigenvalue problem: Sb * W = lambda * Sw * W
    # We use the custom `eigh`/`eigvalsh` functions for the generalized eigenvalue problem
    if ret_vecs:
        return eigh(Sb, Sw)      # [cls], [d, cls]
    else:
        return eigvalsh(Sb, Sw)  # [cls]

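# Reproducing the docstring example (assumes eager TensorFlow 2.x and that the
# custom `eigvalsh` helper referenced above is available in scope).
import numpy as np

y_demo = np.array([0, 0, 1, 1])
X_demo = np.array([[1, -2], [-3, 2], [1, 1.4], [-3.5, 1]], dtype=np.float32)
eigvals = linear_discriminative_eigvals(y_demo, X_demo, 2)
print(eigvals.numpy())  # approximately [-0.33328852 -0.17815116] per the docstring
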
def build_model(self):
    """Helper method for creating the model"""
    vocab = set()
    for story, q, answer in self.train_stories + self.test_stories:
        vocab |= set(story + q + [answer])
    vocab = sorted(vocab)

    # Reserve 0 for masking via pad_sequences
    vocab_size = len(vocab) + 1
    story_maxlen = max(len(x) for x, _, _ in self.train_stories + self.test_stories)
    query_maxlen = max(len(x) for _, x, _ in self.train_stories + self.test_stories)

    word_idx = {c: i + 1 for i, c in enumerate(vocab)}
    self.inputs_train, self.queries_train, self.answers_train = (
        vectorize_stories(word_idx, story_maxlen, query_maxlen, self.train_stories))
    self.inputs_test, self.queries_test, self.answers_test = (
        vectorize_stories(word_idx, story_maxlen, query_maxlen, self.test_stories))

    # placeholders
    input_sequence = Input((story_maxlen,))
    question = Input((query_maxlen,))

    # encoders
    # embed the input sequence into a sequence of vectors
    input_encoder_m = Sequential()
    input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64))
    input_encoder_m.add(Dropout(self.config.get("dropout", 0.3)))
    # output: (samples, story_maxlen, embedding_dim)

    # embed the input into a sequence of vectors of size query_maxlen
    input_encoder_c = Sequential()
    input_encoder_c.add(Embedding(input_dim=vocab_size, output_dim=query_maxlen))
    input_encoder_c.add(Dropout(self.config.get("dropout", 0.3)))
    # output: (samples, story_maxlen, query_maxlen)

    # embed the question into a sequence of vectors
    question_encoder = Sequential()
    question_encoder.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=query_maxlen))
    question_encoder.add(Dropout(self.config.get("dropout", 0.3)))
    # output: (samples, query_maxlen, embedding_dim)

    # encode input sequence and questions (which are indices)
    # to sequences of dense vectors
    input_encoded_m = input_encoder_m(input_sequence)
    input_encoded_c = input_encoder_c(input_sequence)
    question_encoded = question_encoder(question)

    # compute a "match" between the first input vector sequence
    # and the question vector sequence
    # shape: `(samples, story_maxlen, query_maxlen)`
    match = dot([input_encoded_m, question_encoded], axes=(2, 2))
    match = Activation("softmax")(match)

    # add the match matrix with the second input vector sequence
    response = add([match, input_encoded_c])  # (samples, story_maxlen, query_maxlen)
    response = Permute((2, 1))(response)      # (samples, query_maxlen, story_maxlen)

    # concatenate the match matrix with the question vector sequence
    answer = concatenate([response, question_encoded])

    # the original paper uses a matrix multiplication.
    # we choose to use a RNN instead.
    answer = LSTM(32)(answer)  # (samples, 32)

    # one regularization layer -- more would probably be needed.
    answer = Dropout(self.config.get("dropout", 0.3))(answer)
    answer = Dense(vocab_size)(answer)  # (samples, vocab_size)

    # we output a probability distribution over the vocabulary
    answer = Activation("softmax")(answer)

    # build the final model
    model = Model([input_sequence, question], answer)
    return model

def MLR(region_feature_dim_dict, base_feature_dim_dict={"sparse": {}, "dense": []}, region_num=4,
        l2_reg_linear=1e-5, init_std=0.0001, seed=1024, final_activation='sigmoid',
        bias_feature_dim_dict={"sparse": {}, "dense": []}):
    """Instantiates the Mixed Logistic Regression/Piece-wise Linear Model.

    :param region_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :param base_feature_dim_dict: dict or None, to indicate sparse field and dense field of the base learner. If None, it is the same as region_feature_dim_dict
    :param region_num: integer > 1, indicates the piece number
    :param l2_reg_linear: float. L2 regularizer strength applied to weights
    :param init_std: float, to use as the initialization std of the embedding vectors
    :param seed: integer, to use as random seed
    :param final_activation: str, output activation, usually ``'sigmoid'`` or ``'linear'``
    :param bias_feature_dim_dict: dict, to indicate sparse field and dense field like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_4','field_5']}
    :return: A Keras model instance.
    """
    if region_num <= 1:
        raise ValueError("region_num must > 1")
    if not isinstance(region_feature_dim_dict,
                      dict) or "sparse" not in region_feature_dim_dict or "dense" not in region_feature_dim_dict:
        raise ValueError(
            "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

    same_flag = False
    if base_feature_dim_dict == {"sparse": {}, "dense": []}:
        base_feature_dim_dict = region_feature_dim_dict
        same_flag = True

    region_sparse_input, region_dense_input, base_sparse_input, base_dense_input, bias_sparse_input, bias_dense_input = get_input(
        region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, same_flag)
    region_embeddings, base_embeddings, bias_embedding = get_embedding(
        region_num, region_feature_dim_dict, base_feature_dim_dict, bias_feature_dim_dict, init_std, seed, l2_reg_linear)

    if same_flag:
        base_dense_input_ = region_dense_input
        base_sparse_input_ = region_sparse_input
    else:
        base_dense_input_ = base_dense_input
        base_sparse_input_ = base_sparse_input

    region_dense_feature_num = len(region_feature_dim_dict['dense'])
    region_sparse_feature_num = len(region_feature_dim_dict['sparse'])
    base_dense_feature_num = len(base_feature_dim_dict['dense'])
    base_sparse_feature_num = len(base_feature_dim_dict['sparse'])
    bias_dense_feature_num = len(bias_feature_dim_dict['dense'])
    bias_sparse_feature_num = len(bias_feature_dim_dict['sparse'])

    if region_dense_feature_num > 1:
        region_dense_logits_ = [Dense(1, )(Concatenate()(region_dense_input)) for _ in range(region_num)]
    elif region_dense_feature_num == 1:
        region_dense_logits_ = [Dense(1, )(region_dense_input[0]) for _ in range(region_num)]

    if base_dense_feature_num > 1:
        base_dense_logits = [Dense(1, )(Concatenate()(base_dense_input_)) for _ in range(region_num)]
    elif base_dense_feature_num == 1:
        base_dense_logits = [Dense(1, )(base_dense_input_[0]) for _ in range(region_num)]

    if region_dense_feature_num > 0 and region_sparse_feature_num == 0:
        region_logits = Concatenate()(region_dense_logits_)
    elif region_dense_feature_num == 0 and region_sparse_feature_num > 0:
        region_sparse_logits = [
            add([region_embeddings[j][i](region_sparse_input[i]) for i in range(region_sparse_feature_num)])
            if region_sparse_feature_num > 1 else region_embeddings[j][0](region_sparse_input[0])
            for j in range(region_num)]
        region_logits = Concatenate()(region_sparse_logits)
    else:
        region_sparse_logits = [
            add([region_embeddings[j][i](region_sparse_input[i]) for i in range(region_sparse_feature_num)])
            for j in range(region_num)]
        region_logits = Concatenate()(
            [add([region_sparse_logits[i], region_dense_logits_[i]]) for i in range(region_num)])

    if base_dense_feature_num > 0 and base_sparse_feature_num == 0:
        base_logits = base_dense_logits
    elif base_dense_feature_num == 0 and base_sparse_feature_num > 0:
        base_sparse_logits = [add(
            [base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)])
            if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0])
            for j in range(region_num)]
        base_logits = base_sparse_logits
    else:
        base_sparse_logits = [add(
            [base_embeddings[j][i](base_sparse_input_[i]) for i in range(base_sparse_feature_num)])
            if base_sparse_feature_num > 1 else base_embeddings[j][0](base_sparse_input_[0])
            for j in range(region_num)]
        base_logits = [add([base_sparse_logits[i], base_dense_logits[i]]) for i in range(region_num)]

    # Dense(self.region_num, activation='softmax')(final_logit)
    region_weights = Activation("softmax")(region_logits)
    learner_score = Concatenate()(
        [Activation(final_activation, name='learner' + str(i))(base_logits[i]) for i in range(region_num)])
    final_logit = dot([region_weights, learner_score], axes=-1)

    if bias_dense_feature_num + bias_sparse_feature_num > 0:
        if bias_dense_feature_num > 1:
            bias_dense_logits = Dense(1,)(Concatenate()(bias_dense_input))
        elif bias_dense_feature_num == 1:
            bias_dense_logits = Dense(1,)(bias_dense_input[0])
        else:
            pass

        if bias_sparse_feature_num > 1:
            bias_cate_logits = add([bias_embedding[i](bias_sparse_input[i])
                                    for i, feat in enumerate(bias_feature_dim_dict['sparse'])])
        elif bias_sparse_feature_num == 1:
            bias_cate_logits = bias_embedding[0](bias_sparse_input[0])
        else:
            pass

        if bias_dense_feature_num > 0 and bias_sparse_feature_num > 0:
            bias_logits = add([bias_dense_logits, bias_cate_logits])
        elif bias_dense_feature_num > 0:
            bias_logits = bias_dense_logits
        else:
            bias_logits = bias_cate_logits

        bias_prob = Activation('sigmoid')(bias_logits)
        final_logit = dot([final_logit, bias_prob], axes=-1)

    output = Reshape([1])(final_logit)
    model = Model(inputs=region_sparse_input + region_dense_input + base_sparse_input + base_dense_input +
                         bias_sparse_input + bias_dense_input, outputs=output)
    return model