def D_model():
    base = 32
    inputs = Input([img_height, img_width, channel + num_classes])
    x = Conv2D(base, (5, 5), padding='same', strides=(2, 2), name='d_conv1',
               kernel_initializer=RN(mean=0.0, stddev=0.02), use_bias=False)(inputs)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2D(base * 2, (5, 5), padding='same', strides=(2, 2), name='d_conv2',
               kernel_initializer=RN(mean=0.0, stddev=0.02), use_bias=False)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2D(base * 4, (5, 5), padding='same', strides=(2, 2), name='d_conv3',
               kernel_initializer=RN(mean=0.0, stddev=0.02), use_bias=False)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Conv2D(base * 8, (5, 5), padding='same', strides=(2, 2), name='d_conv4',
               kernel_initializer=RN(mean=0.0, stddev=0.02), use_bias=False)(x)
    x = LeakyReLU(alpha=0.2)(x)
    x = Flatten()(x)
    x = Dense(1, activation='sigmoid', name='d_out',
              kernel_initializer=RN(mean=0.0, stddev=0.02),
              bias_initializer=Constant())(x)
    model = Model(inputs=inputs, outputs=x, name='D')
    return model
def model_lstm(learning_rate=0.01, dropout=0.2, recurrent_dropout=0.2):
    model = Sequential()
    if embeddings_index is None:
        model.add(Embedding(num_words, 300,
                            embeddings_initializer='glorot_uniform',
                            input_length=max_token_list_len))
    else:
        model.add(Embedding(num_words, 300,
                            embeddings_initializer=Constant(embedding_matrix),
                            input_length=max_token_list_len,
                            trainable=False))
    model.add(LSTM(8, dropout=dropout, recurrent_dropout=recurrent_dropout))
    model.add(Dense(2 if mapping != 'A' else 3, activation='softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=learning_rate),
                  metrics=['acc'])
    return model
def build_model(embedding_matrix: np.array, num_other_results: int):
    inp = Input(shape=(MAX_SEQUENCE_LENGTH, ))
    x = Embedding(embedding_matrix.shape[0],
                  embedding_matrix.shape[1],
                  embeddings_initializer=Constant(embedding_matrix),
                  input_length=MAX_SEQUENCE_LENGTH,
                  trainable=False)(inp)
    x = SpatialDropout1D(0.2)(x)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)
    x = Bidirectional(CuDNNLSTM(LSTM_UNITS, return_sequences=True))(x)
    x = concatenate([GlobalMaxPooling1D()(x), GlobalAveragePooling1D()(x)])
    x = add([x, Dense(DENSE_HIDDEN_UNITS, activation='relu')(x)])
    x = add([x, Dense(DENSE_HIDDEN_UNITS, activation='relu')(x)])
    result = Dense(1, activation='sigmoid')(x)
    other_results = Dense(num_other_results, activation='sigmoid')(x)
    model = Model(inputs=inp, outputs=[result, other_results])
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc', binary_accuracy])
    return model
def new_res_block(inp, filters, kernel_size=3, padding='same', **kwargs):
    """ Residual block """
    kwargs = update_kwargs(kwargs)
    var_x = LeakyReLU(alpha=0.2)(inp)
    var_x = ReflectionPadding2D(stride=1, kernel_size=kernel_size)(var_x)
    padding = 'valid'
    var_x = Conv2D(filters, kernel_size=kernel_size, padding=padding, **kwargs)(var_x)
    var_x = LeakyReLU(alpha=0.2)(var_x)
    var_x = ReflectionPadding2D(stride=1, kernel_size=kernel_size)(var_x)
    padding = 'valid'
    var_x = Conv2D(filters, kernel_size=kernel_size, padding=padding, **kwargs)(var_x)
    var_x = Scale(gamma_init=Constant(value=0.1))(var_x)
    var_x = Add()([var_x, inp])
    var_x = LeakyReLU(alpha=0.2)(var_x)
    return var_x
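# The residual block above relies on a custom ReflectionPadding2D layer that is not
# shown in this file. The sketch below is a minimal stand-in, assuming the
# (stride, kernel_size) arguments are used to derive a symmetric reflection pad of
# (kernel_size - stride) // 2 pixels per side so the following 'valid' convolution
# preserves spatial size; the original implementation may differ.
import tensorflow as tf
from keras.layers import Layer


class ReflectionPadding2D(Layer):
    """Reflection-pad height and width of a channels-last 4D tensor."""

    def __init__(self, stride=1, kernel_size=3, **kwargs):
        self.pad = (kernel_size - stride) // 2  # assumed padding rule
        super().__init__(**kwargs)

    def compute_output_shape(self, input_shape):
        height = input_shape[1] + 2 * self.pad if input_shape[1] is not None else None
        width = input_shape[2] + 2 * self.pad if input_shape[2] is not None else None
        return (input_shape[0], height, width, input_shape[3])

    def call(self, x):
        p = self.pad
        # pad only the spatial dimensions, reflecting values at the border
        return tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]], mode='REFLECT')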
def test_ow3_constraint(self):
    """ Test ow-pool with mean weights"""
    constraint = PosUnitModule(axis=3)
    neg_ones_ini = -np.ones((1, 1, self.x_input.shape[3],
                             self.pool_size[0] * self.pool_size[1]))
    w_initializer = Constant(value=neg_ones_ini)
    x = OW3Pooling2D(pool_size=self.pool_size, name='ow', padding='same',
                     weights_constraint=constraint,
                     weights_initializer=w_initializer)(self.input_tensor)
    x = Flatten()(x)
    x = Activation('softmax')(x)
    ow_model = Model(self.input_tensor, x)
    ow_model.compile(optimizer=self.optimizer,
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
    ow_model.fit(self.x_input, self.y, epochs=5, verbose=0)
    ow_layer = ow_model.layers[-3]
    ow_weights = ow_layer.get_weights()[0]
    np.testing.assert_array_almost_equal(np.sum(ow_weights, axis=3),
                                         [[[1, 1]]], decimal=5)
    self.assertFalse(np.sum(ow_weights < 0))
def get_shallow_convnet(window_size=4096, channels=2, output_size=84):
    inputs = Input(shape=(window_size, channels))

    conv = ComplexConv1D(32, 512, strides=16, activation='relu')(inputs)
    pool = AveragePooling1D(pool_size=4, strides=2)(conv)
    pool = Permute([2, 1])(pool)
    flattened = Flatten()(pool)

    dense = ComplexDense(2048, activation='relu')(flattened)
    predictions = ComplexDense(output_size,
                               activation='sigmoid',
                               bias_initializer=Constant(value=-5))(dense)
    predictions = GetReal(predictions)

    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer=Adam(lr=1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
def test_guided_grad_modifier():
    # Create a simple linear sequence x -> linear(w.x) with weights w1 = -1, w2 = 1.
    inp = Input(shape=(2, ))
    out = Dense(1, activation='linear', use_bias=False,
                kernel_initializer=Constant([-1., 1.]))(inp)
    model = Model(inp, out)

    # Original model gradient should be [w1, w2]
    assert np.array_equal(_compute_grads(model, [1., -1.]), [-1., 1.])

    # Original gradient is [-1, 1] but new gradient should be [0, 0]
    # First one is clipped because of negative gradient while the second is clipped due to negative input.
    modified_model = modify_model_backprop(model, 'guided')
    assert np.array_equal(_compute_grads(modified_model, [1., -1.]), [0., 0.])

    # Ensure that the original model reference remains unchanged.
    assert model.layers[1].activation == get('linear')
    assert modified_model.layers[1].activation == get('relu')
def run_complex_embedding_network_mixture(lookup_table, max_sequence_length,
                                          nb_classes=2, random_init=True,
                                          embedding_trainable=True):
    embedding_dimension = lookup_table.shape[1]
    sequence_input = Input(shape=(max_sequence_length, ), dtype='int32')

    phase_embedding = phase_embedding_layer(
        max_sequence_length, lookup_table.shape[0], embedding_dimension,
        trainable=embedding_trainable)(sequence_input)
    amplitude_embedding = amplitude_embedding_layer(
        np.transpose(lookup_table), max_sequence_length,
        trainable=embedding_trainable, random_init=random_init)(sequence_input)

    [seq_embedding_real, seq_embedding_imag
     ] = ComplexMultiply()([phase_embedding, amplitude_embedding])
    [sentence_embedding_real, sentence_embedding_imag
     ] = ComplexMixture()([seq_embedding_real, seq_embedding_imag])

    sentence_embedding_real = Flatten()(sentence_embedding_real)
    sentence_embedding_imag = Flatten()(sentence_embedding_imag)
    # output = Complex1DProjection(dimension=embedding_dimension)([sentence_embedding_real, sentence_embedding_imag])

    predictions = ComplexDense(units=nb_classes, activation='sigmoid',
                               bias_initializer=Constant(value=-1))(
                                   [sentence_embedding_real, sentence_embedding_imag])
    output = GetReal()(predictions)
    model = Model(sequence_input, output)
    return model
def basicModel(embedding_matrix, MAX_NB_WORDS, MAX_PARA_LENGTH, MAX_PARAS):
    embedding_layer = Embedding(
        MAX_NB_WORDS + 1,
        EMBEDDING_DIM,
        #weights = [embedding_matrix],
        embeddings_initializer=Constant(embedding_matrix),
        mask_zero=True,
        input_length=MAX_PARA_LENGTH,
        trainable=False)

    para_input = Input(shape=(MAX_PARA_LENGTH, ), dtype='int32')
    embedded_sequences = embedding_layer(para_input)
    #norm_sequence = BatchNormalization()(embedded_sequences)
    l_lstm_sen = Bidirectional(
        GRU(100, return_sequences=True, implementation=2))(embedded_sequences)
    #drop_out = Dropout(0.2)(l_lstm_sen)
    l_att = AttLayer()(l_lstm_sen)
    weighted_sum = WeightedSum()([l_lstm_sen, l_att])
    paraEncoder = Model(para_input, weighted_sum)
    paraEncoder.summary()

    doc_input = Input(shape=(MAX_PARAS, MAX_PARA_LENGTH), dtype='int32')
    doc_encoder = TimeDistributed(paraEncoder)(doc_input)
    mask_doc = Masking(mask_value=0.0)(doc_encoder)
    #norm_doc = BatchNormalization()(mask_doc)
    l_lstm_para = Bidirectional(
        GRU(100, return_sequences=True, implementation=2))(mask_doc)
    #norm_doc_1 = BatchNormalization()(l_lstm_para)
    #drop_out = Dropout(0.2)(l_lstm_para)
    l_att_para = AttLayer()(l_lstm_para)
    weighted_sum_doc = WeightedSum()([l_lstm_para, l_att_para])
    batch_norm = BatchNormalization()(weighted_sum_doc)
    drop_out = Dropout(0.2)(batch_norm)
    preds = Dense(1, activation='sigmoid', kernel_regularizer=l12_reg)(drop_out)

    model = Model(doc_input, preds)
    model.summary()
    return model
def create_model(observation_space, action_space, args):
    assert isinstance(observation_space, gym.spaces.Box)
    assert isinstance(action_space, gym.spaces.Box) \
        or isinstance(action_space, gym.spaces.Discrete)

    h = x = Input(shape=observation_space.shape)
    for i in range(args.hidden_layers):
        h = Dense(args.hidden_nodes, activation=args.activation_function)(h)

    if isinstance(action_space, gym.spaces.Discrete):
        # produce logits for all actions
        h = Dense(action_space.n)(h)
        # sample action from logits
        a = Lambda(lambda x: tf.multinomial(x, num_samples=1))(h)
        # turn logits into probabilities
        p = Activation('softmax')(h)
        # model outputs sampled action
        model = Model(x, a)
        # loss is between true values and probabilities
        model.compile(optimizer=RMSprop(lr=args.learning_rate),
                      loss=lambda y_true, y_pred:
                          sparse_categorical_crossentropy(y_true, p))
    else:
        # number of actions
        n = np.prod(action_space.shape)
        # produce means and stddevs for Gaussian
        mu = Dense(n)(h)
        logstd = Dense(n, bias_initializer=Constant(np.log(args.stddev)))(h)
        std = Activation(K.exp)(logstd)
        # sample action from Gaussian
        a = Lambda(lambda x: mu + std * K.random_normal(K.shape(mu)))([mu, std])
        # model outputs sampled action
        model = Model(x, a)
        # negative log likelihood of Gaussian
        model.compile(optimizer=RMSprop(lr=args.learning_rate, clipnorm=1.),
                      loss=lambda y_true, y_pred:
                          0.5 * np.log(2 * np.pi) + logstd
                          + 0.5 * ((y_true - mu) / std) ** 2)

    model.summary()
    return model
def base_model():
    model = Sequential()
    embedding_layer = Embedding(
        num_words,
        300,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=max_length,
        trainable=False)
    model.add(embedding_layer)
    #model.add(Embedding(len(word_index),128,input_length=max_length))
    model.add(CuDNNLSTM(100))
    model.add(Dense(len(categories), activation='softmax'))
    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy', util.precision_m, util.recall_m, util.f1_m])
    #tf.keras.utils.plot_model(model, to_file='lstm_model.png', show_shapes=True)
    model.summary()
    return model
def RNN_Net_simple(num_words, EMBEDDING_DIM, embedding_matrix, max_length):
    recall = tf.keras.metrics.Recall()
    precision = tf.keras.metrics.Precision()
    auc = tf.keras.metrics.AUC()

    # define model
    model = Sequential()
    embedding_layer = Embedding(num_words,
                                EMBEDDING_DIM,
                                embeddings_initializer=Constant(embedding_matrix),
                                input_length=max_length,
                                trainable=False)
    # add layers
    model.add(embedding_layer)
    model.add(SimpleRNN(units=128, dropout=0.2, recurrent_dropout=0.2))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', auc, precision, recall])
    model.summary()
    return model
def bilinear2x(layer, num_kernels, kernel_size=(5, 5), strides=(2, 2), factor=2):
    """
    https://kivantium.net/keras-bilinear
    NHWC format: filter_shape = [kernel_size[0], kernel_size[0], num_inp_channels, num_filters]
    https://www.tensorflow.org/api_docs/python/tf/nn/depthwise_conv2d_native_backprop_input
    """
    #new_layer = Conv2DTranspose(filters=num_kernels, kernel_size=kernel_size, strides=strides, padding='same', kernel_initializer=Constant(bilinear_upsample_weights(factor, num_kernels)), trainable=False)(layer)
    new_layer = Conv2DTranspose(filters=num_kernels,
                                kernel_size=kernel_size,
                                strides=strides,
                                padding='same',
                                kernel_initializer=Constant(
                                    bilinear_upsample_weights(
                                        factor, num_kernels)))(layer)
    return new_layer
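# bilinear2x depends on a bilinear_upsample_weights helper that is not defined in
# this file. The sketch below is the usual construction of such weights (a square
# bilinear kernel of size 2*factor - factor % 2, copied onto the diagonal of the
# channel dimensions); the original helper may differ, and the kernel_size passed
# to Conv2DTranspose should match the size produced here.
import numpy as np


def bilinear_upsample_weights(factor, num_channels):
    """Build Conv2DTranspose weights that perform bilinear upsampling."""
    kernel_size = 2 * factor - factor % 2
    center = factor - 1 if kernel_size % 2 == 1 else factor - 0.5
    og = np.ogrid[:kernel_size, :kernel_size]
    # outer product of two 1-D triangular (bilinear) filters
    kernel = ((1 - abs(og[0] - center) / factor) *
              (1 - abs(og[1] - center) / factor))
    weights = np.zeros((kernel_size, kernel_size, num_channels, num_channels),
                       dtype=np.float32)
    for i in range(num_channels):
        weights[:, :, i, i] = kernel  # map each input channel to itself
    return weights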
def build(self, _):
    # Flag to activate or not quantization
    self.activeSwitch = K.variable(bool(self.active), name='quantization-active')

    # Stiffness control parameter of the softmax distribution
    # TODO: is regularization actually needed here?
    self.log_beta = self.add_weight(
        shape=(1, ),
        initializer=Constant(value=np.log(self.beta)),
        regularizer=None,  # regularizers.l1(1e-3)
        trainable=True,
        name='log-beta')

    # Quantization bins uniformly distributed on the [-1, 1] interval
    self.bins = K.constant(np.linspace(-1.0, 1.0, self.nbBins).astype(np.float32),
                           name='quantization-bins')

    self.built = True
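# For context, a soft (differentiable) quantization step that such a layer's call()
# typically performs is sketched below: values are assigned to the bins via a softmax
# over negative squared distances, with exp(log_beta) controlling how close the
# assignment is to hard rounding. This is an assumption about the layer's behaviour,
# not the original implementation.
def soft_quantize(x, bins, log_beta):
    # x: any shape, bins: (nb_bins,)
    distances = K.square(K.expand_dims(x, -1) - bins)       # (..., nb_bins)
    assignments = K.softmax(-K.exp(log_beta) * distances)   # soft one-hot over bins
    return K.sum(assignments * bins, axis=-1)                # expected bin value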
def test_ow1_max(self):
    """ Test ow-pool with max weights"""
    x = MaxPooling2D(pool_size=self.pool_size, padding='same')(self.input_tensor)
    x = MaxPooling2D(pool_size=self.pool_size, padding='same')(x)
    max_model = Model(self.input_tensor, x)

    max_ini = np.zeros(self.pool_size[0] * self.pool_size[1])
    max_ini[0] = 1
    w_initializer = Constant(value=max_ini)
    x = OW1Pooling2D(pool_size=self.pool_size, padding='same',
                     weights_initializer=w_initializer)(self.input_tensor)
    x = OW1Pooling2D(pool_size=self.pool_size, padding='same',
                     weights_initializer=w_initializer)(x)
    ow_model = Model(self.input_tensor, x)

    max_prediction = max_model.predict(self.x_input)
    ow_prediction = ow_model.predict(self.x_input)
    np.testing.assert_array_almost_equal(max_prediction, ow_prediction)
def create_conv_model():
    model_conv = Sequential()
    embedding_layer = Embedding(
        vocabulary_size,
        300,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=max_length,
        trainable=False)
    model_conv.add(embedding_layer)
    model_conv.add(Dropout(0.2))
    model_conv.add(Conv1D(64, 2, activation='relu'))
    model_conv.add(MaxPooling1D(pool_size=4))
    model_conv.add(LSTM(50))
    model_conv.add(Dense(len(categories), activation='softmax'))
    model_conv.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy', util.precision_m, util.recall_m, util.f1_m])
    #plot_model(model_conv, to_file='conv_lstm_model.png', show_shapes=True)
    model_conv.summary()
    return model_conv
def create_glove_nn():
    # Code used to build the GloVe embedding layer, adapted from:
    # https://keras.io/examples/pretrained_word_embeddings/
    embeddings_index = {}
    with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f:
        for line in f:
            word, coefs = line.split(maxsplit=1)
            coefs = np.fromstring(coefs, 'f', sep=' ')
            embeddings_index[word] = coefs
    print('Found %s word vectors.' % len(embeddings_index))

    # prepare embedding matrix
    num_words = min(MAX_NUM_WORDS, len(word_index) + 1)
    embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
    for word, i in word_index.items():
        if i >= MAX_NUM_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # load pre-trained word embeddings into an Embedding layer
    # note that we set trainable = False so as to keep the embeddings fixed
    embedding_layer = Embedding(num_words,
                                EMBEDDING_DIM,
                                embeddings_initializer=Constant(embedding_matrix),
                                input_length=MAX_SEQUENCE_LENGTH,
                                trainable=False)

    nn = Sequential()
    nn.add(embedding_layer)
    nn.add(Flatten())
    nn.add(Dense(128, activation='relu'))
    nn.add(Dense(64, activation='relu'))
    nn.add(Dense(3, activation='softmax'))
    nn.compile(optimizer='rmsprop',
               loss='categorical_crossentropy',
               metrics=[metrics.mae, metrics.categorical_accuracy,
                        metrics.cosine_proximity])
    nn.summary()
    return nn
def keras_preprocess(sentences, maxlen):
    # Clean text
    clean_sents = sentences.apply(text_preprocess)

    # tokenizing
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(clean_sents)
    sequences = tokenizer.texts_to_sequences(clean_sents)
    dl_data = sequence.pad_sequences(sequences, maxlen=maxlen)
    word_index = tokenizer.word_index

    EMBEDDING_DIM = 300
    embeddings_index = {}
    for word, idx in word_index.items():
        try:
            embedding = nlp(word).vector
            embeddings_index[word] = embedding
        except:
            pass
    print('Found %s unique tokens.' % len(word_index))
    print('Total %s word vectors.' % len(embeddings_index))

    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

    embedding_layer = Embedding(
        len(word_index) + 1,
        EMBEDDING_DIM,
        embeddings_initializer=Constant(embedding_matrix),
        input_length=maxlen,
        trainable=False)
    return np.array(dl_data), embedding_layer
def get_pretrain_embeddings(path, word_index, EMBEDDING_DIM=300):
    MAX_NUM_WORDS = len(word_index)
    BASE_DIR = path + 'data/'
    GLOVE_DIR = os.path.join(BASE_DIR, 'w2v')

    print('Indexing word vectors.')
    embeddings_index = {}
    with open(os.path.join(GLOVE_DIR, 'glove.42B.300d.txt'), encoding="utf-8") as f:
        for line in f:
            word, coefs = line.split(maxsplit=1)
            coefs = np.fromstring(coefs, 'f', sep=' ')
            embeddings_index[word] = coefs
    print('Found %s word vectors.' % len(embeddings_index))

    print('Preparing embedding matrix.')
    # prepare embedding matrix
    num_words = min(MAX_NUM_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
    found = 0
    for word, i in word_index.items():
        if i > MAX_NUM_WORDS:
            continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            if embedding_vector.shape[0] == 0:
                continue
            embedding_matrix[i] = embedding_vector
            found += 1
    print("Token num: %d, Found Tokens: %d" % (len(word_index), found))

    # load pre-trained word embeddings into an Embedding layer
    embedding_layer = Embedding(num_words,
                                EMBEDDING_DIM,
                                embeddings_initializer=Constant(embedding_matrix))
    return embedding_layer
def train(x_train, y_train, x_test, y_test, layer_shape, time_steps, epoch,
          learning_rate, predict_length, embed, words_per_news, wordindex):
    event_per_days = 5
    x_train_price = _process_price(x_train, event_per_days, words_per_news)
    x_test_price = _process_price(x_test, event_per_days, words_per_news)
    x_train_events = x_train[:, :, 1]
    x_train_events = _padding(event_array=x_train_events, word_index=wordindex)
    x_test_events = x_test[:, :, 1]
    x_test_events = _padding(event_array=x_test_events, word_index=wordindex)
    num_words = len(embed) + 1

    # price_input = Input(batch_shape=(None, seq_length, event_per_days, words_per_news, 1), name='price_input', dtype="float32")
    # event_input = Input(batch_shape=(None, seq_length, event_per_days, words_per_news), name='event_input')
    price_input = Input(name='price_input', dtype="float32",
                        shape=(10, event_per_days, words_per_news, 1))
    event_input = Input(name='event_input', dtype="int32",
                        shape=(10, event_per_days, words_per_news))
    emb = Embedding(input_dim=num_words,
                    output_dim=300,
                    embeddings_initializer=Constant(embed),
                    mask_zero=False,
                    trainable=False)(event_input)
    total_input = concatenate([emb, price_input], axis=4)
    print(total_input._keras_shape)

    conv1 = Conv3D(layer_shape[0], kernel_size=(event_per_days, 3, 3),
                   activation='relu')(total_input)
    flat = Flatten()(conv1)
    out = Dense(3)(flat)
    print(out._keras_shape)

    model = Model(inputs=[price_input, event_input], outputs=[out])
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
    model.fit([x_train_price, x_train_events], [y_train],
              validation_data=([x_test_price, x_test_events], [y_test]))
def build(self):
    semantic_input = Input(shape=(self.max_sequence_length, ),
                           name='word-embedding-input')
    semantic_emb = Embedding(self.embedding_matrix.shape[0],
                             self.embedding_matrix.shape[1],
                             embeddings_initializer=Constant(self.embedding_matrix),
                             input_length=self.max_sequence_length,
                             trainable=False)(semantic_input)
    gru = Bidirectional(GRU(self.hidden_units,
                            return_sequences=True))(semantic_emb)
    avg_pool = GlobalAveragePooling1D()(gru)

    if self.emotion_dim != 0:
        emotion_input = Input(shape=(self.emotion_dim, ), name='emotion-input')
        emotion_enhanced = Concatenate()([avg_pool, emotion_input])
        dense = Dense(32, activation='relu',
                      kernel_regularizer=l2(self.l2_param))(emotion_enhanced)
        output = Dense(self.category_num, activation='softmax',
                       kernel_regularizer=l2(self.l2_param))(dense)
        model = Model(inputs=[semantic_input, emotion_input], outputs=output)
    else:
        dense = Dense(32, activation='relu',
                      kernel_regularizer=l2(self.l2_param))(avg_pool)
        output = Dense(self.category_num, activation='softmax',
                       kernel_regularizer=l2(self.l2_param))(dense)
        model = Model(inputs=[semantic_input], outputs=output)

    return model
def transform_net(inputs, num_init_filters, scope=None, regularize=False):
    """
    Generates an orthogonal transformation tensor for the input data
    :param inputs: tensor with the input image (either BxNxK or BxNx1xK)
    :param num_init_filters: base number of filters for the convolution stack
    :param scope: name of the grouping scope
    :param regularize: enforce orthogonality constraint
    :return: BxKxK tensor of the transformation
    """
    with K.name_scope(scope):
        input_shape = inputs.get_shape().as_list()
        k = input_shape[-1]
        num_points = input_shape[-2]

        net = conv1d_bn(inputs, num_filters=num_init_filters, kernel_size=1,
                        padding='valid', use_bias=True, scope='tconv1')
        net = conv1d_bn(net, num_filters=num_init_filters * 2, kernel_size=1,
                        padding='valid', use_bias=True, scope='tconv2')
        net = conv1d_bn(net, num_filters=num_init_filters * 16, kernel_size=1,
                        padding='valid', use_bias=True, scope='tconv3')
        # net = conv1d_bn(net, num_filters=num_init_filters * 8, kernel_size=1,
        #                 padding='valid', use_bias=True, scope='tconv2')

        # Done in 2D since 1D is painfully slow
        net = MaxPooling2D(pool_size=(num_points, 1),
                           padding='valid')(Lambda(K.expand_dims)(net))
        net = Flatten()(net)
        net = dense_bn(net, units=num_init_filters * 8, scope='tfc1',
                       activation='relu')
        net = dense_bn(net, units=num_init_filters * 4, scope='tfc2',
                       activation='relu')
        # net = dense_bn(net, units=num_init_filters * 4, scope='tfc2', activation='relu')

        transform = Dense(units=k * k,
                          kernel_initializer='zeros',
                          bias_initializer=Constant(np.eye(k).flatten()),
                          activity_regularizer=orthogonal(l2=0.001) if regularize else None)(net)
        transform = Reshape((k, k))(transform)

    return transform
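# transform_net references an orthogonal() activity regularizer that is not defined
# here. A minimal sketch is given below, assuming it penalises the deviation of
# T.T^t from the identity on the flattened k*k output (the usual PointNet-style
# transform regulariser); the original may differ in form or scaling.
def orthogonal(l2=0.001):
    def regularizer(x):
        # x arrives flattened as (batch, k * k); recover the (batch, k, k) matrices
        k = int(np.sqrt(K.int_shape(x)[-1]))
        t = K.reshape(x, (-1, k, k))
        # penalise || T T^t - I ||^2 summed over the batch
        diff = K.batch_dot(t, t, axes=(2, 2)) - K.eye(k)
        return l2 * K.sum(K.square(diff))
    return regularizer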
def build(self, input_shape):
    hadamard_size = 2 ** int(
        math.ceil(math.log(max(input_shape[1], self.output_dim), 2)))
    self.hadamard = K.constant(
        value=hadamard(hadamard_size, dtype=np.int8)[:input_shape[1], :self.output_dim])

    init_scale = 1. / math.sqrt(self.output_dim)
    self.scale = self.add_weight(name='scale',
                                 shape=(1, ),
                                 initializer=Constant(init_scale),
                                 trainable=True)
    if self.use_bias:
        self.bias = self.add_weight(name='bias',
                                    shape=(self.output_dim, ),
                                    initializer=RandomUniform(-init_scale, init_scale),
                                    trainable=True)
    super(HadamardClassifier, self).build(input_shape)
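# The corresponding call() of this layer is not shown above. A plausible sketch,
# assuming the classifier projects features onto the fixed (truncated) Hadamard
# matrix with a single learned scale and an optional bias, would be:
def call(self, inputs):
    # fixed Hadamard projection, scaled by the learned scalar
    output = self.scale * K.dot(inputs, self.hadamard)
    if self.use_bias:
        output = K.bias_add(output, self.bias)
    return output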
def get_embdedding_layer(name, embedding_matrix, max_sequence_length, trainable):
    if name == 'News':
        layer = Embedding(embedding_matrix.shape[0],
                          embedding_matrix.shape[1],
                          embeddings_initializer=Constant(embedding_matrix),
                          input_length=max_sequence_length,
                          trainable=trainable,
                          mask_zero=True)
    elif name == 'BERT':
        layer = load_trained_bert_from_checkpoint(
            config_file='resource/cased_L-12_H-768_A-12/bert_config.json',
            checkpoint_file='resource/cased_L-12_H-768_A-12/bert_model.ckpt',
            seq_len=max_sequence_length,
            trainable=trainable,
            num_hidden_layers=None)
        # This BERT implementation supports zero-masking and outputs masks like a normal Keras Embedding layer.
        # TODO: drop the vectors for [CLS] and [SEP] for BERT
    else:
        raise NotImplementedError()
    return layer
def __init__(self, input_dim, output_dim, trainable, activation="linear",
             weight_info=None, **kwargs):
    super().__init__(**kwargs)
    self._input_dim = input_dim
    self._output_dim = output_dim
    self._trainable = trainable
    self._activation = activations.get(activation)
    self._weight_info = weight_info
    self._projection_mode = False

    weight_initializer = None if self._weight_info is None else Constant(
        custom_weight_factory.get_weight(self._weight_info))
    self._embedder = Embedding(input_dim,
                               output_dim,
                               trainable=trainable,
                               embeddings_initializer=weight_initializer)
def build_model_bilstm_v3(embedding_matrix):
    '''Build a stacked bidirectional LSTM classifier on top of pretrained embeddings.'''
    input_a = Input(shape=(None, ), dtype='int64')
    token_number = embedding_matrix.shape[0]
    embedding_dim = embedding_matrix.shape[1]
    embedding_layer = Embedding(
        token_number,
        embedding_dim,
        embeddings_initializer=Constant(embedding_matrix),
        trainable=False,
    )
    emb_a = embedding_layer(input_a)
    x = Bidirectional(LSTM(64, return_sequences=True))(emb_a)
    x = Bidirectional(LSTM(64))(x)
    output = Dense(snli_label_number, activation='softmax')(x)
    model = Model(input_a, output)
    model.compile(loss='categorical_crossentropy',
                  optimizer=Adagrad(learning_rate=args.learning_rate),
                  metrics=['categorical_accuracy'])
    model.summary()
    return model
def build(self): """construct model architecture""" input_context = Input(shape=(self.max_len, ), dtype='int32') embeddings = Embedding(self.num_vocab + 2, self.emb_dim, embeddings_initializer=Constant( self.emb_matrix), input_length=self.max_len, trainable=True)(input_context) branch1 = Dropout(self.dropout_rate)(embeddings) branch1 = Conv1D(self.num_filter_1, self.kernel_size_1, padding='same', activation='relu', strides=self.num_stride_1)(branch1) branch2 = Dropout(self.dropout_rate)(embeddings) branch2 = Conv1D(self.num_filter_2, self.kernel_size_2, padding='same', activation='tanh', strides=self.num_stride_2)(branch2) merged = Multiply()([branch1, branch2]) # elementwise multiplication merged = GlobalMaxPooling1D()(merged) # feed-forward neural network preds = Dense(self.hidden_dims_1, activation='selu')(merged) preds = Dropout(self.dropout_rate)(preds) preds = Dense(self.hidden_dims_2, activation='selu')(preds) preds = Dropout(self.dropout_rate)(preds) preds = Dense(self.hidden_dims_3, activation='selu')(preds) preds = Dense(self.hidden_dims_4, activation='relu')(preds) preds = Dense(self.num_classes, activation='softmax')(preds) self.model = Model([input_context], preds)
def net2(self):
    text_input = Input(shape=(self.max_ngram_len,), name='text_input')
    product_input = Input(shape=(1,), name='product_input')

    text_embeds = self.ngram_embeds(text_input)
    text_embeds = Reshape((text_embeds.shape[1], text_embeds.shape[2], 1))(text_embeds)
    if self.pre_trained_embeds is None:
        self.product_embeds = Embedding(input_dim=self.product_num,
                                        output_dim=self.embedding_dim,
                                        input_length=1,
                                        name='product_embedding')
    else:
        self.product_embeds = Embedding(input_dim=self.product_num,
                                        input_length=1,
                                        embeddings_initializer=Constant(self.pre_trained_embeds),
                                        trainable=True,
                                        output_dim=self.embedding_dim,
                                        name='product_embedding')
    product_embeds = self.product_embeds(product_input)

    conv1 = Conv2D(self.feature_num,
                   (self.kernel_size[0], self.embedding_dim),
                   padding='valid')(text_embeds)
    conv1 = BatchNormalization()(conv1)
    conv1 = ReLU()(conv1)
    max_pool1 = MaxPool2D((conv1.shape[1], 1))(conv1)

    conv2 = Conv2D(self.feature_num,
                   (self.kernel_size[1], self.embedding_dim),
                   padding='valid')(text_embeds)
    conv2 = BatchNormalization()(conv2)
    conv2 = ReLU()(conv2)
    max_pool2 = MaxPool2D((conv2.shape[1], 1))(conv2)

    conv3 = Conv2D(self.feature_num,
                   (self.kernel_size[2], self.embedding_dim),
                   padding='valid')(text_embeds)
    conv3 = BatchNormalization()(conv3)
    conv3 = ReLU()(conv3)
    max_pool3 = MaxPool2D((conv3.shape[1], 1))(conv3)

    max_pool1 = self.slicing_lambda(max_pool1)
    max_pool2 = self.slicing_lambda(max_pool2)
    max_pool3 = self.slicing_lambda(max_pool3)

    product_embeds = Reshape((1, 1, product_embeds.shape[2]))(product_embeds)
    product_model = self.slicing_lambda(product_embeds)

    text_model = keras.layers.concatenate([max_pool1, max_pool2, max_pool3])
    text_product_model = keras.layers.concatenate([max_pool1, max_pool2, max_pool3, product_model])
def create_model(self, tokenizer):
    """
    Create and train a new Keras RNN model using pre-trained word embeddings
    from SpaCy, then train it further on the movie conversation text.
    Saves the model to the text/ folder once training is completed.

    Tokenizer -> Model
    """
    total_words = len(tokenizer.word_index) + 1

    # Use word embeddings from SpaCy; index rows by the tokenizer's word indices
    # (which start at 1) so each row lines up with the token it encodes.
    embedding_dim = len(nlp('The').vector)
    embedding_matrix = np.zeros((total_words, embedding_dim))
    for word, i in tokenizer.word_index.items():
        embedding_matrix[i] = nlp(word).vector

    embedding_layer = Embedding(
        total_words,
        embedding_dim,
        embeddings_initializer=Constant(embedding_matrix),
        trainable=True,
    )

    model = Sequential()
    model.add(embedding_layer)
    model.add(Bidirectional(LSTM(400)))
    model.add(Dense(400, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(total_words, activation='softmax'))
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["acc"])

    xs, labels = self.input_sequences[:, :-1], self.input_sequences[:, -1]
    ys = to_categorical(labels, num_classes=total_words)
    model.fit(xs, ys, batch_size=128, epochs=20)

    # Save model
    model.save('text/model_%s.h5' % self.movie_id)
    return model
def word_embedding(Max_Sequence_Length, embedding_dim, word_index):
    # prepare embedding matrix
    # num_words = min(MAX_NUM_WORDS, len(word_index)) + 1
    num_words = len(word_index) + 1
    embedding_matrix = np.zeros((num_words, embedding_dim))
    for word, i in word_index.items():
        # if i > MAX_NUM_WORDS:
        #     continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            # words not found in embedding index will be all-zeros.
            embedding_matrix[i] = embedding_vector

    # load pre-trained word embeddings into an Embedding layer
    # note that we set trainable = False so as to keep the embeddings fixed
    embedding_layer = Embedding(num_words,
                                embedding_dim,
                                embeddings_initializer=Constant(embedding_matrix),
                                input_length=Max_Sequence_Length,
                                trainable=False)
    return embedding_layer