def build_GRU_with_z_gate_model(seq2seq, weight_array):
    """Build a Keras model that outputs the update gate ``z`` of one GRU step.

    The gate is computed as
    ``hard_sigmoid(x @ kernel_z + input_bias_z + h_tm1 @ recurrent_kernel_z)``.

    ``weight_array`` is sliced as a stacked matrix: rows ``[0, units)`` hold
    the input kernel, rows ``[units, 2 * units)`` the recurrent kernel and row
    ``2 * units`` the input bias; the z gate uses columns ``[0, units)``.

    Args:
        seq2seq: Object exposing ``units`` (size of every input vector).
        weight_array: Stacked GRU weights, forwarded to the Lambda layer.

    Returns:
        A ``Model`` taking ``[h_input, x_input, r_input, hh_input]`` and
        returning ``z``. ``r_input`` and ``hh_input`` are part of the model
        signature but do not influence the output.
    """
    h_tm1_input = Input(shape=(seq2seq.units, ), name="h_input")
    x_input = Input(shape=(seq2seq.units, ), name="x_input")
    r_input = Input(shape=(seq2seq.units, ), name="r_input")
    hh_input = Input(shape=(seq2seq.units, ), name="hh_input")

    def gru_with_z_gate(x, weight):
        # Only the previous state and the step input feed the z gate; the
        # former dead computation of the new state ``h`` has been removed.
        h_tm1, inputs = x[0], x[1]
        weight = K.variable(weight)
        units = h_tm1.shape[-1]

        kernel_z = weight[:units, :units]
        recurrent_kernel_z = weight[units:units * 2, :units]
        input_bias_z = weight[units * 2, :units]  # Single row -> 1-D bias.

        x_z = K.bias_add(K.dot(inputs, kernel_z), input_bias_z)
        recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
        return hard_sigmoid(x_z + recurrent_z)

    model_inputs = [h_tm1_input, x_input, r_input, hh_input]
    output = Lambda(gru_with_z_gate,
                    arguments={"weight": weight_array})(model_inputs)
    return Model(model_inputs, output)
def build_decoder_model_without_argmax(seq2seq, input_t, output_t):
    """Build a decoder that is unrolled from step ``input_t`` to ``output_t``.

    Every layer is created without a custom initializer. For each step ``t``
    in ``range(input_t, output_t)`` the decoder input is sliced at index
    ``t``, embedded and fed through the GRU, carrying the state forward. The
    softmax is applied to the GRU output of the last executed step. When
    ``input_t == output_t`` no step runs and the softmax is applied to the
    incoming state instead (expanded to carry a time axis).

    Args:
        seq2seq: Object exposing ``units`` and ``tgt_token_size``.
        input_t: First time step to run (inclusive).
        output_t: Last time step (exclusive).

    Returns:
        A ``Model`` mapping ``[decoder_input, decoder_state]`` to
        ``[softmax, state]``.

    Raises:
        ValueError: If ``input_t`` is greater than ``output_t``. Previously
            this surfaced as an UnboundLocalError on ``decoder_outputs_time``.
    """
    if input_t > output_t:
        raise ValueError(
            "input_t ({}) must not be greater than output_t ({}).".format(
                input_t, output_t))

    input_state = Input(shape=(seq2seq.units, ), name="decoder_state")
    decoder_inputs = Input(shape=(None, ), name="decoder_input")
    decoder_embedding = Embedding(seq2seq.tgt_token_size,
                                  seq2seq.units,
                                  input_length=None,
                                  name="decoder_emb")
    decoder_gru = GRU(seq2seq.units,
                      return_sequences=True,
                      return_state=True,
                      name="decoder_gru")
    decoder_dense = Dense(seq2seq.tgt_token_size,
                          activation="softmax",
                          name="output_dense")

    state = input_state
    decoder_outputs_time = None
    for t in range(input_t, output_t):
        # Count encoder output as time 0.
        inputs = Lambda(slice, arguments={"index": t})(decoder_inputs)
        inputs_embedding = decoder_embedding(inputs)
        decoder_outputs_time, state = decoder_gru(inputs_embedding,
                                                  initial_state=state)

    if input_t == output_t:
        # No GRU step ran: project the incoming state directly.
        decoder_outputs_time = Lambda(
            lambda x: K.expand_dims(x, axis=1))(state)

    softmax = decoder_dense(decoder_outputs_time)
    return Model([decoder_inputs, input_state], [softmax] + [state])
def build_model(units,
                inputs_dim,
                output="regression",
                sparse_dim=(),
                with_ts=False,
                ts_maxlen=0):
    """Build and compile a feed-forward model over dense and one-hot inputs.

    Args:
        units: Width of every Dense / Embedding / LSTM layer.
        inputs_dim: Indexable; ``[0]`` is the number of basic features,
            ``[1]`` the number of one-hot columns and ``[2]`` (only read when
            ``with_ts``) the per-step size of the time-series input.
        output: ``"regression"`` (linear unit, MSE loss) or ``"binary_clf"``
            (sigmoid unit, binary cross-entropy, accuracy metric).
        sparse_dim: Vocabulary size of each one-hot column; its length must
            equal ``inputs_dim[1]``. It is only read, never mutated.
        with_ts: Add an LSTM branch fed by a third ``ts_input``.
        ts_maxlen: Forwarded to the LSTM ``input_shape``.

    Returns:
        The compiled ``Model``; its inputs are ``[basic_input,
        one_hot_input]`` plus ``ts_input`` when ``with_ts`` is set.

    Raises:
        AssertionError: On an unsupported ``output`` or a length mismatch.
        ValueError: If ``sparse_dim`` is empty (no one-hot branch to build).
    """
    assert output == "regression" or output == "binary_clf", "This output type is not supported."
    assert len(sparse_dim) == inputs_dim[1], "Dimension not match."

    # Inputs for basic features.
    inputs1 = Input(shape=(inputs_dim[0], ), name="basic_input")
    x1 = Dense(units, kernel_regularizer='l2', activation="relu")(inputs1)

    # Inputs for long one-hot features: one embedding per column,
    # concatenated in column order.
    # NOTE(review): `slice` is a project helper here, presumably selecting
    # column `i` of the input - confirm against its definition.
    inputs2 = Input(shape=(inputs_dim[1], ), name="one_hot_input")
    x2 = None
    for i, vocab_size in enumerate(sparse_dim):
        embedded = Embedding(vocab_size, units,
                             mask_zero=True)(slice(inputs2, i))
        x2 = embedded if x2 is None else Concatenate()([x2, embedded])
    if x2 is None:
        raise ValueError("sparse_dim must describe at least one one-hot feature.")
    x2 = tf.reshape(x2, [-1, units * inputs_dim[1]])
    x = Concatenate()([x1, x2])

    model_inputs = [inputs1, inputs2]
    if with_ts:
        inputs3 = Input(shape=(None, inputs_dim[2], ), name="ts_input")
        x3 = LSTM(units,
                  input_shape=(ts_maxlen, inputs_dim[2]),
                  return_sequences=False)(inputs3)
        x = Concatenate()([x, x3])
        model_inputs.append(inputs3)

    x = Dense(units, kernel_regularizer='l2', activation="relu")(x)
    x = Dropout(0.5)(x)
    x = Dense(units, kernel_regularizer='l2', activation="relu")(x)
    x = Dropout(0.5)(x)

    if output == "regression":
        x = Dense(1, kernel_regularizer='l2')(x)
        model = Model(inputs=model_inputs, outputs=x)
        model.compile(optimizer='adam', loss='mean_squared_error')
    elif output == "binary_clf":
        x = Dense(1, kernel_regularizer='l2', activation="sigmoid")(x)
        model = Model(inputs=model_inputs, outputs=x)
        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['acc'])
    return model
def enc_dec(src_max_len,
            tgt_max_len,
            src_token_size,
            tgt_token_size,
            latent_dim=128):
    """Create an untrained encoder model and an untrained decoder model.

    All trainable layers share one ``RandomUniform(-0.08, 0.08)``
    initializer. ``src_max_len`` and ``tgt_max_len`` are accepted but not
    read by this function.

    Returns:
        ``(encoder_model, decoder_model)``. The encoder maps token ids to
        ``[final_state, per_step_outputs]``; the decoder maps
        ``[decoder_inputs, decoder_input_h]`` to ``[softmax, final_state]``.
    """
    uniform_init = RandomUniform(minval=-0.08, maxval=0.08, seed=None)

    # ---- Encoder: ids -> masked embedding -> GRU ----
    encoder_inputs = Input(shape=(None, ), name='encoder_inputs')
    src_embedding = Embedding(src_token_size,
                              latent_dim,
                              embeddings_initializer=uniform_init,
                              input_length=None,
                              mask_zero=True,
                              name='encoder_emb')
    encoder_gru = GRU(latent_dim,
                      kernel_initializer=uniform_init,
                      bias_initializer=uniform_init,
                      return_state=True,
                      return_sequences=True,
                      name='forward')
    encoder_time, encoder_state_h = encoder_gru(src_embedding(encoder_inputs))
    encoder_model = Model(encoder_inputs, [encoder_state_h, encoder_time])

    # ---- Decoder: ids + initial state -> GRU -> softmax ----
    decoder_inputs = Input(shape=(None, ), name='decoder_inputs')
    tgt_embedding = Embedding(tgt_token_size,
                              latent_dim,
                              embeddings_initializer=uniform_init,
                              input_length=None,
                              name='decoder_emb')
    decoder_gru = GRU(latent_dim,
                      kernel_initializer=uniform_init,
                      bias_initializer=uniform_init,
                      return_sequences=True,
                      return_state=True,
                      name='decoder_gru')
    output_projection = Dense(tgt_token_size,
                              kernel_initializer=uniform_init,
                              bias_initializer=uniform_init,
                              activation='softmax',
                              name='output_dense')
    decoder_state_input_h = Input(shape=(latent_dim, ),
                                  name='decoder_input_h')

    embedded_targets = tgt_embedding(decoder_inputs)
    gru_sequence, state_h = decoder_gru(embedded_targets,
                                        initial_state=decoder_state_input_h)
    token_probs = output_projection(gru_sequence)
    decoder_model = Model([decoder_inputs, decoder_state_input_h],
                          [token_probs, state_h])

    return encoder_model, decoder_model
def DNNclassifier_crps(self, p, num_cut, optimizer, seeding):
    """Build and compile the dense classifier trained with the CRPS loss.

    The network stacks one ``Dense -> ELU -> BatchNormalization -> Dropout``
    group per entry of ``self.hidden_list`` (dropout rates come from
    ``self.dropout_list``), followed by a softmax over ``num_cut + 1`` bins
    that is passed through ``self.tf_cumsum``.

    Args:
        p: Number of input features.
        num_cut: Number of cut points; the softmax has ``num_cut + 1`` units.
        optimizer: Either an optimizer name, or an optimizer instance that is
            re-created from its config so this model gets a fresh instance.
        seeding: Seed passed to ``tf.set_random_seed``.

    Returns:
        The compiled ``Model``.
    """
    tf.set_random_seed(seeding)
    inputs = Input(shape=(p,))

    if isinstance(optimizer, str):
        opt = optimizer
    else:
        # Rebuild from class name + config rather than reusing the instance.
        opt_class = getattr(optimizers, optimizer.__class__.__name__)
        opt = opt_class(**optimizer.get_config())

    # Seeding `net` with the input removes the first-layer special case and
    # keeps `net` defined when `self.hidden_list` is empty.
    net = inputs
    for i, n_neuron in enumerate(self.hidden_list):
        net = Dense(n_neuron, kernel_initializer='he_uniform')(net)
        net = Activation(activation='elu')(net)
        net = BatchNormalization()(net)
        net = Dropout(rate=self.dropout_list[i])(net)

    softmaxlayer = Dense(num_cut + 1,
                         activation='softmax',
                         kernel_initializer='he_uniform')(net)
    output = Lambda(self.tf_cumsum)(softmaxlayer)

    model = Model(inputs=[inputs], outputs=[output])
    model.compile(optimizer=opt, loss=self.crps_loss)
    return model
def build_network(num_actions, agent_history_length, resized_width,
                  resized_height):
    """Create the Q-value network and a matching state placeholder.

    Both the placeholder and the Keras layers are created under the
    ``/gpu:0`` device scope.

    Returns:
        ``(state, model)``: a float placeholder shaped
        ``[None, agent_history_length, resized_width, resized_height]`` and a
        ``Model`` with two conv layers, a 256-unit dense layer and a linear
        output with one unit per action.
    """
    frame_shape = (agent_history_length, resized_width, resized_height)

    with tf.device("/gpu:0"):
        state = tf.placeholder("float", [None] + list(frame_shape))
        inputs = Input(shape=frame_shape)

        conv_a = Convolution2D(filters=16,
                               kernel_size=(8, 8),
                               strides=(4, 4),
                               activation='relu',
                               padding='same')
        conv_b = Convolution2D(filters=32,
                               kernel_size=(4, 4),
                               strides=(2, 2),
                               activation='relu',
                               padding='same')
        features = Flatten()(conv_b(conv_a(inputs)))
        hidden = Dense(256, activation='relu')(features)
        q_values = Dense(num_actions, activation='linear')(hidden)

        m = Model(inputs, outputs=q_values)

    return state, m
def build_ann_classifier(n_hiden_layers,
                         hidden_layer_size,
                         optimizer='adam',
                         input_shape=None):
    """Build and compile a binary ANN classifier.

    Args:
        n_hiden_layers: Number of hidden Dense layers.
        hidden_layer_size: Units of the first hidden layer. Each following
            hidden layer has half the units of the previous one (integer
            division, never below 1).
        optimizer: Optimizer passed to ``compile``.
        input_shape: Shape passed to the ``Input`` layer.

    Returns:
        The compiled ``Sequential`` model with a single sigmoid output,
        binary cross-entropy loss and an accuracy metric.
    """
    classifier = Sequential()

    # Input layer.
    classifier.add(Input(shape=input_shape))

    # Hidden layers.
    layer_units = hidden_layer_size
    for _ in range(n_hiden_layers):
        classifier.add(
            Dense(units=layer_units,
                  kernel_initializer='uniform',
                  activation='relu'))
        # Each next hidden layer is 50% smaller. The previous code divided
        # the size by itself, collapsing every later layer to one unit.
        layer_units = max(1, layer_units // 2)

    # Output layer.
    classifier.add(
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

    classifier.compile(optimizer=optimizer,
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier
def __init__(self, inputs, arch, siam_reg, main_path, y_true):
    """Assemble and compile the siamese distance network.

    Two branches ('A' and 'B') are run through the same layer list; their
    outputs are compared with a Euclidean-distance Lambda layer, and the
    resulting model is compiled with a contrastive loss and RMSprop.

    Args:
        inputs: Mapping with at least the 'Unlabeled' and 'Labeled' tensors.
        arch: Architecture description passed to ``util.make_layer_list``.
        siam_reg: Regularisation argument passed to ``util.make_layer_list``.
        main_path: Base directory; the instance path is a sub-directory of it.
        y_true: Stored on the instance as-is.
    """
    self.orig_inputs = inputs
    self.y_true = y_true
    # The sub-directory name is kept exactly as spelled in the original.
    self.main_path = os.path.join(main_path, 'siemese/')

    # Branch 'B' is a fresh Input with the per-sample shape of 'Unlabeled'.
    unlabeled = inputs['Unlabeled']
    branch_b_shape = unlabeled.get_shape().as_list()[1:]
    self.inputs = {
        'A': unlabeled,
        'B': Input(shape=branch_b_shape),
        'Labeled': inputs['Labeled'],
    }

    # Shared layer stack applied to every entry of self.inputs.
    self.layers = list(util.make_layer_list(arch, 'siamese', siam_reg))
    self.outputs = stack_layers(self.inputs, self.layers)

    # Distance between the two branch embeddings.
    distance_layer = Lambda(
        affinities.euclidean_distance,
        output_shape=affinities.eucl_dist_output_shape)
    self.distance = distance_layer([self.outputs['A'], self.outputs['B']])

    # Trainable distance model.
    self.net = Model([self.inputs['A'], self.inputs['B']], self.distance)
    contrastive = affinities.get_contrastive_loss(m_neg=1, m_pos=0.05)
    self.net.compile(loss=contrastive, optimizer='rmsprop')
def build_mlp(
        obs_spec: Spec,
        act_spec: Spec,
        layer_sizes=(64, 64),
        activation='relu',
        initializer='glorot_uniform',
        value_separate=False,
        obs_shift=False,
        obs_scale=False) -> tf.keras.Model:
    """
    Factory for a simple fully connected policy/value network.

    One input is created per observation space. When ``obs_shift`` or
    ``obs_scale`` is set, each input first passes through a
    ``RunningStatsNorm`` layer configured with both flags. One logits head is
    built per action space on top of the policy trunk. The value head sits on
    its own trunk when ``value_separate`` is true, otherwise it branches off
    the policy trunk. The value output is appended last to the model outputs.
    """
    raw_inputs = [
        Input(spec.shape, name="input_" + spec.name) for spec in obs_spec
    ]

    features = raw_inputs
    if obs_shift or obs_scale:
        features = [
            RunningStatsNorm(obs_shift, obs_scale,
                             name="norm_" + spec.name)(tensor)
            for spec, tensor in zip(obs_spec, features)
        ]

    if len(features) > 1:
        merged = Concatenate()(features)
    else:
        merged = features[0]

    trunk = build_fc(merged, layer_sizes, activation, initializer)
    outputs = [build_logits(space, trunk, initializer) for space in act_spec]

    if value_separate:
        trunk = build_fc(merged, layer_sizes, activation, initializer,
                         'value_')

    value = Dense(1, name="value_out", kernel_initializer=initializer)(trunk)
    outputs.append(Squeeze(axis=-1)(value))

    return tf.keras.Model(inputs=raw_inputs, outputs=outputs)
def generate(self):
    """Load the YOLO model and build the box/score/class output tensors.

    The model is first loaded as a full Keras model. If that fails, the
    architecture is rebuilt (tiny variant when there are 6 anchors) and the
    file is loaded as weights only. A colour per class is stored in
    ``self.colors`` and the image-shape placeholder in
    ``self.input_image_shape``.

    Returns:
        ``(boxes, scores, classes)`` as returned by ``yolo_eval``.

    Raises:
        AssertionError: If the path does not end in ``.h5`` or a fully loaded
            model does not match the anchor / class configuration.
    """
    model_path = os.path.expanduser(self.model_path)
    assert model_path.endswith(
        '.h5'), 'Keras model or weights must be a .h5 file.'

    # Load model, or construct model and load weights.
    num_anchors = len(self.anchors)
    num_classes = len(self.class_names)
    is_tiny_version = num_anchors == 6  # default setting
    try:
        self.yolo_model = load_model(model_path, compile=False)
    except Exception:
        # Not a full model file: rebuild the graph and treat the file as
        # weights. `except Exception` no longer swallows KeyboardInterrupt /
        # SystemExit as the former bare `except` did.
        if is_tiny_version:
            self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)),
                                             num_anchors // 2, num_classes)
        else:
            self.yolo_model = yolo_body(Input(shape=(None, None, 3)),
                                        num_anchors // 3, num_classes)
        # Use the expanded path so a "~" prefix works here as well; this
        # also makes sure model, anchors and classes match.
        self.yolo_model.load_weights(model_path)
    else:
        assert self.yolo_model.layers[-1].output_shape[-1] == \
            num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
            'Mismatch between model and given anchor and class sizes'

    print('{} model, anchors, and classes loaded.'.format(model_path))

    # Generate colors for drawing bounding boxes: evenly spaced hues,
    # converted to 0-255 RGB tuples.
    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
    rgb_tuples = [colorsys.hsv_to_rgb(*hsv) for hsv in hsv_tuples]
    self.colors = [(int(r * 255), int(g * 255), int(b * 255))
                   for r, g, b in rgb_tuples]
    np.random.seed(10101)  # Fixed seed for consistent colors across runs.
    np.random.shuffle(
        self.colors)  # Shuffle colors to decorrelate adjacent classes.
    np.random.seed(None)  # Reset seed to default.

    # Generate output tensor targets for filtered bounding boxes.
    self.input_image_shape = K.placeholder(shape=(2, ))
    if self.gpu_num >= 2:
        self.yolo_model = multi_gpu_model(self.yolo_model,
                                          gpus=self.gpu_num)
    boxes, scores, classes = yolo_eval(self.yolo_model.output,
                                       self.anchors,
                                       num_classes,
                                       self.input_image_shape,
                                       score_threshold=self.score,
                                       iou_threshold=self.iou)
    return boxes, scores, classes
def create_actor_model(self):
    """Build the actor network: 6 state features -> one tanh action.

    Returns:
        An uncompiled ``Model`` with hidden layers of 400 and 300 ReLU units
        and a single ``tanh`` output.
    """
    # `shape` is documented as a tuple; a bare int is rejected by Keras
    # versions that call tuple() on it.
    state_input = Input(shape=(6, ))
    h1 = Dense(400, activation='relu')(state_input)
    h2 = Dense(300, activation='relu')(h1)
    output = Dense(1, activation='tanh')(h2)
    return Model(inputs=state_input, outputs=output)
def gru_keras(max_features,
              maxlen,
              bidirectional,
              dropout_rate,
              embed_dim,
              rec_units,
              mtype='GRU',
              reduction=None,
              classes=4,
              lr=0.001):
    """Build and compile an embedding + recurrent text classifier.

    Args:
        max_features: Vocabulary size of the embedding.
        maxlen: Input sequence length.
        bidirectional: Wrap the recurrent layer in ``Bidirectional``.
        dropout_rate: Rate of the spatial dropout after the embedding.
        embed_dim: Embedding output dimension.
        rec_units: Units of the recurrent layer.
        mtype: ``'GRU'`` or ``'LSTM'``. Any other value adds no recurrent
            layer (unchanged from the previous behaviour).
        reduction: When truthy the recurrent layer returns sequences;
            ``'average'`` / ``'maximum'`` then apply global average / max
            pooling. When falsy only the last output is returned.
        classes: Number of output units.
        lr: RMSprop learning rate.

    Returns:
        The compiled ``Model``.
    """
    # `K.backend` is a function; comparing the function object itself to a
    # string was always False, so the session was never cleared.
    if K.backend() == 'tensorflow':
        K.clear_session()

    input_layer = Input(shape=(maxlen, ))
    embedding_layer = Embedding(max_features,
                                output_dim=embed_dim,
                                trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)

    recurrent_classes = {'GRU': CuDNNGRU, 'LSTM': CuDNNLSTM}
    recurrent_cls = recurrent_classes.get(mtype)
    if recurrent_cls is not None:
        recurrent = recurrent_cls(units=rec_units,
                                  return_sequences=bool(reduction))
        if bidirectional:
            recurrent = Bidirectional(recurrent)
        x = recurrent(x)

    if reduction == 'average':
        x = GlobalAveragePooling1D()(x)
    elif reduction == 'maximum':
        x = GlobalMaxPool1D()(x)

    # NOTE(review): a sigmoid output combined with categorical_crossentropy
    # is kept as in the original - confirm that this pairing is intended.
    output_layer = Dense(classes, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1,
                                    clipnorm=1),
                  metrics=['acc'])
    return model
def network(self):
    """Build the critic network that predicts a single value per state.

    The input shape is ``self.env_dim``; two ELU layers (32 and 16 units)
    feed one linear unit initialised with ``RandomUniform``.
    """
    state = Input(self.env_dim)
    hidden = state
    for width in (32, 16):
        hidden = Dense(width, activation='elu')(hidden)
    value = Dense(1,
                  activation='linear',
                  kernel_initializer=RandomUniform())(hidden)
    return Model(state, value)
def build_GRU_with_r_gate_model(seq2seq, weight_array):
    """Build a Keras model that outputs the reset gate ``r`` of one GRU step.

    The gate is computed as
    ``hard_sigmoid(x @ kernel_r + input_bias_r + h_tm1 @ recurrent_kernel_r)``.

    ``weight_array`` is sliced as a stacked matrix: rows ``[0, units)`` hold
    the input kernel, rows ``[units, 2 * units)`` the recurrent kernel and row
    ``2 * units`` the input bias; the r gate uses columns
    ``[units, 2 * units)``.

    Args:
        seq2seq: Object exposing ``units``.
        weight_array: Stacked GRU weights, forwarded to the Lambda layer.

    Returns:
        A ``Model`` taking ``[h_input, x_input, z_input, xh_input, rh_input]``
        and returning ``r``. ``z_input``, ``xh_input`` and ``rh_input`` are
        part of the model signature but do not influence the output.
    """
    h_tm1_input = Input(shape=(seq2seq.units, ), name="h_input")
    x_input = Input(shape=(seq2seq.units, ), name="x_input")
    z_input = Input(shape=(seq2seq.units, ), name="z_input")
    # x_h = K.bias_add(K.dot(inputs, kernel_h), input_bias_h)
    xh_input = Input(shape=(seq2seq.units, ), name="xh_input")
    # split_recurrent_h = K.dot(h_tm1.transpose(), recurrent_kernel_h)
    rh_input = Input(shape=(seq2seq.units, seq2seq.units), name="rh_input")

    def gru_with_r_gate(x, weight):
        # Only the previous state and the step input feed the r gate; the
        # former dead recomputation of `hh` / `h` has been removed.
        h_tm1, inputs = x[0], x[1]
        weight = K.variable(weight)
        units = h_tm1.shape[-1]

        kernel_r = weight[:units, units:units * 2]
        recurrent_kernel_r = weight[units:units * 2, units:units * 2]
        input_bias_r = weight[units * 2, units:units * 2]  # 1-D bias row.

        x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
        recurrent_r = K.dot(h_tm1, recurrent_kernel_r)
        return hard_sigmoid(x_r + recurrent_r)

    model_inputs = [h_tm1_input, x_input, z_input, xh_input, rh_input]
    output = Lambda(gru_with_r_gate,
                    arguments={"weight": weight_array})(model_inputs)
    return Model(model_inputs, output)
def NN_huaweiv1(maxlen, embedding_matrix=None, class_num1=17, class_num2=12):
    """Build the three-headed BiGRU / CNN / attention classifier.

    Args:
        maxlen: Input sequence length.
        embedding_matrix: Pre-trained embedding weights (frozen). Required
            despite the ``None`` default.
        class_num1: Number of units of the ``pred1`` head.
        class_num2: Number of units of the ``pred2`` head.

    Returns:
        An uncompiled ``Model`` with outputs ``[pred1, pred2, pred3]``;
        ``pred3`` has ``class_num1 + class_num2`` units and is fed by the
        logits of both other heads plus a separate dense branch.

    Raises:
        ValueError: If ``embedding_matrix`` is ``None``.
    """
    if embedding_matrix is None:
        raise ValueError("embedding_matrix is required to build the model.")

    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen, ))
    x1 = emb_layer(seq1)

    sdrop = SpatialDropout1D(rate=0.2)
    lstm_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64,
                         kernel_size=3,
                         padding="same",
                         kernel_initializer="he_uniform")

    x1 = sdrop(x1)
    lstm1 = lstm_layer(x1)
    gru1 = gru_layer(lstm1)
    att_1 = Attention(maxlen)(lstm1)
    att_2 = Attention(maxlen)(gru1)
    cnn1 = cnn1d_layer(lstm1)

    avg_pool = GlobalAveragePooling1D()
    max_pool = GlobalMaxPooling1D()

    # Shared feature vector consumed by all three heads.
    x1 = concatenate([
        att_1, att_2,
        Attention(maxlen)(cnn1),
        avg_pool(cnn1),
        max_pool(cnn1)
    ])

    # Head 1.
    x = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(
        Dense(128)(x1))))
    x = Activation(activation="relu")(BatchNormalization()(Dense(64)(x)))
    pred1_d = Dense(class_num1)(x)
    pred1 = Activation(activation='sigmoid', name='pred1')(pred1_d)

    # Head 2 consumes the shared features together with head 1's hidden
    # representation. The concatenation used to be overwritten on the next
    # line by a Dense applied to `x`, which silently discarded it.
    y = concatenate([x1, x])
    y = Activation(activation="relu")(BatchNormalization()(Dense(64)(y)))
    pred2_d = Dense(class_num2)(y)
    pred2 = Activation(activation='sigmoid', name='pred2')(pred2_d)

    # Head 3: both logit vectors plus an independent dense branch.
    z = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(
        Dense(128)(x1))))
    z = concatenate([pred1_d, pred2_d, z])
    pred3 = Dense(class_num1 + class_num2,
                  activation='sigmoid',
                  name='pred3')(z)

    model = Model(inputs=seq1, outputs=[pred1, pred2, pred3])
    return model
def build_GRU_with_h_gate_model(seq2seq):
    """Build a model computing the new GRU state from explicit gate inputs.

    The candidate is ``hh = tanh(wx_h(x) + uh_h(r * h_tm1))`` where ``wx_h``
    is a biased Dense layer and ``uh_h`` a bias-free one; the output is
    ``h = z * h_tm1 + (1 - z) * hh``.

    Returns:
        A ``Model`` taking ``[h_input, x_input, z_input, r_input]`` and
        returning ``h``.
    """
    units = seq2seq.units

    h_tm1_input = Input(shape=(units, ), name="h_input")
    x_input = Input(shape=(units, ), name="x_input")
    z_input = Input(shape=(units, ), name="z_input")
    r_input = Input(shape=(units, ), name="r_input")

    # Input contribution: x_h = K.bias_add(K.dot(inputs, kernel_h), bias_h)
    input_projection = Dense(units, name="wx_h")
    x_h = input_projection(x_input)

    # Recurrent contribution: K.dot(r * h_tm1, recurrent_kernel_h)
    gated_state = layers.Multiply()([r_input, h_tm1_input])
    recurrent_projection = Dense(units, use_bias=False, name="uh_h")
    recurrent_h = recurrent_projection(gated_state)

    # Candidate state.
    candidate = tanh(layers.Add()([x_h, recurrent_h]))

    # h = z * h_tm1 + (1 - z) * hh
    keep_part = layers.Multiply()([z_input, h_tm1_input])
    update_part = layers.Multiply()([1 - z_input, candidate])
    new_state = layers.Add()([keep_part, update_part])

    model_inputs = [h_tm1_input, x_input, z_input, r_input]
    GRU_with_h_gate_model = Model(model_inputs, new_state)
    return GRU_with_h_gate_model
def face_impl(input_shape, output_size):
    """Build and compile a softmax classifier on top of the embedding net.

    The embedding network comes from ``modelf(input_shape, embedding)``; both
    names are resolved from the enclosing scope.

    Returns:
        A ``Model`` compiled with Adam, sparse categorical cross-entropy and
        an accuracy metric.
    """
    image = Input(shape=input_shape)
    embedding_net = modelf(input_shape, embedding)
    logits = Dense(output_size)(embedding_net(image))
    probabilities = Activation("softmax")(logits)

    model = Model(image, probabilities)
    model.compile("adam",
                  "sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model
def triplet_impl(input_shape, output_size):
    """Build and compile the triplet-loss wrapper around the embedding net.

    One input is created per entry of ``input_shape``; all of them go through
    the same network (built for ``input_shape[0]``), the embeddings are
    concatenated and passed to ``TripletLossLayer``.

    Returns:
        A ``Model`` named after the wrapped network, compiled with Adam,
        ``triplet_loss`` and the ``neg_minus_pos`` metric.
    """
    net = None
    inputs = []
    for shape in input_shape:
        inputs.append(Input(shape=shape))
    net = modelf(input_shape[0], output_size)

    embeddings = [net(tensor) for tensor in inputs]
    y = TripletLossLayer()(concatenate(embeddings))

    model = Model(inputs, y, name="triplet({})".format(net.name))
    model.compile(optimizer="adam",
                  loss=triplet_loss,
                  metrics=[neg_minus_pos])
    return model
def create_critic_network(self):
    """Build and compile the critic over a state and an action input.

    The state goes through a ReLU and a linear Dense layer, the action
    through one linear Dense layer; both are concatenated, passed through a
    ReLU layer and projected linearly to ``self.act_dim`` outputs.

    Returns:
        ``(model, action_input, state_input)``; the model is compiled with
        MSE loss and ``Adam(self.lr)``.
    """
    width = self.hidden_dim

    # State branch.
    state_input = Input(shape=[self.obs_dim])
    state_hidden = Dense(width, activation='relu')(state_input)
    state_features = Dense(width, activation='linear')(state_hidden)

    # Action branch.
    action_input = Input(shape=[self.act_dim], name='action2')
    action_features = Dense(width, activation='linear')(action_input)

    # Merge both branches.
    merged = concatenate([state_features, action_features])
    merged_hidden = Dense(width, activation='relu')(merged)
    value_out = Dense(self.act_dim, activation='linear')(merged_hidden)

    model = Model(inputs=[state_input, action_input], outputs=[value_out])
    model.compile(loss='mse', optimizer=Adam(self.lr))
    return model, action_input, state_input
def u_dense_net(input_shape,
                num_db,
                num_channels=64,
                growth_rate=32,
                convs_per_db=3):
    """Build a U-shaped DenseNet with ``num_db`` DenseBlocks.

    ``num_db // 2`` blocks sit on the down-sampling side, one in the middle
    and ``num_db // 2`` on the up-sampling side; every up-sampled tensor is
    concatenated with the DenseBlock output of the matching down level. The
    output is a single-channel sigmoid map.

    Raises:
        AssertionError: If the input shape is not 3-D, ``num_db`` is not an
            odd number above 1, the spatial size is not divisible by
            ``2 ** (num_db // 2)``, or ``num_channels`` is not a positive
            even number.
    """
    assert len(
        input_shape
    ) == 3, f"Input shape must have 3 dimension! Received '{input_shape}'!"
    assert (num_db > 1) and (
        num_db % 2 == 1
    ), f"Number of DenseBlocks must be an odd number more than 1! Received '{num_db}'!"

    # In a U-shaped DenseNet with N DenseBlocks, each side has floor(N/2)
    # DenseBlocks.
    num_trans_down = num_trans_up = num_db // 2
    assert (input_shape[0] % (2**num_trans_down) == 0) and (
        input_shape[1] % (2**num_trans_down) == 0
    ), f"Dimension of the input shape {input_shape[:2]} must be a multiple of {2**num_trans_down} to preserve the tensor shape after down-scaling and up-scaling"
    assert (num_channels > 0) and (
        num_channels % 2 == 0
    ), f"Number of channels for TransitionBlock must be an even number more than 0! Received '{num_channels}'!"

    img_in = Input(dtype="float32", shape=input_shape, name="image_input")
    # The stem uses the initial channel count, before it is updated below.
    stem = Conv2D(num_channels,
                  kernel_size=(5, 5),
                  activation="relu",
                  padding="same")
    x = stem(img_in)

    # ---- Transition down section ----
    skip_connections = []  # Shallowest level first.
    for level in range(num_trans_down):
        x = DenseBlock(num_layers=convs_per_db, filters=growth_rate)(x)
        skip_connections.append(x)
        num_channels = (num_channels + growth_rate * level) // 2
        x = TransitionBlock(filters=num_channels, trans_down=True)(x)

    # ---- Mid DenseBlock ----
    x = DenseBlock(num_layers=convs_per_db, filters=growth_rate)(x)

    # ---- Transition up section (deepest skip connection first) ----
    for level, skip in zip(range(num_trans_up), reversed(skip_connections)):
        num_channels = (num_channels + growth_rate * (level + 1)) // 2
        x = TransitionBlock(filters=num_channels, trans_down=False)(x)
        x = Concatenate(axis=-1)([x, skip])
        x = DenseBlock(num_layers=convs_per_db, filters=growth_rate)(x)

    img_out = Conv2D(1,
                     kernel_size=(5, 5),
                     activation="sigmoid",
                     padding="same",
                     name="image_output")(x)

    model = Model(inputs=[img_in], outputs=[img_out], name="DenseNet")
    return model
def create_actor(self):
    """Build the actor network (no loss is attached to it here).

    Three ReLU layers of ``self.hidden_dim`` units map an observation of
    size ``self.obs_dim`` to ``self.act_dim`` tanh outputs.

    Returns:
        ``(model, model.trainable_weights, obs_in)``.
    """
    obs_in = Input(shape=[self.obs_dim])

    hidden = obs_in
    for _ in range(3):
        hidden = Dense(self.hidden_dim, activation='relu')(hidden)
    action = Dense(self.act_dim, activation='tanh')(hidden)

    model = Model(inputs=obs_in, outputs=action)
    return model, model.trainable_weights, obs_in
def build_model(hidden_size):
    """Build, train and evaluate a one-hidden-layer classifier on 28x28 input.

    Training uses the module-level ``x_train`` / ``y_train`` for 5 epochs;
    evaluation uses ``x_test`` / ``y_test``.

    Returns:
        ``[model, acc]`` where ``acc`` is the accuracy from ``evaluate``.
    """
    inputs = Input(shape=(28, 28))
    flat = Flatten()(inputs)
    hidden = Dense(hidden_size, activation=tf.nn.relu)(flat)
    regularised = Dropout(0.2)(hidden)
    probabilities = Dense(10, activation=tf.nn.softmax)(regularised)

    model = Model(inputs=inputs, outputs=probabilities)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Train, then score on the held-out set.
    model.fit(x_train, y_train, epochs=5)
    evaluation = model.evaluate(x_test, y_test)
    loss, acc = evaluation
    return [model, acc]
def NN_huaweiv1(maxlen, embedding_matrix=None, class_num1=17, class_num2=12):
    """Build the two-output classifier with four attention heads.

    A frozen embedding feeds two stacked bidirectional GRUs and a Conv1D;
    their outputs are concatenated and read by four separate attention
    layers. Each attended vector goes through Dense(128) -> BatchNorm ->
    ReLU. ``pred1`` concatenates a ``class_num1 - 10`` unit head with a
    10-unit head; ``pred2`` concatenates a ``class_num2 - 9`` unit head with
    a 9-unit head.

    Args:
        maxlen: Input sequence length.
        embedding_matrix: Pre-trained embedding weights. Required despite the
            ``None`` default.
        class_num1: Total width of ``pred1``.
        class_num2: Total width of ``pred2``.

    Returns:
        An uncompiled ``Model`` with outputs ``[pred1, pred2]``.

    Raises:
        ValueError: If ``embedding_matrix`` is ``None``.
    """
    if embedding_matrix is None:
        raise ValueError("embedding_matrix is required to build the model.")

    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen, ))
    emb = emb_layer(seq1)

    sdrop = SpatialDropout1D(rate=0.2)
    lstm_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64,
                         kernel_size=3,
                         padding="same",
                         kernel_initializer="he_uniform")

    sd = sdrop(emb)
    lstm1 = lstm_layer(sd)
    gru1 = gru_layer(lstm1)
    cnn1 = cnn1d_layer(gru1)
    gru1 = concatenate([lstm1, gru1, cnn1])

    # Four independent attention layers over the same features; all of them
    # are created before the dense blocks, as in the original.
    attended = [Attention(maxlen)(gru1) for _ in range(4)]
    x1, x2, x3, x4 = [
        Activation(activation="relu")(BatchNormalization()(Dense(128)(att)))
        for att in attended
    ]

    pred1_1 = Dense(class_num1 - 10, activation='sigmoid')(x1)
    pred1_2 = Dense(10, activation='sigmoid')(x2)
    pred1 = concatenate([pred1_1, pred1_2], axis=-1, name='pred1')

    pred2_1 = Dense(class_num2 - 9, activation='sigmoid')(x3)
    pred2_2 = Dense(9, activation='sigmoid')(x4)
    pred2 = concatenate([pred2_1, pred2_2], axis=-1, name='pred2')

    model = Model(inputs=seq1, outputs=[pred1, pred2])
    return model
def get_encoder_model(cfg):
    """Build the encoder: embedding -> batch norm -> GRU final state.

    Reads ``len_input_seq``, ``num_input_tokens`` and ``latent_dim`` from
    ``cfg``.

    Returns:
        ``(encoder_model, encoder_inputs, encoder_outputs)`` where
        ``encoder_outputs`` is the model applied to ``encoder_inputs``.
    """
    encoder_inputs = Input(shape=(cfg.len_input_seq,), name='Encoder-Input')

    embedding = keras.layers.Embedding(cfg.num_input_tokens,
                                       cfg.latent_dim,
                                       name='Encoder-Embedding',
                                       mask_zero=False)
    batch_norm = keras.layers.BatchNormalization(name='Encoder-Batchnorm-1')
    last_gru = keras.layers.GRU(cfg.latent_dim,
                                return_state=True,
                                name='Encoder-Last-GRU')

    normalised = batch_norm(embedding(encoder_inputs))
    # Only the final state is kept; the GRU output itself is discarded.
    _, state_h = last_gru(normalised)

    encoder_model = keras.models.Model(inputs=encoder_inputs,
                                       outputs=state_h,
                                       name='Encoder-Model')
    encoder_outputs = encoder_model(encoder_inputs)
    return encoder_model, encoder_inputs, encoder_outputs
def build_fully_conv(obs_spec,
                     act_spec,
                     data_format='channels_first',
                     broadcast_non_spatial=False,
                     fc_dim=256):
    """Build the fully convolutional policy/value model.

    ``obs_spec.spaces[0]`` and ``[1]`` are processed by ``spatial_block`` as
    the screen and minimap; every remaining space becomes a plain input. The
    first policy head is masked with the first non-spatial input: where that
    input is not positive the logit is replaced by -1000.

    Returns:
        A ``Model`` whose inputs are ``[screen, minimap] + non-spatial
        inputs`` and whose outputs are one logits tensor per action space
        followed by the value.
    """
    spaces = obs_spec.spaces
    screen, screen_input = spatial_block('screen', spaces[0],
                                         conv_cfg(data_format, 'relu'))
    minimap, minimap_input = spatial_block('minimap', spaces[1],
                                           conv_cfg(data_format, 'relu'))

    non_spatial_inputs = [Input(s.shape) for s in obs_spec.spaces[2:]]

    state_parts = [screen, minimap]
    if broadcast_non_spatial:
        # The second non-spatial input is log-scaled and broadcast over the
        # spatial grid before joining the state.
        spatial_dim = obs_spec.spaces[0].shape[1]
        non_spatial = tf.log(non_spatial_inputs[1] + 1e-5)
        state_parts.append(Broadcast2D(spatial_dim)(non_spatial))
    state = tf.concat(state_parts, axis=1)

    fc = Flatten(name="state_flat")(state)
    fc = Dense(fc_dim, **dense_cfg('relu'))(fc)

    value = Dense(1, name="value_out", **dense_cfg(scale=0.1))(fc)
    value = tf.squeeze(value, axis=-1)

    logits = []
    for space in act_spec:
        if space.is_spatial():
            head = Conv2D(1, 1, **conv_cfg(data_format, scale=0.1))(state)
            head = Flatten()(head)
        else:
            head = Dense(space.size(), **dense_cfg(scale=0.1))(fc)
        logits.append(head)

    mask_actions = Lambda(
        lambda x: tf.where(non_spatial_inputs[0] > 0, x,
                           -1000 * tf.ones_like(x)),
        name="mask_unavailable_action_ids")
    logits[0] = mask_actions(logits[0])

    model_inputs = [screen_input, minimap_input] + non_spatial_inputs
    return Model(inputs=model_inputs, outputs=logits + [value])
def spatial_block(name, space, cfg):
    """Pre-process one spatial observation and run two conv layers on it.

    The input is split along axis 1 into ``space.shape[0]`` slices. A slice
    whose feature dimension is above 1 is embedded into 10 channels; any
    other slice is log-scaled. The slices are concatenated again and passed
    through ``Conv2D(16, 5)`` and ``Conv2D(32, 3)``, both configured by
    ``cfg``.

    Returns:
        ``(block, inpt)``: the conv output and the created input tensor.
    """
    inpt = Input(space.shape, name=name + '_input')

    feature_maps = tf.split(inpt, space.shape[0], axis=1)
    feature_info = zip(space.spatial_feats, space.spatial_dims)
    for idx, (_feat_name, dim) in enumerate(feature_info):
        if dim > 1:
            indices = tf.squeeze(feature_maps[idx], axis=1)
            # Embedding dim 10 as per https://arxiv.org/pdf/1806.01830.pdf
            embedded = Embedding(input_dim=dim, output_dim=10)(indices)
            # [N, H, W, C] -> [N, C, H, W]
            feature_maps[idx] = tf.transpose(embedded, perm=[0, 3, 1, 2])
        else:
            feature_maps[idx] = tf.log(feature_maps[idx] + 1e-5)

    block = tf.concat(feature_maps, axis=1)
    for filters, kernel in ((16, 5), (32, 3)):
        block = Conv2D(filters, kernel, **cfg)(block)

    return block, inpt
def build_cnn_nature(obs_spec,
                     act_spec,
                     data_format='channels_first',
                     value_separate=False,
                     obs_shift=False,
                     obs_scale=False):
    """Build a conv policy/value model with (32, 8, 4), (64, 4, 2), (64, 3, 1) layers.

    Inputs are concatenated when there are several, transposed when the
    model expects channels first but axis 1 is larger than 3, rescaled by
    1/255 and optionally normalised with ``RunningStatsNorm``. One logits
    head is built per action space. The value head uses its own CNN when
    ``value_separate`` is true, otherwise it shares the policy CNN; the value
    is the last model output.
    """
    conv_kwargs = dict(padding='same',
                       data_format=data_format,
                       activation='relu')
    conv_layers = [(32, 8, 4), (64, 4, 2), (64, 3, 1)]

    inputs = [Input(s.shape, name="input_" + s.name) for s in obs_spec]
    if len(inputs) > 1:
        merged = Concatenate()(inputs)
    else:
        merged = inputs[0]

    # expected NxCxHxW, but got NxHxWxC
    needs_transpose = data_format == 'channels_first' and merged.shape[1] > 3
    if needs_transpose:
        merged = Transpose([0, 3, 1, 2])(merged)

    scaled = Rescale(1. / 255)(merged)
    if obs_shift or obs_scale:
        scaled = RunningStatsNorm(obs_shift, obs_scale)(scaled)

    trunk = build_cnn(scaled, conv_layers, conv_kwargs, dense=512,
                      prefix='policy_')
    outputs = [
        Dense(s.size(), name="logits_" + s.name)(trunk) for s in act_spec
    ]

    if value_separate:
        trunk = build_cnn(scaled, conv_layers, conv_kwargs, dense=512,
                          prefix='value_')

    value = Dense(1, name="value_out")(trunk)
    outputs.append(Squeeze(axis=-1)(value))

    return Model(inputs=inputs, outputs=outputs)
def cnn_keras(max_features,
              maxlen,
              dropout_rate,
              embed_dim,
              num_filters=300,
              classes=4,
              lr=0.001):
    """Build and compile an embedding + Conv1D text classifier.

    Args:
        max_features: Vocabulary size of the embedding.
        maxlen: Input sequence length.
        dropout_rate: Rate of the spatial dropout after the embedding.
        embed_dim: Embedding output dimension.
        num_filters: Filters of the width-7 convolution.
        classes: Number of output units.
        lr: RMSprop learning rate.

    Returns:
        The compiled ``Model``.
    """
    # `K.backend` is a function; comparing the function object itself to a
    # string was always False, so the session was never cleared.
    if K.backend() == 'tensorflow':
        K.clear_session()

    input_layer = Input(shape=(maxlen, ))
    embedding_layer = Embedding(max_features,
                                output_dim=embed_dim,
                                trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)
    x = Conv1D(num_filters, 7, activation='relu', padding='same')(x)
    x = GlobalMaxPooling1D()(x)

    # NOTE(review): a sigmoid output combined with categorical_crossentropy
    # is kept as in the original - confirm that this pairing is intended.
    output_layer = Dense(classes, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1,
                                    clipnorm=1),
                  metrics=['acc'])
    return model
def build_rnn(n_lstm_layers,
              lstm_layer_size,
              n_hiden_layers,
              hidden_layer_size,
              optimizer='adam',
              input_shape=None):
    """Build and compile a binary RNN classifier.

    The stack is: input, ``n_lstm_layers`` sequence-returning CuDNNLSTM
    layers, a Flatten, ``n_hiden_layers`` ReLU Dense layers of
    ``hidden_layer_size`` units and one sigmoid unit. The model is compiled
    with binary cross-entropy and an accuracy metric.
    """
    stack = [Input(shape=input_shape)]

    # Recurrent part: every LSTM keeps the full sequence.
    stack.extend(
        CuDNNLSTM(units=lstm_layer_size, return_sequences=True)
        for _ in range(n_lstm_layers))

    # Collapse the sequence into a 1-D vector.
    stack.append(Flatten())

    # Fully connected part.
    stack.extend(
        Dense(units=hidden_layer_size,
              kernel_initializer='uniform',
              activation='relu') for _ in range(n_hiden_layers))

    # Output unit.
    stack.append(
        Dense(units=1, kernel_initializer='uniform', activation='sigmoid'))

    classifier = Sequential()
    for layer in stack:
        classifier.add(layer)

    classifier.compile(optimizer=optimizer,
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
    return classifier
def seq2seq(src_max_len,
            tgt_max_len,
            src_token_size,
            tgt_token_size,
            latent_dim=128,
            teacher_forcing_ratio=0.5):
    """Build the training model plus stand-alone encoder and decoder models.

    The training model unrolls the decoder for ``tgt_max_len`` steps. At
    every step the next decoder input is chosen by the project helper
    ``where`` from the teacher-forcing input, the ground-truth token of the
    next step and the arg-max of the current softmax.

    Args:
        src_max_len: Accepted but not read by this function.
        tgt_max_len: Number of unrolled decoder steps.
        src_token_size: Source vocabulary size.
        tgt_token_size: Target vocabulary size.
        latent_dim: Embedding and GRU size.
        teacher_forcing_ratio: Passed to ``where`` as ``ratio``.

    Returns:
        ``(model, encoder_model, decoder_model)``. ``model`` maps
        ``[encoder_inputs, decoder_inputs, teacher_forcing]`` to the per-step
        softmax outputs concatenated on axis 1. ``decoder_model`` reuses the
        same decoder layers for ``[decoder_inputs, decoder_input_h]``.
    """
    rd = RandomUniform(minval=-0.08, maxval=0.08, seed=None)

    # ---- Encoder ----
    encoder_inputs = Input(shape=(None, ), name='encoder_inputs')
    print('(Build model) encoder_inputs =', encoder_inputs.shape)
    encoder_embedding = Embedding(src_token_size,
                                  latent_dim,
                                  embeddings_initializer=rd,
                                  input_length=None,
                                  mask_zero=True,
                                  name='encoder_emb')(encoder_inputs)
    print('(Build model) encoder_embedding =', encoder_embedding.shape)
    encoder_time, encoder_state_h = GRU(latent_dim,
                                        kernel_initializer=rd,
                                        bias_initializer=rd,
                                        return_state=True,
                                        return_sequences=True,
                                        name='forward')(encoder_embedding)
    print("(Build model) encoder_state_h =", encoder_state_h.shape)
    encoder_model = Model(encoder_inputs, [encoder_state_h, encoder_time])

    # ---- Decoder layers (shared by training and inference models) ----
    decoder_inputs = Input(shape=(None, ), name='decoder_inputs')
    print('(Build model) decoder_inputs =', decoder_inputs.shape)
    decoder_embedding = Embedding(tgt_token_size,
                                  latent_dim,
                                  embeddings_initializer=rd,
                                  input_length=None,
                                  name='decoder_emb')
    decoder_gru = GRU(latent_dim,
                      kernel_initializer=rd,
                      bias_initializer=rd,
                      return_sequences=True,
                      return_state=True,
                      name='decoder_gru')
    decoder_dense = Dense(tgt_token_size,
                          kernel_initializer=rd,
                          bias_initializer=rd,
                          activation='softmax',
                          name='output_dense')

    inputs = Lambda(slice, arguments={'h1': 0})(decoder_inputs)
    softmax_state = []
    teacher_forcing = Input(shape=(None, ), )
    decoder_state_h = encoder_state_h

    # Run decoder on each timestep.
    for i in range(tgt_max_len):
        inputs_embed = decoder_embedding(inputs)
        decoder_outputs_time, state_h = decoder_gru(
            inputs_embed, initial_state=decoder_state_h)
        softmax = decoder_dense(decoder_outputs_time)

        outputs = Lambda(lambda x: K.argmax(x))(softmax)
        # Cast the layer's own argument. The previous lambda ignored `x` and
        # closed over the outer `outputs` variable, which is rebound on
        # every iteration.
        outputs = Lambda(lambda x: K.cast(x, 'float32'))(outputs)

        decoder_inputs_time = Lambda(slice,
                                     arguments={'h1': i + 1})(decoder_inputs)
        inputs = Lambda(where, arguments={'ratio': teacher_forcing_ratio})(
            [teacher_forcing, decoder_inputs_time, outputs])

        decoder_state_h = state_h
        softmax_state += [softmax]

    decoder_outputs = Lambda(lambda x: K.concatenate(x, axis=1))(softmax_state)

    # Model that turns "encoder_input_data" & "decoder_input_data" into
    # "decoder_target_data".
    model = Model([encoder_inputs, decoder_inputs, teacher_forcing],
                  decoder_outputs)

    # ---- Stand-alone decoder for inference ----
    decoder_state_input_h = Input(shape=(latent_dim, ),
                                  name='decoder_input_h')
    decoder_outputs, state_h = decoder_gru(decoder_embedding(decoder_inputs),
                                           initial_state=decoder_state_input_h)
    print('(Build model) decoder_outputs =', decoder_outputs)
    decoder_outputs = decoder_dense(decoder_outputs)
    print('(Build model) decoder_outputs =', decoder_outputs)
    decoder_model = Model([decoder_inputs] + [decoder_state_input_h],
                          [decoder_outputs] + [state_h])

    encoder_model.summary()
    decoder_model.summary()
    return model, encoder_model, decoder_model