def DNNclassifier_crps(self, p, num_cut, optimizer, seeding):
    tf.set_random_seed(seeding)
    inputs = Input(shape=(p,))

    if isinstance(optimizer, str):
        opt = optimizer
    else:
        opt_name = optimizer.__class__.__name__
        opt_config = optimizer.get_config()
        opt_class = getattr(optimizers, opt_name)
        opt = opt_class(**opt_config)

    for i, n_neuron in enumerate(self.hidden_list):
        if i == 0:
            net = Dense(n_neuron, kernel_initializer='he_uniform')(inputs)
        else:
            net = Dense(n_neuron, kernel_initializer='he_uniform')(net)
        net = Activation(activation='elu')(net)
        net = BatchNormalization()(net)
        net = Dropout(rate=self.dropout_list[i])(net)

    softmaxlayer = Dense(num_cut + 1, activation='softmax',
                         kernel_initializer='he_uniform')(net)
    output = Lambda(self.tf_cumsum)(softmaxlayer)
    model = Model(inputs=[inputs], outputs=[output])
    model.compile(optimizer=opt, loss=self.crps_loss)
    return model

def gru_keras(max_features, maxlen, bidirectional, dropout_rate, embed_dim,
              rec_units, mtype='GRU', reduction=None, classes=4, lr=0.001):
    if K.backend() == 'tensorflow':
        K.clear_session()

    input_layer = Input(shape=(maxlen,))
    embedding_layer = Embedding(max_features, output_dim=embed_dim,
                                trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)

    if reduction:
        if mtype == 'GRU':
            if bidirectional:
                x = Bidirectional(
                    CuDNNGRU(units=rec_units, return_sequences=True))(x)
            else:
                x = CuDNNGRU(units=rec_units, return_sequences=True)(x)
        elif mtype == 'LSTM':
            if bidirectional:
                x = Bidirectional(
                    CuDNNLSTM(units=rec_units, return_sequences=True))(x)
            else:
                x = CuDNNLSTM(units=rec_units, return_sequences=True)(x)

        if reduction == 'average':
            x = GlobalAveragePooling1D()(x)
        elif reduction == 'maximum':
            x = GlobalMaxPool1D()(x)
    else:
        if mtype == 'GRU':
            if bidirectional:
                x = Bidirectional(
                    CuDNNGRU(units=rec_units, return_sequences=False))(x)
            else:
                x = CuDNNGRU(units=rec_units, return_sequences=False)(x)
        elif mtype == 'LSTM':
            if bidirectional:
                x = Bidirectional(
                    CuDNNLSTM(units=rec_units, return_sequences=False))(x)
            else:
                x = CuDNNLSTM(units=rec_units, return_sequences=False)(x)

    output_layer = Dense(classes, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model

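# Hypothetical usage sketch (not part of the original source): builds a
# bidirectional GRU classifier with illustrative vocabulary/sequence sizes.
# Assumes the same Keras imports used by gru_keras above and a GPU build of
# Keras/TF 1.x, since CuDNNGRU/CuDNNLSTM only run on CUDA-capable GPUs.
gru_model = gru_keras(max_features=20000, maxlen=100, bidirectional=True,
                      dropout_rate=0.2, embed_dim=128, rec_units=64,
                      mtype='GRU', reduction='average', classes=4, lr=1e-3)
gru_model.summary()
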
def create_model(self):
    base_model = Xception(weights=None, include_top=False,
                          input_shape=(IM_HEIGHT, IM_WIDTH, 3))
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    predictions = Dense(3, activation="linear")(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    model.compile(loss="mse", optimizer=Adam(lr=0.001), metrics=["accuracy"])
    # model.enable_eager_execution()
    return model

def ensure_softmax_output(model):
    """
    Adds a softmax layer on top of the logits layer, in case the output layer
    is a logits layer.

    Parameters
    ----------
    model : Keras Model
        The original model

    Returns
    -------
    new_model : Keras Model
        The modified model
    """
    if 'softmax' not in model.output_names:
        if 'logits' in model.output_names:
            output = Activation('softmax', name='softmax')(model.output)
            new_model = Model(inputs=model.input, outputs=output)
        else:
            raise ValueError('The output layer is neither softmax nor logits')
    else:
        new_model = model

    return new_model

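# Hypothetical usage sketch (not part of the original source): wraps a small
# logits-only classifier so that ensure_softmax_output finds a layer named
# 'logits' and appends a softmax on top. Assumes the same Keras Input/Dense/
# Model imports used elsewhere in this file; layer sizes are illustrative.
inp = Input(shape=(32,))
logits = Dense(10, name='logits')(inp)
logits_model = Model(inputs=inp, outputs=logits)

probs_model = ensure_softmax_output(logits_model)
assert 'softmax' in probs_model.output_names
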
def __init__(self, num_steps):
    """Passes frames through base CNNs and returns features.

    Args:
      num_steps: int, Number of steps being passed through CNN.

    Raises:
      ValueError: if invalid network config is passed.
    """
    super(BaseModel, self).__init__()
    layer = CONFIG.MODEL.BASE_MODEL.LAYER
    network = CONFIG.MODEL.BASE_MODEL.NETWORK
    local_ckpt = get_pretrained_ckpt(network)

    if network in ['Resnet50', 'Resnet50_pretrained']:
        base_model = resnet_v2.ResNet50V2(include_top=False,
                                          weights=local_ckpt,
                                          pooling='max',
                                          backend=tf.keras.backend,
                                          layers=tf.keras.layers,
                                          models=tf.keras.models,
                                          utils=tf.keras.utils)
    elif network == 'VGGM':
        base_model = vggm_net(CONFIG.IMAGE_SIZE)
    else:
        raise ValueError('%s not supported.' % CONFIG.MODEL.BASE_MODEL.NETWORK)

    self.base_model = Model(inputs=base_model.input,
                            outputs=base_model.get_layer(layer).output)
    self.num_steps = num_steps

def build_GRU_with_z_gate_model(seq2seq, weight_array):
    h_tm1_input = Input(shape=(seq2seq.units,), name="h_input")
    x_input = Input(shape=(seq2seq.units,), name="x_input")
    r_input = Input(shape=(seq2seq.units,), name="r_input")
    hh_input = Input(shape=(seq2seq.units,), name="hh_input")

    def gru_with_z_gate(x, weight):
        h_tm1, inputs, r, hh = x[0], x[1], x[2], x[3]
        weight = K.variable(weight)
        units = h_tm1.shape[-1]
        kernel_z = weight[:units, :units]
        recurrent_kernel_z = weight[units:units * 2, :units]
        input_bias_z = weight[units * 2, :units]  # Change to 1 dim.
        x_z = K.bias_add(K.dot(inputs, kernel_z), input_bias_z)
        recurrent_z = K.dot(h_tm1, recurrent_kernel_z)
        z_without_activate = x_z + recurrent_z
        z = hard_sigmoid(z_without_activate)
        h = z * h_tm1 + (1 - z) * hh
        # return h
        return z

    output = Lambda(gru_with_z_gate,
                    arguments={"weight": weight_array})(
                        [h_tm1_input, x_input, r_input, hh_input])
    # h = layers.Add()([h_tm1_input, x_input])
    gate_model = Model([h_tm1_input, x_input, r_input, hh_input], output)
    # print("z gate model.")
    # gate_model.summary()
    return gate_model

def build_decoder_model_without_argmax(seq2seq, input_t, output_t):
    # Remove all initializer.
    input_state = Input(shape=(seq2seq.units,), name="decoder_state")
    decoder_inputs = Input(shape=(None,), name="decoder_input")
    decoder_embedding = Embedding(seq2seq.tgt_token_size,
                                  seq2seq.units,
                                  input_length=None,
                                  name="decoder_emb")
    decoder_gru = GRU(seq2seq.units,
                      return_sequences=True,
                      return_state=True,
                      name="decoder_gru")
    decoder_dense = Dense(seq2seq.tgt_token_size,
                          activation="softmax",
                          name="output_dense")

    state = input_state
    for t in range(input_t, output_t):
        inputs = Lambda(slice, arguments={"index": t})(
            decoder_inputs)  # Count encoder output as time 0.
        inputs_embedding = decoder_embedding(inputs)
        decoder_outputs_time, state = decoder_gru(inputs_embedding,
                                                  initial_state=state)
    if input_t == output_t:
        decoder_outputs_time = Lambda(lambda x: K.expand_dims(x, axis=1))(state)
    softmax = decoder_dense(decoder_outputs_time)

    decoder_model = Model([decoder_inputs, input_state], [softmax] + [state])
    return decoder_model

def vggm_net(image_size):
    """VGG-M: VGGM-esque (not exactly VGGM) small network."""
    use_bn = CONFIG.model.vggm.use_bn

    x = layers.Input(shape=(image_size, image_size, 3), dtype='float32')
    inputs = x
    x = layers.ZeroPadding2D(padding=(3, 3))(x)
    x = layers.Conv2D(64, (7, 7), strides=(2, 2), padding='valid',
                      kernel_initializer='he_normal', name='conv1')(x)
    if use_bn:
        x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    x = layers.ZeroPadding2D(padding=(1, 1))(x)
    x = layers.MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding='valid')(x)

    conv_blocks = [(128, 3, 2), (256, 3, 2), (512, 3, 0)]
    for i, conv_block in enumerate(conv_blocks):
        channels, filter_size, max_pool_size = conv_block
        x = get_vggm_conv_block(x, 2 * [(channels, filter_size)], use_bn,
                                max_pool_size, name='conv%s' % (i + 2))

    model = Model(inputs=inputs, outputs=x)
    return model

def build_network(num_actions, agent_history_length, resized_width,
                  resized_height):
    with tf.device("/gpu:0"):
        state = tf.placeholder(
            "float",
            [None, agent_history_length, resized_width, resized_height])
        inputs = Input(shape=(agent_history_length, resized_width,
                              resized_height))
        model = Convolution2D(filters=16, kernel_size=(8, 8), strides=(4, 4),
                              activation='relu', padding='same')(inputs)
        model = Convolution2D(filters=32, kernel_size=(4, 4), strides=(2, 2),
                              activation='relu', padding='same')(model)
        model = Flatten()(model)
        model = Dense(256, activation='relu')(model)
        q_values = Dense(num_actions, activation='linear')(model)
        m = Model(inputs, outputs=q_values)
    return state, m

def multi_gpu_model(model, gpus):
    if isinstance(gpus, (list, tuple)):
        num_gpus = len(gpus)
        target_gpu_ids = gpus
    else:
        num_gpus = gpus
        target_gpu_ids = range(num_gpus)

    def get_slice(data, i, parts):
        shape = tf.shape(data)
        batch_size = shape[:1]
        input_shape = shape[1:]
        step = batch_size // parts
        if i == num_gpus - 1:
            size = batch_size - step * i
        else:
            size = step
        size = tf.concat([size, input_shape], axis=0)
        stride = tf.concat([step, input_shape * 0], axis=0)
        start = stride * i
        return tf.slice(data, start, size)

    all_outputs = []
    for i in range(len(model.outputs)):
        all_outputs.append([])

    # Place a copy of the model on each GPU,
    # each getting a slice of the inputs.
    for i, gpu_id in enumerate(target_gpu_ids):
        with tf.device('/gpu:%d' % gpu_id):
            with tf.name_scope('replica_%d' % gpu_id):
                inputs = []
                # Retrieve a slice of the input.
                for x in model.inputs:
                    input_shape = tuple(x.get_shape().as_list())[1:]
                    slice_i = Lambda(get_slice,
                                     output_shape=input_shape,
                                     arguments={'i': i,
                                                'parts': num_gpus})(x)
                    inputs.append(slice_i)

                # Apply model on slice
                # (creating a model replica on the target device).
                outputs = model(inputs)
                if not isinstance(outputs, list):
                    outputs = [outputs]

                # Save the outputs for merging back together later.
                for o in range(len(outputs)):
                    all_outputs[o].append(outputs[o])

    # Merge outputs on CPU.
    with tf.device('/cpu:0'):
        merged = []
        for name, outputs in zip(model.output_names, all_outputs):
            merged.append(concatenate(outputs, axis=0, name=name))
        return Model(model.inputs, merged)

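# Hypothetical usage sketch (not part of the original source): replicates a
# tiny single-output regressor across two GPUs with the data-parallel wrapper
# above. Assumes the same Keras imports, a TF1-style graph/session backend
# (as implied by tf.device and Lambda slicing), and two visible GPUs.
base_inp = Input(shape=(32,))
base_out = Dense(1)(base_inp)
single_gpu_model = Model(base_inp, base_out)

parallel_model = multi_gpu_model(single_gpu_model, gpus=2)
parallel_model.compile(optimizer='rmsprop', loss='mse')
# parallel_model.fit(x, y, batch_size=64) would then split each batch of 64
# into two slices of 32, one per replica, and concatenate the outputs on CPU.
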
def create_actor_model(self):
    state_input = Input(shape=(6,))
    h1 = Dense(400, activation='relu')(state_input)
    h2 = Dense(300, activation='relu')(h1)
    output = Dense(1, activation='tanh')(h2)
    model = Model(inputs=state_input, outputs=output)
    return model

def insert_layer_old(model, new_layer, prev_layer_name):
    x = model.input
    for layer in model.layers:
        x = layer(x)
        if layer.name == prev_layer_name:
            x = new_layer(x)
    new_model = Model(inputs=model.input, outputs=x)
    return new_model

def network(self):
    """Assemble Critic network to predict q-values."""
    state = Input((self.env_dim))
    x = Dense(32, activation='elu')(state)
    x = Dense(16, activation='elu')(x)
    out = Dense(1, activation='linear', kernel_initializer=RandomUniform())(x)
    return Model(state, out)

def cnn_keras(max_features, maxlen, dropout_rate, embed_dim, num_filters=300,
              classes=4, lr=0.001):
    if K.backend() == 'tensorflow':
        K.clear_session()
    input_layer = Input(shape=(maxlen,))
    embedding_layer = Embedding(max_features, output_dim=embed_dim,
                                trainable=True)(input_layer)
    x = SpatialDropout1D(dropout_rate)(embedding_layer)
    x = Conv1D(num_filters, 7, activation='relu', padding='same')(x)
    x = GlobalMaxPooling1D()(x)
    output_layer = Dense(classes, activation="sigmoid")(x)
    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(loss='categorical_crossentropy',
                  optimizer=RMSprop(learning_rate=lr, clipvalue=1, clipnorm=1),
                  metrics=['acc'])
    return model

def create_critic_network(self):
    # parallel 1
    state_input = Input(shape=[self.obs_dim])
    w1 = Dense(self.hidden_dim, activation='relu')(state_input)
    h1 = Dense(self.hidden_dim, activation='linear')(w1)

    # parallel 2
    action_input = Input(shape=[self.act_dim], name='action2')
    a1 = Dense(self.hidden_dim, activation='linear')(action_input)

    # merge
    # h2 = concatenate([h1, a1], mode='sum')
    h2 = concatenate([h1, a1])
    h3 = Dense(self.hidden_dim, activation='relu')(h2)
    value_out = Dense(self.act_dim, activation='linear')(h3)
    model = Model(inputs=[state_input, action_input], outputs=[value_out])
    adam = Adam(self.lr)
    model.compile(loss='mse', optimizer=adam)
    return model, action_input, state_input

def ablate_activations(model, layer_regex, pct_ablation, seed):
    # Auxiliary dictionary to describe the network graph
    network_dict = {'input_layers_of': {}, 'new_output_tensor_of': {}}

    # Set the input layers of each layer
    for layer in model.layers:
        for node in layer._outbound_nodes:
            layer_name = node.outbound_layer.name
            if layer_name not in network_dict['input_layers_of']:
                network_dict['input_layers_of'].update(
                    {layer_name: [layer.name]})
            else:
                network_dict['input_layers_of'][layer_name].append(layer.name)

    # Set the output tensor of the input layer
    network_dict['new_output_tensor_of'].update(
        {model.layers[0].name: model.input})

    # Iterate over all layers after the input
    for layer in model.layers[1:]:
        # Determine input tensors
        layer_input = [
            network_dict['new_output_tensor_of'][layer_aux]
            for layer_aux in network_dict['input_layers_of'][layer.name]
        ]
        if len(layer_input) == 1:
            layer_input = layer_input[0]

        x = layer(layer_input)

        # Insert layer if name matches the regular expression
        if re.match(layer_regex, layer.name):
            ablation_layer = Dropout(
                rate=pct_ablation,
                noise_shape=(None,) +
                tuple(np.repeat(1, len(layer.output_shape) - 2)) +
                (layer.output_shape[-1],),
                seed=seed,
                name='{}_dropout'.format(layer.name))
            x = ablation_layer(x, training=True)
            if seed:
                seed += 1

        # Set new output tensor (the original one, or the one of the inserted
        # layer)
        network_dict['new_output_tensor_of'].update({layer.name: x})

    return Model(inputs=model.inputs, outputs=x)

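# Hypothetical usage sketch (not part of the original source): randomly zeroes
# ~20% of the channels after every layer whose name matches 'conv.*' by
# inserting Dropout layers that are forced on (training=True) even at
# inference. Assumes the same Keras imports plus the re/numpy modules used by
# ablate_activations; layer names and sizes are illustrative.
inp = Input(shape=(32, 32, 3))
x = Conv2D(16, (3, 3), name='conv_1')(inp)
x = Conv2D(32, (3, 3), name='conv_2')(x)
x = GlobalAveragePooling2D()(x)
out = Dense(10, activation='softmax', name='dense_out')(x)
small_cnn = Model(inp, out)

ablated_cnn = ablate_activations(small_cnn, layer_regex='conv.*',
                                 pct_ablation=0.2, seed=0)
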
def create_actor(self):
    obs_in = Input(shape=[self.obs_dim])  # 3 states
    # pdb.set_trace()
    h1 = Dense(self.hidden_dim, activation='relu')(obs_in)
    h2 = Dense(self.hidden_dim, activation='relu')(h1)
    h3 = Dense(self.hidden_dim, activation='relu')(h2)
    out = Dense(self.act_dim, activation='tanh')(h3)
    model = Model(inputs=obs_in, outputs=out)
    # no loss function for actor apparently
    return model, model.trainable_weights, obs_in

def build_model(hidden_size):
    inputs = Input(shape=(28, 28))
    x1 = Flatten()(inputs)
    x2 = Dense(hidden_size, activation=tf.nn.relu)(x1)
    x3 = Dropout(0.2)(x2)
    x4 = Dense(10, activation=tf.nn.softmax)(x3)
    model = Model(inputs=inputs, outputs=x4)
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Train and fit model
    model.fit(x_train, y_train, epochs=5)
    [loss, acc] = model.evaluate(x_test, y_test)
    return [model, acc]

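# Hypothetical usage sketch (not part of the original source): build_model
# reads x_train/y_train/x_test/y_test as module-level globals, so they must
# exist before it is called. Here they come from the Keras MNIST loader; this
# assumes the sketch runs in the same module where build_model is defined.
from tensorflow.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

mnist_model, test_acc = build_model(hidden_size=128)
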
def build_GRU_with_r_gate_model(seq2seq, weight_array):
    h_tm1_input = Input(shape=(seq2seq.units,), name="h_input")
    x_input = Input(shape=(seq2seq.units,), name="x_input")
    z_input = Input(shape=(seq2seq.units,), name="z_input")
    # x_h = K.bias_add(K.dot(inputs, kernel_h), input_bias_h)
    xh_input = Input(shape=(seq2seq.units,), name="xh_input")
    # split_recurrent_h = K.dot(h_tm1.transpose(), recurrent_kernel_h)
    rh_input = Input(shape=(seq2seq.units, seq2seq.units), name="rh_input")

    def gru_with_r_gate(x, weight):
        h_tm1, inputs, z, x_h, split_recurrent_h = x[0], x[1], x[2], x[3], x[4]
        weight = K.variable(weight)
        units = h_tm1.shape[-1]
        kernel_r = weight[:units, units:units * 2]
        recurrent_kernel_r = weight[units:units * 2, units:units * 2]
        input_bias_r = weight[units * 2, units:units * 2]  # Change to 1 dim.
        x_r = K.bias_add(K.dot(inputs, kernel_r), input_bias_r)
        recurrent_r = K.dot(h_tm1, recurrent_kernel_r)
        r_without_activate = x_r + recurrent_r
        r = hard_sigmoid(r_without_activate)
        # r = hard_sigmoid(x_r + recurrent_r)

        # Recompute recurrent_h by two parts.
        r_unsqueeze = K.expand_dims(r, axis=-1)
        recompute_recurrent_h = K.sum(r_unsqueeze * split_recurrent_h, axis=1)
        hh = tanh(x_h + recompute_recurrent_h)

        h = z * h_tm1 + (1 - z) * hh
        # return h
        return r

    output = Lambda(gru_with_r_gate, arguments={"weight": weight_array})(
        [h_tm1_input, x_input, z_input, xh_input, rh_input])
    gate_model = Model([h_tm1_input, x_input, z_input, xh_input, rh_input],
                       output)
    # print("r gate model")
    # gate_model.summary()
    return gate_model

def build_GRU_with_h_gate_model(seq2seq):
    # A new one.
    units = seq2seq.units
    h_tm1_input = Input(shape=(units,), name="h_input")
    x_input = Input(shape=(units,), name="x_input")
    z_input = Input(shape=(units,), name="z_input")
    r_input = Input(shape=(units,), name="r_input")

    # x_h = K.bias_add(K.dot(inputs, kernel_h), input_bias_h)
    x_h = Dense(units, name="wx_h")(x_input)
    r_h_tm1 = layers.Multiply()([r_input, h_tm1_input])  # r * h_tm1
    # recurrent_h = K.dot(r * h_tm1, recurrent_kernel_h)
    recurrent_h = Dense(units, use_bias=False, name="uh_h")(r_h_tm1)
    hh_ = layers.Add()([x_h, recurrent_h])
    hh = tanh(hh_)  # hh = tanh(x_h + recurrent_h)
    h1 = layers.Multiply()([z_input, h_tm1_input])
    h2 = layers.Multiply()([1 - z_input, hh])
    h = layers.Add()([h1, h2])  # h = z * h_tm1 + (1 - z) * hh

    GRU_with_h_gate_model = Model([h_tm1_input, x_input, z_input, r_input], h)
    # print("h gate model.")
    # GRU_with_h_gate_model.summary()
    return GRU_with_h_gate_model

def keras_build_fn(num_feature,
                   num_output,
                   is_sparse,
                   embedding_dim=-1,
                   num_hidden_layer=2,
                   hidden_layer_dim=512,
                   activation='elu',
                   learning_rate=1e-3,
                   dropout=0.5,
                   l1=0.0,
                   l2=0.0,
                   loss='categorical_crossentropy'):
    """Initializes and compiles a Keras DNN model using the Adam optimizer.

    Args:
      num_feature: number of features
      num_output: number of outputs (targets, e.g., classes)
      is_sparse: boolean whether input data is in sparse format
      embedding_dim: int number of nodes in embedding layer; if value is <= 0
        then no embedding layer will be present in the model
      num_hidden_layer: number of hidden layers
      hidden_layer_dim: int number of nodes in the hidden layer(s)
      activation: string activation function for hidden layers; see
        https://keras.io/activations/
      learning_rate: float learning rate for Adam
      dropout: float proportion of nodes to dropout; values in [0, 1]
      l1: float strength of L1 regularization on weights
      l2: float strength of L2 regularization on weights
      loss: string loss function; see https://keras.io/losses/

    Returns:
      model: Keras.models.Model
        compiled Keras model
    """
    assert num_hidden_layer >= 1

    inputs = Input(shape=(num_feature,), sparse=is_sparse)

    activation_func_args = ()
    if activation.lower() == 'prelu':
        activation_func = PReLU
    elif activation.lower() == 'leakyrelu':
        activation_func = LeakyReLU
    elif activation.lower() == 'elu':
        activation_func = ELU
    elif activation.lower() == 'thresholdedrelu':
        activation_func = ThresholdedReLU
    else:
        activation_func = Activation
        activation_func_args = (activation,)

    if l1 > 0 and l2 > 0:
        reg_init = lambda: regularizers.l1_l2(l1, l2)
    elif l1 > 0:
        reg_init = lambda: regularizers.l1(l1)
    elif l2 > 0:
        reg_init = lambda: regularizers.l2(l2)
    else:
        reg_init = lambda: None

    if embedding_dim > 0:
        # embedding layer
        e = Dense(embedding_dim)(inputs)
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(e)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)
    else:
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(inputs)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)

    # add additional hidden layers
    for _ in range(num_hidden_layer - 1):
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(x)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)

    x = Dense(num_output)(x)
    preds = Activation('softmax')(x)

    model = Model(inputs=inputs, outputs=preds)
    model.compile(optimizer=Adam(lr=learning_rate), loss=loss)

    return model

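# Hypothetical usage sketch (not part of the original source): a dense softmax
# classifier for 100 features and 5 classes with a 32-unit embedding-style
# bottleneck. All hyperparameter values are illustrative; assumes the Keras
# layers/optimizers/regularizers imported for keras_build_fn above.
clf = keras_build_fn(num_feature=100, num_output=5, is_sparse=False,
                     embedding_dim=32, num_hidden_layer=2,
                     hidden_layer_dim=128, activation='elu',
                     learning_rate=1e-3, dropout=0.3, l1=0.0, l2=1e-4)
clf.summary()
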
def get_params(self):
    params = []
    for layer in self.layers:
        params += layer.get_params()
    return params


if __name__ == '__main__':
    # you can also set weights to None, it doesn't matter
    resnet_ = ResNet50(weights='imagenet')

    # make a new resnet without the softmax
    x = resnet_.layers[-2].output
    W, b = resnet_.layers[-1].get_weights()
    y = Dense(1000)(x)
    resnet = Model(resnet_.input, y)
    resnet.layers[-1].set_weights([W, b])

    # you can determine the correct layer
    # by looking at resnet.layers in the console
    partial_model = Model(inputs=resnet.input,
                          outputs=resnet.layers[175].output)

    # maybe useful when building your model
    # to look at the layers you're trying to copy
    print(partial_model.summary())

    # create an instance of our own model
    my_partial_resnet = TFResNet()

    # make a fake image

def SeparableConv(input):
    # Depthwise Convolution
    x = DepthwiseConv2D((3, 3), padding='same')(input)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    # Pointwise Convolution
    x = Conv2D(128, (1, 1))(x)
    x = BatchNormalization()(x)
    x = ReLU()(x)

    return x

# %%
'''
##### Build the model
'''

# %%
input = Input((64, 64, 3))
output = SeparableConv(input)

model = Model(inputs=input, outputs=output)
model.summary()

# %%
'''
More related references:
https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet.py#L364
'''

# %%

class seq2seq_train:

    def __init__(self, cfg):
        self.cfg = cfg
        self.enc_inp = None
        self.enc_outp = None
        self.dec_inp = None
        self.dec_outp = None
        self.enc_model = None
        self.model = None
        self.__get_model__()

    def __get_model__(self):
        self.enc_inp = Input(shape=(self.cfg.input_seq_len(),),
                             name="Encoder-Input")
        embd = Embedding(self.cfg.num_input_tokens(),
                         self.cfg.latent_dim(),
                         name='Encoder-Embedding',
                         mask_zero=False)
        embd_outp = embd(self.enc_inp)
        x = BatchNormalization(name='Encoder-Batchnorm-1')(embd_outp)
        _, state_h = GRU(self.cfg.latent_dim(),
                         return_state=True,
                         name='Encoder-Last-GRU')(x)
        self.enc_model = Model(inputs=self.enc_inp,
                               outputs=state_h,
                               name='Encoder-Model')
        self.enc_outp = self.enc_model(self.enc_inp)
        self.cfg.logger.info("********** Encoder Model summary **************")
        self.cfg.logger.info(self.enc_model.summary())

        # get the decoder
        self.dec_inp = Input(shape=(None,), name='Decoder-Input')
        dec_emb = Embedding(self.cfg.num_output_tokens(),
                            self.cfg.latent_dim(),
                            name='Decoder-Embedding',
                            mask_zero=False)(self.dec_inp)
        dec_bn = BatchNormalization(name='Decoder-Batchnorm-1')(dec_emb)
        decoder_gru = GRU(self.cfg.latent_dim(),
                          return_state=True,
                          return_sequences=True,
                          name='Decoder-GRU')
        decoder_gru_output, _ = decoder_gru(dec_bn, initial_state=self.enc_outp)
        x = BatchNormalization(name='Decoder-Batchnorm-2')(decoder_gru_output)
        dec_dense = Dense(self.cfg.num_output_tokens(),
                          activation='softmax',
                          name='Final-Output-Dense')
        self.dec_outp = dec_dense(x)

        model_inp = [self.enc_inp, self.dec_inp]
        self.model = Model(model_inp, self.dec_outp)
        self.cfg.logger.info("********** Full Model summary **************")
        self.cfg.logger.info(str(self.model.summary()))
        plot_model(self.model,
                   to_file=self.cfg.scratch_dir() + os.sep + "seq2seq.png")

    def fit_model(self, input_vecs, output_vecs):
        input_data = [input_vecs, output_vecs[:, :-1]]
        output_data = output_vecs[:, 1:]
        self.model.compile(optimizer=optimizers.Nadam(lr=0.001),
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])
        model_checkpoint = ModelCheckpoint(self.cfg.output_dir() + os.sep +
                                           'model.hdf5',
                                           monitor='val_loss',
                                           save_best_only=True,
                                           period=1)
        csv_logger = CSVLogger(self.cfg.log_dir() + os.sep + 'history.csv')
        tb_dir = self.cfg.log_dir() + os.sep + "tensorboard"
        if os.path.isfile(tb_dir):
            rmtree(tb_dir)
        tensorboard = TensorBoard(log_dir=tb_dir,
                                  histogram_freq=10,
                                  batch_size=self.cfg.batch_size(),
                                  write_graph=True,
                                  write_grads=False,
                                  write_images=False,
                                  embeddings_freq=0,
                                  embeddings_layer_names=None,
                                  embeddings_metadata=None,
                                  embeddings_data=None)
        history = self.model.fit(
            input_data,
            np.expand_dims(output_data, -1),
            batch_size=self.cfg.batch_size(),
            epochs=self.cfg.nepochs(),
            validation_split=self.cfg.validation_split(),
            callbacks=[csv_logger, model_checkpoint, tensorboard])
        return history

def create_seq2seq():
    batch_size = 64  # Batch size for training.
    epochs = 100  # Number of epochs to train for.
    latent_dim = 256  # Latent dimensionality of the encoding space.
    num_samples = 10000  # Number of samples to train on.
    num_encoder_tokens = 71
    num_decoder_tokens = 93

    # Define an input sequence and process it.
    encoder_inputs = Input(shape=(None, num_encoder_tokens))
    encoder = LSTM(latent_dim, name="lstm1", return_state=True)
    encoder_outputs, state_h, state_c = encoder(encoder_inputs)
    # We discard `encoder_outputs` and only keep the states.
    encoder_states = [state_h, state_c]

    # enc = Sequential(name='encoder')
    # # model.name = 'lstm'
    # enc.add(Input(shape=(None, num_encoder_tokens)))
    # enc.add(LSTM(latent_dim, name='lstm1', return_sequences=True))
    # model.add(LSTM(32, name='lstm2', return_sequences=True))
    # model.add(LSTM(64, name='lstm3', return_sequences=True))
    # model.add(LSTM(128, name='lstm4', return_sequences=True))
    # model.add(LSTM(48, name='lstm5'))
    # model.add(Dense(1, activation='sigmoid'))
    # model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Set up the decoder, using `encoder_states` as initial state.
    decoder_inputs = Input(shape=(None, num_decoder_tokens))
    # We set up our decoder to return full output sequences,
    # and to return internal states as well. We don't use the
    # return states in the training model, but we will use them in inference.
    decoder_lstm = LSTM(latent_dim, name="lstm2", return_sequences=True,
                        return_state=True)
    decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                         initial_state=encoder_states)
    decoder_dense = Dense(num_decoder_tokens, name="dense1",
                          activation="softmax")
    decoder_outputs = decoder_dense(decoder_outputs)

    # # Define the model that will turn
    # # `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
    # model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name='seq2seq')
    # # Run training
    # model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
    #               metrics=['accuracy'])
    # return model

    # enc = Model(encoder_inputs, encoder_outputs, name='encoder')
    # enc.compile(optimizer='rmsprop', loss='categorical_crossentropy',
    #             metrics=['accuracy'])
    # dec = Model(decoder_inputs, decoder_outputs, name='decoder')
    # dec.compile(optimizer='rmsprop', loss='categorical_crossentropy',
    #             metrics=['accuracy'])
    # return enc, dec

    encoder_model = Model(encoder_inputs, encoder_states, name="encoder")

    decoder_state_input_h = Input(shape=(latent_dim,))
    decoder_state_input_c = Input(shape=(latent_dim,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_inputs, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    decoder_model = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states,
        name="decoder",
    )

    return encoder_model, decoder_model

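# Hypothetical usage sketch (not part of the original source): one greedy
# decoding step with the (untrained) inference models returned above. The
# dummy arrays match the token sizes hard-coded in create_seq2seq (71 encoder
# tokens, 93 decoder tokens); assumes numpy plus the same Keras imports.
import numpy as np

encoder_model, decoder_model = create_seq2seq()
src = np.zeros((1, 10, 71), dtype='float32')   # dummy one-hot source sequence
states = encoder_model.predict(src)            # [state_h, state_c]
tgt = np.zeros((1, 1, 93), dtype='float32')    # start-of-sequence token slot
probs, state_h, state_c = decoder_model.predict([tgt] + states)
next_token_id = int(np.argmax(probs[0, -1]))
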
class SiameseNet(object):
    """Class for Siamese Network."""

    def __init__(self, inputs, arch, siam_reg, main_path, y_true):
        self.orig_inputs = inputs
        # set up inputs
        self.inputs = {
            'A': inputs['Unlabeled'],
            'B': Input(shape=inputs['Unlabeled'].get_shape().as_list()[1:]),
            'Labeled': inputs['Labeled'],
        }

        self.main_path = os.path.join(main_path, 'siemese/')
        self.y_true = y_true

        # generate layers
        self.layers = []
        self.layers += util.make_layer_list(arch, 'siamese', siam_reg)

        # create the siamese net
        self.outputs = stack_layers(self.inputs, self.layers)

        # add the distance layer
        self.distance = Lambda(
            affinities.euclidean_distance,
            output_shape=affinities.eucl_dist_output_shape)(
                [self.outputs['A'], self.outputs['B']])

        # create the distance model for training
        self.net = Model([self.inputs['A'], self.inputs['B']], self.distance)

        # compile the siamese network
        self.net.compile(
            loss=affinities.get_contrastive_loss(m_neg=1, m_pos=0.05),
            optimizer='rmsprop')

    def train(self, pairs_train, dist_train, pairs_val, dist_val, lr, drop,
              patience, num_epochs, batch_size, dset, load=True):
        """Train the Siamese Network."""
        if load:
            # load weights into model
            output_path = os.path.join(self.main_path, dset)
            load_model(self.net, output_path, '_siamese')
            return

        # create handler for early stopping and learning rate scheduling
        self.lh = util.LearningHandler(lr=lr,
                                       drop=drop,
                                       lr_tensor=self.net.optimizer.lr,
                                       patience=patience)

        # initialize the training generator
        train_gen_ = util.train_gen(pairs_train, dist_train, batch_size)

        # format the validation data for keras
        validation_data = ([pairs_val[:, 0], pairs_val[:, 1]], dist_val)

        # compute the steps per epoch
        steps_per_epoch = int(len(pairs_train) / batch_size)

        # train the network
        self.net.fit_generator(train_gen_,
                               epochs=num_epochs,
                               validation_data=validation_data,
                               steps_per_epoch=steps_per_epoch,
                               callbacks=[self.lh])

        model_json = self.net.to_json()
        output_path = os.path.join(self.main_path, dset)
        save_model(self.net, model_json, output_path, '_siamese')

    def predict(self, x, batch_sizes):
        # compute the siamese embeddings of the input data
        return train.predict(self.outputs['A'],
                             x_unlabeled=x,
                             inputs=self.orig_inputs,
                             y_true=self.y_true,
                             batch_sizes=batch_sizes)

class CncNet(object):
    """Class for CNC Network."""

    def __init__(self,
                 inputs,
                 arch,
                 cnc_reg,
                 y_true,
                 y_train_labeled_onehot,
                 n_clusters,
                 affinity,
                 scale_nbr,
                 n_nbrs,
                 batch_sizes,
                 result_path,
                 dset,
                 siamese_net=None,
                 x_train=None,
                 lr=0.01,
                 temperature=1.0,
                 bal_reg=0.0):
        self.y_true = y_true
        self.y_train_labeled_onehot = y_train_labeled_onehot
        self.inputs = inputs
        self.batch_sizes = batch_sizes
        self.result_path = result_path
        self.lr = lr
        self.temperature = temperature

        # generate layers
        self.layers = util.make_layer_list(arch[:-1], 'cnc', cnc_reg)
        print('Running with CNC loss')
        self.layers += [{
            'type': 'None',
            'size': n_clusters,
            'l2_reg': cnc_reg,
            'name': 'cnc_{}'.format(len(arch))
        }]

        # create CncNet
        self.outputs = stack_layers(self.inputs, self.layers)
        self.net = Model(inputs=self.inputs['Unlabeled'],
                         outputs=self.outputs['Unlabeled'])

        # DEFINE LOSS
        # generate affinity matrix W according to params
        if affinity == 'siamese':
            input_affinity = tf.concat(
                [siamese_net.outputs['A'], siamese_net.outputs['Labeled']],
                axis=0)
            x_affinity = siamese_net.predict(x_train, batch_sizes)
        elif affinity in ['knn', 'full']:
            input_affinity = tf.concat(
                [self.inputs['Unlabeled'], self.inputs['Labeled']], axis=0)
            x_affinity = x_train

        # calculate scale for affinity matrix
        scale = util.get_scale(x_affinity, self.batch_sizes['Unlabeled'],
                               scale_nbr)

        # create affinity matrix
        if affinity == 'full':
            weight_mat = affinities.full_affinity(input_affinity, scale=scale)
        elif affinity in ['knn', 'siamese']:
            weight_mat = affinities.knn_affinity(input_affinity,
                                                 n_nbrs,
                                                 scale=scale,
                                                 scale_nbr=scale_nbr)

        # define loss
        self.tau = tf.Variable(self.temperature, name='temperature')
        self.outputs['Unlabeled'] = util.gumbel_softmax(
            self.outputs['Unlabeled'], self.tau)
        num_nodes = self.batch_sizes['Unlabeled']
        cluster_size = tf.reduce_sum(self.outputs['Unlabeled'], axis=0)
        ground_truth = [num_nodes / float(n_clusters)] * n_clusters
        bal = tf.losses.mean_squared_error(ground_truth, cluster_size)
        degree = tf.expand_dims(tf.reduce_sum(weight_mat, axis=1), 0)
        vol = tf.matmul(degree, self.outputs['Unlabeled'], name='vol')
        normalized_prob = tf.divide(self.outputs['Unlabeled'],
                                    vol[tf.newaxis, :],
                                    name='normalized_prob')[0]
        gain = tf.matmul(normalized_prob,
                         tf.transpose(1 - self.outputs['Unlabeled']),
                         name='res2')
        self.loss = tf.reduce_sum(gain * weight_mat) + bal_reg * bal

        # create the train step update
        self.learning_rate = tf.Variable(self.lr, name='cnc_learning_rate')
        self.train_step = tf.train.RMSPropOptimizer(
            learning_rate=self.learning_rate).minimize(
                self.loss, var_list=self.net.trainable_weights)

        # initialize cnc_net variables
        K.get_session().run(tf.global_variables_initializer())
        K.get_session().run(
            tf.variables_initializer(self.net.trainable_weights))

        if affinity == 'siamese':
            output_path = os.path.join(self.main_path, dset)
            load_model(siamese_net, output_path, '_siamese')

    def train(self,
              x_train_unlabeled,
              x_train_labeled,
              x_val_unlabeled,
              drop,
              patience,
              min_tem,
              num_epochs,
              load=False):
        """Train the CNC network."""
        file_name = 'cnc_net'
        if load:
            # load weights into model
            print('load pretrained weights of the CNC network.')
            load_model(self.net, self.result_path, file_name)
            return

        # create handler for early stopping and learning rate scheduling
        self.lh = util.LearningHandler(lr=self.lr,
                                       drop=drop,
                                       lr_tensor=self.learning_rate,
                                       patience=patience,
                                       tau=self.temperature,
                                       tau_tensor=self.tau,
                                       min_tem=min_tem,
                                       gumble=True)

        losses = np.empty((num_epochs,))
        val_losses = np.empty((num_epochs,))

        # begin cnc_net training loop
        self.lh.on_train_begin()
        for i in range(num_epochs):
            # train cnc_net
            losses[i] = train.train_step(
                return_var=[self.loss],
                updates=self.net.updates + [self.train_step],
                x_unlabeled=x_train_unlabeled,
                inputs=self.inputs,
                y_true=self.y_true,
                batch_sizes=self.batch_sizes,
                x_labeled=x_train_labeled,
                y_labeled=self.y_train_labeled_onehot,
                batches_per_epoch=100)[0]

            # get validation loss
            val_losses[i] = train.predict_sum(
                self.loss,
                x_unlabeled=x_val_unlabeled,
                inputs=self.inputs,
                y_true=self.y_true,
                x_labeled=x_train_unlabeled[0:0],
                y_labeled=self.y_train_labeled_onehot,
                batch_sizes=self.batch_sizes)

            # do early stopping if necessary
            if self.lh.on_epoch_end(i, val_losses[i]):
                print('STOPPING EARLY')
                break

            # print training status
            print('Epoch: {}, loss={:2f}, val_loss={:2f}'.format(
                i, losses[i], val_losses[i]))
            with gfile.Open(self.result_path + 'losses', 'a') as f:
                f.write(
                    str(i) + ' ' + str(losses[i]) + ' ' + str(val_losses[i]) +
                    '\n')

        model_json = self.net.to_json()
        save_model(self.net, model_json, self.result_path, file_name)

    def predict(self, x):
        # test inputs do not require the 'Labeled' input
        inputs_test = {'Unlabeled': self.inputs['Unlabeled']}
        return train.predict(self.outputs['Unlabeled'],
                             x_unlabeled=x,
                             inputs=inputs_test,
                             y_true=self.y_true,
                             x_labeled=x[0:0],
                             y_labeled=self.y_train_labeled_onehot[0:0],
                             batch_sizes=self.batch_sizes)