# Keep only the encoder's final hidden and cell states; they seed the decoder.
encoder_states = [state_h, state_c]

# Decoder input: sequences of any length whose steps are vectors of width
# `num_decoder_tokens`.
decoder_inputs = Input(shape=(None, num_decoder_tokens))

# The decoder LSTM returns the full output sequence together with its final
# states. The states are discarded while training and only matter when the
# layer is reused for inference.
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(
    decoder_inputs,
    initial_state=encoder_states,
)

# Project every decoder step onto a distribution over the decoder tokens.
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

# Training model: maps `encoder_input_data` and `decoder_input_data`
# to `decoder_target_data`.
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

# Train, holding out 20% of the samples for validation.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.2,
)

# Persist the trained model and record the vocabulary sizes next to it.
model.save('s2s.h5')
metadata = {
    'num_encoder_tokens': num_encoder_tokens,
    'num_decoder_tokens': num_decoder_tokens,
}
def train_model(self, sentences_pair, is_similar, model_save_directory='./'):
    """Train the shared-encoder (siamese) BiLSTM model and checkpoint the best epoch.

    Both sentences are encoded by the *same* bidirectional LSTM layer, the two
    encodings are concatenated and passed through a dense head ending in a
    single sigmoid unit.

    Args:
        sentences_pair: Passed unchanged to `create_train_dev_set`.
        is_similar: Labels, passed unchanged to `create_train_dev_set`.
        model_save_directory: Directory under which
            `checkpoints/<unix-timestamp>/` is created. A trailing path
            separator is optional.

    Returns:
        Path of the `.h5` file that `ModelCheckpoint` writes the best model
        to, or `None` when `create_train_dev_set` yields no training data.
    """
    (train_data_1, train_data_2, labels_train,
     val_data_1, val_data_2, labels_val) = create_train_dev_set(
         sentences_pair, is_similar, self.validation_split_ratio)

    if train_data_1 is None:
        print("++++ !! Failure: Unable to train model ++++")
        return None

    # One encoder instance, applied to both inputs so the weights are shared.
    lstm_layer = Bidirectional(
        LSTM(self.number_lstm_units,
             dropout=self.rate_drop_lstm,
             recurrent_dropout=self.rate_drop_lstm))

    # First sentence: add a trailing feature axis so the LSTM receives a
    # 3-D tensor (no embedding layer is used here).
    sequence_1_input = Input(shape=(self.max_sequence_length, ),
                             dtype='float32')
    sequence_1 = Lambda(lambda x: expand_dims(x, axis=-1))(sequence_1_input)
    x1 = lstm_layer(sequence_1)

    # Second sentence, same treatment.
    sequence_2_input = Input(shape=(self.max_sequence_length, ),
                             dtype='float32')
    sequence_2 = Lambda(lambda x: expand_dims(x, axis=-1))(sequence_2_input)
    x2 = lstm_layer(sequence_2)

    # Merge both encodings and classify, with batch normalisation and
    # dropout around the hidden dense layer.
    merged = concatenate([x1, x2])
    merged = BatchNormalization()(merged)
    merged = Dropout(self.rate_drop_dense)(merged)
    merged = Dense(self.number_dense_units,
                   activation=self.activation_function)(merged)
    merged = BatchNormalization()(merged)
    merged = Dropout(self.rate_drop_dense)(merged)
    preds = Dense(1, activation='sigmoid')(merged)

    model = Model(inputs=[sequence_1_input, sequence_2_input], outputs=preds)
    model.compile(loss='binary_crossentropy',
                  optimizer='nadam',
                  metrics=['acc'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=20)

    STAMP = 'lstm_%d_%d_%.2f_%.2f' % (
        self.number_lstm_units, self.number_dense_units,
        self.rate_drop_lstm, self.rate_drop_dense)

    # Build paths with os.path.join so that `model_save_directory` works
    # with or without a trailing separator.
    checkpoint_dir = os.path.join(model_save_directory, 'checkpoints',
                                  str(int(time.time())))
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    bst_model_path = os.path.join(checkpoint_dir, STAMP + '.h5')

    model_checkpoint = ModelCheckpoint(bst_model_path,
                                       save_best_only=True,
                                       save_weights_only=False)
    tensorboard = TensorBoard(
        log_dir=os.path.join(checkpoint_dir, 'logs',
                             "{}".format(time.time())))

    model.fit([train_data_1, train_data_2],
              labels_train,
              validation_data=([val_data_1, val_data_2], labels_val),
              epochs=200,
              batch_size=64,
              shuffle=True,
              callbacks=[early_stopping, model_checkpoint, tensorboard])

    return bst_model_path
# Bind an explicit TensorFlow session to the Keras backend.
sess = tf.Session()
K.set_session(sess)

# Stateful LSTMs need a fixed batch size, hence `batch_size=6`.
# NOTE(review): `input` and `re` shadow the builtin / stdlib module of the
# same name; the names are kept because later code may refer to them.
input = Input(shape=(1, ), batch_size=6)
re = Reshape(target_shape=(1, 1))(input)
rnn_1 = LSTM(128, stateful=True, return_sequences=True)(re)
rnn_2 = LSTM(128, stateful=True, return_sequences=False)(rnn_1)

# Three heads share the second LSTM's output.
output_1 = Dense(1, activation='linear')(rnn_2)
output_2 = Dense(1, activation='sigmoid')(rnn_2)
output_3 = Dense(3, activation='softmax')(rnn_2)

model = Model(inputs=input, outputs=[output_1, output_2, output_3])
adam = Adam(lr=0.001)
model.compile(loss="mse", optimizer=adam)

# Replace every weight tensor with uniform noise of the same shape.
raw_weights = model.get_weights()
new_weights = [np.random.uniform(-5, 5, raw.shape) for raw in raw_weights]
# Pass the list itself: the tensors have different shapes, so wrapping them in
# `np.array` builds a ragged object array (an error on recent NumPy releases).
model.set_weights(new_weights)

one, tow, three = model.predict([1, 2, 3, 4, 5, 6])
one = one.tolist()
def NASNet(input_shape=None,
           penultimate_filters=4032,
           num_blocks=6,
           stem_block_filters=96,
           skip_reduction=True,
           filter_multiplier=2,
           include_top=True,
           weights=None,
           input_tensor=None,
           pooling=None,
           classes=1000,
           default_size=None):
    """Instantiates a NASNet model.

    Note that only TensorFlow is supported for now, therefore it only works
    with the data format `image_data_format='channels_last'` in your Keras
    config at `~/.keras/keras.json`. If the configured format differs, it is
    switched to `channels_last` while the model is built and restored before
    this function returns or raises.

    Arguments:
        input_shape: Optional shape tuple, the input shape is by default
            `(331, 331, 3)` for NASNetLarge and `(224, 224, 3)` for
            NASNetMobile. It should have exactly 3 inputs channels, and
            width and height should be no smaller than 32.
            E.g. `(224, 224, 3)` would be one valid value.
        penultimate_filters: Number of filters in the penultimate layer.
            NASNet models use the notation `NASNet (N @ P)`, where:
                - N is the number of blocks
                - P is the number of penultimate filters
        num_blocks: Number of repeated blocks of the NASNet model.
            NASNet models use the notation `NASNet (N @ P)`, where:
                - N is the number of blocks
                - P is the number of penultimate filters
        stem_block_filters: Number of filters in the initial stem block
        skip_reduction: Whether to skip the reduction step at the tail
            end of the network. Set to `False` for CIFAR models.
        filter_multiplier: Controls the width of the network.
            - If `filter_multiplier` < 1.0, proportionally decreases the
              number of filters in each layer.
            - If `filter_multiplier` > 1.0, proportionally increases the
              number of filters in each layer.
            - If `filter_multiplier` = 1, default number of filters from the
              paper are used at each layer.
        include_top: Whether to include the fully-connected layer at the top
            of the network.
        weights: `None` (random initialization), `imagenet` (ImageNet
            weights), or the path to a weights file to load.
        input_tensor: Optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        pooling: Optional pooling mode for feature extraction when
            `include_top` is `False`.
            - `None` means that the output of the model will be the 4D tensor
              output of the last convolutional layer.
            - `avg` means that global average pooling will be applied to the
              output of the last convolutional layer, and thus the output of
              the model will be a 2D tensor.
            - `max` means that global max pooling will be applied.
        classes: Optional number of classes to classify images into, only to
            be specified if `include_top` is True, and if no `weights`
            argument is specified.
        default_size: Specifies the default image size of the model
            (331 when omitted).

    Returns:
        A Keras model instance.

    Raises:
        ValueError: In case of invalid argument for `weights`, invalid input
            shape or invalid `penultimate_filters` value.
        RuntimeError: If attempting to run this model with a backend that
            does not support separable convolutions.
    """
    if K.backend() != 'tensorflow':
        raise RuntimeError('Only Tensorflow backend is currently supported, '
                           'as other backends do not support '
                           'separable convolution.')

    if not (weights in {'imagenet', None} or os.path.exists(weights)):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization), `imagenet` '
                         '(pre-training on ImageNet), '
                         'or the path to the weights file to be loaded.')

    if weights == 'imagenet' and include_top and classes != 1000:
        raise ValueError('If using `weights` as ImageNet with `include_top` '
                         'as true, `classes` should be 1000')

    if (isinstance(input_shape, tuple) and None in input_shape and
            weights == 'imagenet'):
        raise ValueError('When specifying the input shape of a NASNet'
                         ' and loading `ImageNet` weights, '
                         'the input_shape argument must be static '
                         '(no None entries). Got: `input_shape=' +
                         str(input_shape) + '`.')

    if default_size is None:
        default_size = 331

    # Determine proper input shape and default size.
    input_shape = _obtain_input_shape(
        input_shape,
        default_size=default_size,
        min_size=32,
        data_format=K.image_data_format(),
        require_flatten=False,
        weights=weights)

    if K.image_data_format() != 'channels_last':
        logging.warning('The NASNet family of models is only available '
                        'for the input data format "channels_last" '
                        '(width, height, channels). '
                        'However your settings specify the default '
                        'data format "channels_first" (channels, width, height).'
                        ' You should set `image_data_format="channels_last"` '
                        'in your Keras config located at ~/.keras/keras.json. '
                        'The model being returned right now will expect inputs '
                        'to follow the "channels_last" data format.')
        K.set_image_data_format('channels_last')
        old_data_format = 'channels_first'
    else:
        old_data_format = None

    # Everything below runs with the (possibly overridden) global data format.
    # The `finally` clause guarantees the caller's setting is restored even
    # when validation, graph construction or weight loading raises.
    try:
        if input_tensor is None:
            img_input = Input(shape=input_shape)
        elif not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

        if penultimate_filters % 24 != 0:
            raise ValueError(
                'For NASNet-A models, the value of `penultimate_filters` '
                'needs to be divisible by 24. Current value: %d' %
                penultimate_filters)

        channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
        filters = penultimate_filters // 24

        # Stem convolution: strided/valid when the reduction cells are used,
        # unit-stride/same otherwise.
        if not skip_reduction:
            x = Conv2D(
                stem_block_filters, (3, 3),
                strides=(2, 2),
                padding='valid',
                use_bias=False,
                name='stem_conv1',
                kernel_initializer='he_normal')(img_input)
        else:
            x = Conv2D(
                stem_block_filters, (3, 3),
                strides=(1, 1),
                padding='same',
                use_bias=False,
                name='stem_conv1',
                kernel_initializer='he_normal')(img_input)

        x = BatchNormalization(
            axis=channel_dim, momentum=0.9997, epsilon=1e-3,
            name='stem_bn1')(x)

        p = None
        if not skip_reduction:  # imagenet / mobile mode
            x, p = _reduction_a_cell(
                x, p, filters // (filter_multiplier**2), block_id='stem_1')
            x, p = _reduction_a_cell(
                x, p, filters // filter_multiplier, block_id='stem_2')

        for i in range(num_blocks):
            x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i))

        x, p0 = _reduction_a_cell(
            x, p, filters * filter_multiplier,
            block_id='reduce_%d' % (num_blocks))

        p = p0 if not skip_reduction else p

        for i in range(num_blocks):
            x, p = _normal_a_cell(
                x, p, filters * filter_multiplier,
                block_id='%d' % (num_blocks + i + 1))

        x, p0 = _reduction_a_cell(
            x, p, filters * filter_multiplier**2,
            block_id='reduce_%d' % (2 * num_blocks))

        p = p0 if not skip_reduction else p

        for i in range(num_blocks):
            x, p = _normal_a_cell(
                x, p, filters * filter_multiplier**2,
                block_id='%d' % (2 * num_blocks + i + 1))

        x = Activation('relu')(x)

        if include_top:
            x = GlobalAveragePooling2D()(x)
            x = Dense(classes, activation='softmax', name='predictions')(x)
        else:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)

        # Ensure that the model takes into account
        # any potential predecessors of `input_tensor`.
        if input_tensor is not None:
            inputs = get_source_inputs(input_tensor)
        else:
            inputs = img_input

        model = Model(inputs, x, name='NASNet')

        # Load weights.
        if weights == 'imagenet':
            if default_size == 224:  # mobile version
                if include_top:
                    weight_path = NASNET_MOBILE_WEIGHT_PATH
                    model_name = 'nasnet_mobile.h5'
                else:
                    weight_path = NASNET_MOBILE_WEIGHT_PATH_NO_TOP
                    model_name = 'nasnet_mobile_no_top.h5'
            elif default_size == 331:  # large version
                if include_top:
                    weight_path = NASNET_LARGE_WEIGHT_PATH
                    model_name = 'nasnet_large.h5'
                else:
                    weight_path = NASNET_LARGE_WEIGHT_PATH_NO_TOP
                    model_name = 'nasnet_large_no_top.h5'
            else:
                raise ValueError(
                    'ImageNet weights can only be loaded with NASNetLarge'
                    ' or NASNetMobile')

            weights_file = get_file(
                model_name, weight_path, cache_subdir='models')
            model.load_weights(weights_file)
        elif weights is not None:
            model.load_weights(weights)
    finally:
        if old_data_format:
            K.set_image_data_format(old_data_format)

    return model
def _half_true_mask(shape):
    """Return a boolean array of shape `shape[:2]` whose first `shape[1] // 2` columns are True."""
    mask = np.full(tuple(shape[:2]), False)
    mask[:, :shape[1] // 2] = True
    return mask


def layer_test(layer_cls,
               kwargs=None,
               input_shape=None,
               input_dtype=None,
               input_data=None,
               expected_output=None,
               expected_output_dtype=None,
               fixed_batch_size=False,
               supports_masking=False):
    """Run a generic battery of sanity checks against a Keras layer class.

    The layer is instantiated, wired into a functional `Model`, run on
    (random or supplied) data, and checked for output dtype/shape, optional
    expected values, config round-tripping and - when `layer.call` accepts a
    `training` argument - one training step.

    Args:
        layer_cls: Layer class to instantiate.
        kwargs: Keyword arguments for `layer_cls` (defaults to none).
        input_shape: Input shape including the batch axis; `None` entries
            are replaced by random sizes in [1, 3]. Required when
            `input_data` is not given.
        input_dtype: Dtype of the input; defaults to `K.floatx()` for
            generated data or to `input_data.dtype`.
        input_data: Optional concrete input array.
        expected_output: Optional array the prediction must match
            (rtol=1e-3).
        expected_output_dtype: Expected dtype of the layer output; defaults
            to `input_dtype`.
        fixed_batch_size: Build the `Input` with `batch_shape` instead of
            `shape`.
        supports_masking: Additionally feed a boolean mask input and call
            the layer as `layer(Masking()(x), mask=mask)`.

    Returns:
        The array predicted by the functional model.

    Raises:
        AssertionError: If no input shape is available, or the output
            dtype / shape does not match the expectation.
    """
    kwargs = {} if kwargs is None else kwargs

    # Defined up front so it also exists when the caller supplies input_data.
    input_mask = []

    # generate input data
    if input_data is None:
        if not input_shape:
            raise AssertionError()
        if not input_dtype:
            input_dtype = K.floatx()
        input_data_shape = list(input_shape)
        for i, e in enumerate(input_data_shape):
            if e is None:
                input_data_shape[i] = np.random.randint(1, 4)
        if all(isinstance(e, tuple) for e in input_data_shape):
            # A list of shapes: one random array (and mask) per input.
            input_data = []
            for e in input_data_shape:
                input_data.append(
                    (10 * np.random.random(e)).astype(input_dtype))
                if supports_masking:
                    input_mask.append(_half_true_mask(e))
        else:
            input_data = (10 * np.random.random(input_data_shape))
            input_data = input_data.astype(input_dtype)
            if supports_masking:
                input_mask.append(_half_true_mask(input_data_shape))
    else:
        if input_shape is None:
            input_shape = input_data.shape
        if input_dtype is None:
            input_dtype = input_data.dtype
        if supports_masking:
            input_mask.append(_half_true_mask(np.shape(input_data)))
    if expected_output_dtype is None:
        expected_output_dtype = input_dtype

    # instantiation
    layer = layer_cls(**kwargs)

    # test get_weights , set_weights at layer level
    weights = layer.get_weights()
    layer.set_weights(weights)

    try:
        expected_output_shape = layer.compute_output_shape(input_shape)
    except Exception:
        # Fall back to the private spelling of the same hook.
        expected_output_shape = layer._compute_output_shape(input_shape)

    # test in functional API
    if isinstance(input_shape, list):
        if fixed_batch_size:
            x = [Input(batch_shape=e, dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [
                    Input(batch_shape=e[0:2], dtype=bool) for e in input_shape
                ]
        else:
            x = [Input(shape=e[1:], dtype=input_dtype) for e in input_shape]
            if supports_masking:
                mask = [Input(shape=(e[1], ), dtype=bool) for e in input_shape]
    else:
        if fixed_batch_size:
            x = Input(batch_shape=input_shape, dtype=input_dtype)
            if supports_masking:
                mask = Input(batch_shape=input_shape[0:2], dtype=bool)
        else:
            x = Input(shape=input_shape[1:], dtype=input_dtype)
            if supports_masking:
                mask = Input(shape=(input_shape[1], ), dtype=bool)

    if supports_masking:
        y = layer(Masking()(x), mask=mask)
    else:
        y = layer(x)

    if not (K.dtype(y) == expected_output_dtype):
        raise AssertionError()

    # check with the functional API
    if supports_masking:
        model = Model([x, mask], y)
        model_inputs = [input_data, input_mask[0]]
    else:
        model = Model(x, y)
        model_inputs = input_data
    actual_output = model.predict(model_inputs)
    actual_output_shape = actual_output.shape
    for expected_dim, actual_dim in zip(expected_output_shape,
                                        actual_output_shape):
        if expected_dim is not None:
            if not (expected_dim == actual_dim):
                raise AssertionError("expected_shape", expected_output_shape,
                                     "actual_shape", actual_output_shape)
    if expected_output is not None:
        assert_allclose(actual_output, expected_output, rtol=1e-3)

    # test serialization, weight setting at model level
    model_config = model.get_config()
    recovered_model = model.__class__.from_config(model_config)
    if model.weights:
        weights = model.get_weights()
        recovered_model.set_weights(weights)
        # Feed the same inputs (including the mask, if any) as the original.
        _output = recovered_model.predict(model_inputs)
        assert_allclose(_output, actual_output, rtol=1e-3)

    # test training mode (e.g. useful when the layer has a
    # different behavior at training and testing time).
    if has_arg(layer.call, 'training'):
        model.compile('rmsprop', 'mse')
        model.train_on_batch(model_inputs, actual_output)

    # test instantiation from layer config
    layer_config = layer.get_config()
    layer_config['batch_input_shape'] = input_shape
    layer = layer.__class__.from_config(layer_config)

    # for further checks in the caller function
    return actual_output
def cnn_lstm_ctc_model(height, nclass, tensors=None, width=None):
    """Assemble the CNN + bidirectional-GRU network with a CTC loss head.

    Args:
        height: Height of the single-channel input image.
        nclass: Width of the final softmax layer.
        tensors: Optional dict of backing tensors with the keys
            'the_input', 'the_labels', 'input_length', 'label_length' and
            'target_tensor'. Every entry defaults to None.
        width: Width of the input image; None leaves it unspecified.

    Returns:
        dict with the keys 'model' (training model whose output is the CTC
        loss), 'basemodel' (input to second recurrent block), 'test_func'
        (backend function from the image input to the softmax output),
        'fn_compile' (zero-argument callable that compiles 'model') and
        'cnn_model' (input to convolutional features).
    """
    if tensors is None:
        tensors = dict.fromkeys((
            'the_input',
            'the_labels',
            'input_length',
            'label_length',
            'target_tensor',
        ))

    rnn_units = 256
    inputs = Input(shape=(height, width, 1),
                   name='the_input',
                   tensor=tensors['the_input'])

    # 1. Convolutional feature extractor.
    features = cnn_feature_extractor(inputs)
    cnn_model = Model(inputs=[inputs], outputs=[features])

    # 2. Recurrent stack (the layers keep their historical 'blstm' names
    #    although they wrap GRU cells).
    seq = Permute((2, 1, 3), name='permute')(features)
    seq = TimeDistributed(Flatten(), name='timedistrib')(seq)
    seq = Bidirectional(GRU(rnn_units, return_sequences=True),
                        name='blstm1')(seq)
    seq = Dense(rnn_units, name='blstm1_out', activation='linear')(seq)
    seq = Bidirectional(GRU(rnn_units, return_sequences=True),
                        name='blstm2')(seq)
    basemodel = Model(inputs=inputs, outputs=seq)

    # Auxiliary sigmoid head; it is built here but not part of the returned
    # dictionary.
    last_features = Bidirectional(GRU(rnn_units, return_sequences=False),
                                  name='last_features')(seq)
    is_datetime = Dense(1, activation='sigmoid')(last_features)
    model_is_datetime = Model([inputs], [is_datetime])

    # Batch normalisation is skipped for the 3811-class configuration.
    if nclass != 3811:
        seq = BatchNormalization()(seq)
    y_pred = Dense(nclass, name='blstm2_out', activation='softmax')(seq)

    # 3. CTC loss, computed inside the graph by a Lambda layer.
    labels = Input(name='the_labels',
                   shape=[None],
                   dtype='float32',
                   tensor=tensors['the_labels'])
    input_length = Input(name='input_length',
                         shape=[1],
                         dtype='int64',
                         tensor=tensors['input_length'])
    label_length = Input(name='label_length',
                         shape=[1],
                         dtype='int64',
                         tensor=tensors['label_length'])
    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')(ctc_inputs)

    model = Model(inputs=[inputs, labels, input_length, label_length],
                  outputs=[loss_out])

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.9)

    def fn_compile():
        # The 'ctc' layer already outputs the loss, so the Keras loss
        # function simply forwards the prediction.
        return model.compile(
            loss={'ctc': lambda y_true, y_pred: y_pred},
            metrics=['acc'],
            optimizer=adam,
            target_tensors=[tensors['target_tensor']])

    test_func = K.function([inputs], [y_pred])

    return {
        'model': model,
        'basemodel': basemodel,
        'test_func': test_func,
        'fn_compile': fn_compile,
        'cnn_model': cnn_model,
    }
model.summary()

# Report loss / accuracy on the training split ...
score = model.evaluate(x=X_train, y=y_train)
print('Train loss:', score[0])
print('Train accuracy:', score[1])

# ... and on the held-out test split.
score = model.evaluate(x=X_test, y=y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

predictions = model.predict(x=X_test)

# Truncated copy of the model that stops at `layer_name`, used to read that
# layer's activations for both splits.
layer_name = "dense_3"
intermediate_layer_model = Model(
    inputs=model.input,
    outputs=model.get_layer(layer_name).output,
)
intermediate_output_train = intermediate_layer_model.predict(x=X_train)
intermediate_output_test = intermediate_layer_model.predict(x=X_test)

# Alternative kept for reference - a backend function on raw layer indices:
# get_3rd_layer_output = K.function(
#     [model.layers[0].input, K.learning_phase()], [model.layers[3].output])

print(intermediate_output_train.shape)
print(intermediate_output_test.shape)

final = []
def __init__(self):
    """Set hyper-parameters, load the training data and build or restore the model.

    The dataset is read through `read_dataset(...)`. If
    'ram_saved_model.h5' exists in the working directory it is loaded;
    otherwise a new model is built and compiled. Either way the result is
    stored in `self.model`.
    """
    self.HOPS = 3
    self.SCORE_FUNCTION = 'mlp'  # scaled_dot_product / mlp (concat) / bi_linear (general dot)
    self.DATASET = 'twitter'  # 'twitter', 'restaurant', 'laptop'
    self.POLARITIES_DIM = 3
    self.EMBEDDING_DIM = 300
    self.LEARNING_RATE = 0.001
    self.INITIALIZER = initializers.RandomUniform(minval=-0.05, maxval=0.05)
    self.REGULARIZER = regularizers.l2(0.001)
    self.LSTM_PARAMS = {
        'units': 200,
        'activation': 'tanh',
        'recurrent_activation': 'sigmoid',
        'kernel_initializer': self.INITIALIZER,
        'recurrent_initializer': self.INITIALIZER,
        'bias_initializer': self.INITIALIZER,
        'kernel_regularizer': self.REGULARIZER,
        'recurrent_regularizer': self.REGULARIZER,
        'bias_regularizer': self.REGULARIZER,
        'dropout': 0,
        'recurrent_dropout': 0,
    }
    self.MAX_SEQUENCE_LENGTH = 80
    self.MAX_ASPECT_LENGTH = 10
    self.BATCH_SIZE = 32
    self.EPOCHS = 5

    (self.texts_raw_indices,
     self.texts_raw_without_aspects_indices,
     self.texts_left_indices,
     self.texts_left_with_aspects_indices,
     self.aspects_indices,
     self.texts_right_indices,
     self.texts_right_with_aspects_indices,
     self.polarities_matrix,
     self.embedding_matrix,
     self.tokenizer) = read_dataset(type=self.DATASET,
                                    mode='train',
                                    embedding_dim=self.EMBEDDING_DIM,
                                    max_seq_len=self.MAX_SEQUENCE_LENGTH,
                                    max_aspect_len=self.MAX_ASPECT_LENGTH)

    if os.path.exists('ram_saved_model.h5'):
        print('loading saved model...')
        self.model = load_model('ram_saved_model.h5')
    else:
        print('Build model...')
        inputs_sentence = Input(shape=(self.MAX_SEQUENCE_LENGTH,),
                                name='inputs_sentence')
        inputs_aspect = Input(shape=(self.MAX_ASPECT_LENGTH,),
                              name='inputs_aspect')

        # Number of non-zero aspect indices *per sample*, shaped (batch, 1)
        # so it broadcasts against the summed aspect encoding below.
        # Without `axis`, count_nonzero collapses the whole batch into one
        # scalar, which would divide every sample by the batch-wide total.
        # The count is clamped to >= 1 to avoid dividing by zero for an
        # all-zero aspect row. Kept as a lambda (not a named function) so
        # the Lambda layer serialises the same way as before.
        nonzero_count = Lambda(
            lambda xin: tf.expand_dims(
                tf.maximum(
                    tf.count_nonzero(xin, axis=1, dtype=tf.float32), 1.0),
                axis=-1))(inputs_aspect)

        vocabulary_size = len(self.tokenizer.word_index) + 1
        sentence = Embedding(input_dim=vocabulary_size,
                             output_dim=self.EMBEDDING_DIM,
                             input_length=self.MAX_SEQUENCE_LENGTH,
                             mask_zero=True,
                             weights=[self.embedding_matrix],
                             trainable=False,
                             name='sentence_embedding')(inputs_sentence)
        aspect = Embedding(input_dim=vocabulary_size,
                           output_dim=self.EMBEDDING_DIM,
                           input_length=self.MAX_ASPECT_LENGTH,
                           mask_zero=True,
                           weights=[self.embedding_matrix],
                           trainable=False,
                           name='aspect_embedding')(inputs_aspect)

        memory = Bidirectional(LSTM(return_sequences=True,
                                    **self.LSTM_PARAMS),
                               name='memory')(sentence)
        aspect = Bidirectional(LSTM(return_sequences=True,
                                    **self.LSTM_PARAMS),
                               name='aspect')(aspect)

        # Average the aspect encoding over its time axis.
        x = Lambda(lambda xin: K.sum(xin[0], axis=1) / xin[1],
                   name='aspect_mean')([aspect, nonzero_count])

        # The same attention layer (shared weights) is applied HOPS times.
        shared_attention = Attention(score_function=self.SCORE_FUNCTION,
                                     initializer=self.INITIALIZER,
                                     regularizer=self.REGULARIZER,
                                     name='shared_attention')
        for i in range(self.HOPS):
            x = shared_attention((memory, x))

        x = Flatten()(x)
        x = Dense(self.POLARITIES_DIM)(x)
        predictions = Activation('softmax')(x)

        model = Model(inputs=[inputs_sentence, inputs_aspect],
                      outputs=predictions)
        model.summary()
        model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(lr=self.LEARNING_RATE),
                      metrics=['acc', f1])
        # plot_model(model, to_file='model.png')
        self.model = model
def create(self):
    """
    Create model and return it

    The network is an encoder / bottleneck / decoder stack. Encoder blocks
    are added while the current size is even and larger than 4 (halving it
    each time); decoder blocks then double it back up to the initial size,
    each one receiving the encoder output stored for that size.

    :return: keras model
    :raises ValueError: if ``self.dim`` is neither 2 nor 3
    """
    # Pick the 2D or 3D building blocks once instead of duplicating the
    # whole construction per dimensionality.
    if self.dim == 3:
        encoder_block = encoder_block_3
        convolution_block = convolution_block_3
        decoder_block = decoder_block_3
        final_convolution = Convolution3D
    elif self.dim == 2:
        encoder_block = encoder_block_2
        convolution_block = convolution_block_2
        decoder_block = decoder_block_2
        final_convolution = Convolution2D
    else:
        raise ValueError('dim must be 2 or 3, got %r' % (self.dim, ))

    input_layer = Input(shape=self.input_shape)
    x = input_layer

    # Smallest of the first `dim` entries of the input shape.
    init_size = min(self.input_shape[:self.dim])
    size = init_size

    convolutions = self.convolutions
    if convolutions is None:
        # Default filter schedule: double per encoder level starting at 8,
        # one bottleneck entry, then halve per decoder level. Integer
        # division keeps the filter counts ints on Python 3.
        convolutions = []
        nr_of_convolutions = 8
        for _ in range(self.get_depth()):
            convolutions.append(nr_of_convolutions)
            nr_of_convolutions *= 2
        convolutions.append(nr_of_convolutions)
        for _ in range(self.get_depth()):
            convolutions.append(nr_of_convolutions)
            nr_of_convolutions //= 2

    # Encoder: remember each level's skip connection, keyed by its size.
    connection = {}
    i = 0
    while size % 2 == 0 and size > 4:
        x, connection[size] = encoder_block(x, convolutions[i],
                                            self.encoder_use_bn,
                                            self.encoder_spatial_dropout)
        size //= 2  # size is even here, so this is exact
        i += 1

    # Bottleneck.
    x = convolution_block(x, convolutions[i], self.encoder_use_bn,
                          self.encoder_spatial_dropout)
    i += 1

    # Decoder: walk the sizes back up, consuming the stored connections.
    while size < init_size:
        size *= 2
        x = decoder_block(x, convolutions[i], connection[size],
                          self.decoder_use_bn, self.decoder_spatial_dropout)
        i += 1

    x = final_convolution(self.nb_classes, 1, activation='softmax')(x)

    return Model(inputs=input_layer, outputs=x)
def __init__(self,
             env,
             sess,
             actor_noise,
             obs_normalizer=None,
             action_processor=None,
             predictor_type="cnn",
             use_batch_norm=False,
             load_root_model=False,
             config=DEFAULT_CONFIG):
    """Build the actor, critic, their target copies and the TF summaries.

    Args:
        env: Environment; its `observation_space` and `action_space` define
            the network input and output sizes, and `env.sim.w0`
            initialises the action summary variables.
            NOTE(review): the original comment says `env` may be None for
            prediction-only use, but the code below dereferences it
            unconditionally - confirm with callers.
        sess: TensorFlow session, stored on the instance.
        actor_noise: Exploration noise object, stored on the instance.
        obs_normalizer: When the observation space is a `gym.spaces.Dict` or
            `Tuple`, the key/index of the sub-space fed to the networks.
        action_processor: Stored on the instance.
        predictor_type: Forwarded to `RootNetwork` and the path helpers.
        use_batch_norm: Forwarded to `RootNetwork` and the path helpers.
        load_root_model: When truthy, try to load the root network weights
            from `self.root_model_save_path` and freeze its layers.
        config: Dict providing 'max step', 'batch size', 'episode',
            'actor learning rate', 'critic learning rate', 'tau', 'gamma'
            and 'seed'.
    """
    self.config = config
    assert self.config['max step'] > self.config[
        'batch size'], 'Max step must be bigger than batch size'

    self.episode = self.config["episode"]
    self.actor_learning_rate = self.config["actor learning rate"]
    self.critic_learning_rate = self.config["critic learning rate"]
    self.tau = self.config["tau"]
    self.gamma = self.config["gamma"]
    self.batch_size = self.config['batch size']

    self.action_processor = action_processor

    np.random.seed(self.config['seed'])
    if env:
        env.seed(self.config['seed'])

    self.sess = sess
    self.env = env
    self.actor_noise = actor_noise

    # Shared state input. The online and target networks must see the same
    # input shape, so it is resolved once and used for both placeholders.
    observation_space = self.env.observation_space
    has_complex_state = isinstance(
        observation_space, (gym.spaces.Dict, gym.spaces.Tuple))
    if obs_normalizer and has_complex_state:
        state_shape = observation_space.spaces[obs_normalizer].shape
    else:
        state_shape = observation_space.shape
    state_input = Input(shape=state_shape, name="state_input")
    target_state_input = Input(shape=state_shape, name="target_state_input")
    self.obs_normalizer = obs_normalizer

    # Shapes. NOTE(review): the axis meanings are inferred from the variable
    # names only; the indexing requires a rank-4 input tensor.
    action_dim = env.action_space.shape[0]
    nb_assets = state_input.shape[1]
    window_length = state_input.shape[2]
    nb_features = state_input.shape[3]

    # Paths
    self.model_save_path = get_model_path(window_length=window_length,
                                          predictor_type=predictor_type,
                                          use_batch_norm=use_batch_norm)
    self.summary_path = get_result_path(
        window_length=window_length,
        predictor_type=predictor_type,
        use_batch_norm=use_batch_norm) + "/" + datetime.now().strftime(
            "%Y-%m-%d-%H%M%S")
    self.root_model_save_path = get_root_model_path(
        window_length, predictor_type, use_batch_norm)

    def _load_and_freeze(root_model):
        # Best effort: a failed load is reported and training continues
        # with freshly initialised weights.
        try:
            root_model.load_weights(self.root_model_save_path)
            for layer in root_model.layers:
                layer.trainable = False
        except Exception as err:
            print("ERROR while loading root model ",
                  self.root_model_save_path, err)

    # Feature extraction (root) network
    self.predictor_type = predictor_type
    self.use_batch_norm = use_batch_norm
    root_net = RootNetwork(inputs=state_input,
                           predictor_type=self.predictor_type,
                           use_batch_norm=self.use_batch_norm).net
    self.root_model = Model(state_input, root_net)
    if load_root_model:
        _load_and_freeze(self.root_model)

    variable_summaries(root_net, "Root_Output")

    target_root_net = RootNetwork(inputs=target_state_input,
                                  predictor_type=predictor_type,
                                  use_batch_norm=use_batch_norm).net
    self.target_root_model = Model(target_state_input, target_root_net)
    if load_root_model:
        _load_and_freeze(self.target_root_model)

    # Start the target root network from the online root network's weights.
    self.target_root_model.set_weights(self.root_model.get_weights())

    # ===================================================================== #
    #                               Actor Model                             #
    # Chain rule: find the gradient of changing the actor network params in #
    # getting closest to the final value network predictions, i.e. de/dA    #
    # Calculate de/dA as = de/dC * dC/dA, where e is error, C critic, A act #
    # ===================================================================== #
    self.actor_state_input, self.actor_model = Actor(
        state_input=state_input, root_net=root_net,
        action_dim=action_dim).references()
    _, self.target_actor_model = Actor(state_input=target_state_input,
                                       root_net=target_root_net,
                                       action_dim=action_dim).references()

    # The critic's action gradient is fed in through `action_grad`; the
    # actor loss is the mean of its negated product with the actor output,
    # plus the actor model's regularisation losses.
    action_grad = Input(shape=(action_dim, ))
    loss = K.mean(-action_grad * self.actor_model.outputs)
    for regularizer_loss in self.actor_model.losses:
        loss += regularizer_loss

    optimizer = Adam(lr=self.actor_learning_rate)
    updates_op = optimizer.get_updates(
        params=self.actor_model.trainable_weights,
        loss=loss)

    # Callable used by the training loop to apply one actor update.
    self.optimize = K.function(
        inputs=[self.actor_state_input, action_grad,
                K.learning_phase()],
        outputs=[loss],
        updates=updates_op)

    # ===================================================================== #
    #                              Critic Model                             #
    # ===================================================================== #
    (self.critic_state_input, self.critic_action_input,
     self.critic_model) = Critic(state_input=state_input,
                                 root_net=root_net,
                                 action_dim=action_dim,
                                 lr=self.critic_learning_rate).references()
    array_variable_summaries(self.critic_model.layers[-1].weights,
                             "Critic_Output")
    _, _, self.target_critic_model = Critic(
        state_input=target_state_input,
        root_net=target_root_net,
        action_dim=action_dim,
        lr=self.critic_learning_rate).references()

    # de/dC: gradient of the critic output w.r.t. its action input, which
    # is what gets fed to `self.optimize` as `action_grad`.
    self.critic_grads = K.gradients(self.critic_model.outputs,
                                    self.critic_action_input)
    self.compute_critic_gradient = K.function(
        inputs=[
            self.critic_model.output, self.critic_action_input,
            self.critic_state_input
        ],
        outputs=self.critic_grads)
    tf.summary.histogram("Critic_Grad", self.critic_grads)

    # Update target networks
    self.update_target()

    # Summary variables
    with tf.variable_scope("Global"):
        self.episode_reward = tf.Variable(0., name="episode_reward")
        tf.summary.scalar("Reward", self.episode_reward)
        self.episode_min_reward = tf.Variable(0., name="episode_min_reward")
        tf.summary.scalar("Min_Reward", self.episode_min_reward)
        self.episode_ave_max_q = tf.Variable(0., name="episode_ave_max_q")
        tf.summary.scalar("Qmax_Value", self.episode_ave_max_q)
        self.loss_critic = tf.Variable(0., name="loss_critic")
        tf.summary.scalar("Loss_critic", self.loss_critic)
        self.loss_actor = tf.Variable(0., name="loss_actor")
        tf.summary.scalar("Loss_actor", self.loss_actor)
        self.ep_base_action = tf.Variable(initial_value=self.env.sim.w0,
                                          name="ep_base_action")
        tf.summary.histogram("Action_base", self.ep_base_action)
        self.ep_action = tf.Variable(initial_value=self.env.sim.w0,
                                     name="ep_action")
        tf.summary.histogram("Action", self.ep_action)

    self.merged = tf.summary.merge_all()
test_id, X_test, _ = process_test_data(
    test_data_path=args.test_data_path,
    tokenizer=X_tokenizer,
)

# Lookup tables between token ids and words.
reverse_source_word_index = X_tokenizer.index_word
reverse_target_word_index = Y_tokenizer.index_word
target_word_index = Y_tokenizer.word_index

model = load_model(args.load_model)

# ---- Encoder: rebuilt from the trained model's layers 2 and 4 ----
encoder_inputs = model.input[0]  # input-1
encoder_embedding = model.layers[2]
encoder_embedding_output = encoder_embedding(encoder_inputs)
encoder_lstm = model.layers[4]
(encoder_outputs,
 state_h_encoder,
 state_c_encoder) = encoder_lstm(encoder_embedding_output)
encoder_states = [encoder_outputs, state_h_encoder, state_c_encoder]
encoder_model = Model(encoder_inputs, encoder_states)

# ---- Decoder: layers 3, 5 and 6, driven by externally supplied states ----
decoder_inputs = model.input[1]
decoder_state_input_h = Input(shape=(None, ), name='input_3')
decoder_state_input_c = Input(shape=(None, ), name='input_4')
decoder_hidden_state_input = Input(shape=(MAX_LEN, None))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_embedding = model.layers[3]
decoder_embedding_output = decoder_embedding(decoder_inputs)
decoder_lstm = model.layers[5]
(decoder_outputs,
 state_h_dec,
 state_c_dec) = decoder_lstm(decoder_embedding_output,
                             initial_state=decoder_states_inputs)
decoder_states = [decoder_outputs, state_h_dec, state_c_dec]

decoder_dense = model.layers[6]
decoder_outputs = decoder_dense(decoder_outputs)
def default_latent(num_outputs, input_shape):
    """Build a conv encoder with a deconvolution branch and scalar heads.

    The image is scaled by 1/255, passed through seven named convolutions
    (each followed by dropout) and a 1x1 ``latent`` convolution. From the
    latent tensor two branches are built:

    * a stack of six ``Conv2DTranspose`` layers ending in ``img_out``;
    * a flattened dense stack (256, 100, 50 units) feeding ``num_outputs``
      single-unit linear layers named ``n_outputs<i>``.

    :param num_outputs: number of single-unit linear output layers to attach.
    :param input_shape: shape passed to the ``img_in`` input layer.
    :return: a ``Model`` whose outputs are ``[img_out, n_outputs0, ...]``.
    """
    dropout_rate = 0.2

    # (filters, kernel size, strides, layer name) of the encoder convolutions.
    encoder_spec = (
        (24, (5, 5), (2, 2), "conv2d_1"),
        (32, (5, 5), (2, 2), "conv2d_2"),
        (32, (5, 5), (2, 2), "conv2d_3"),
        (32, (3, 3), (1, 1), "conv2d_4"),
        (32, (3, 3), (1, 1), "conv2d_5"),
        (64, (3, 3), (2, 2), "conv2d_6"),
        (64, (3, 3), (2, 2), "conv2d_7"),
    )
    # (filters, layer name) of the transposed convolutions, all 3x3 stride 2.
    decoder_spec = (
        (64, "deconv2d_1"),
        (64, "deconv2d_2"),
        (32, "deconv2d_3"),
        (32, "deconv2d_4"),
        (32, "deconv2d_5"),
        (1, "img_out"),
    )

    img_in = Input(shape=input_shape, name='img_in')
    features = Lambda(lambda x: x / 255.)(img_in)  # normalize

    for n_filters, kernel, stride, layer_name in encoder_spec:
        features = Convolution2D(n_filters,
                                 kernel,
                                 strides=stride,
                                 activation='relu',
                                 name=layer_name)(features)
        features = Dropout(dropout_rate)(features)

    latent = Convolution2D(10, (1, 1),
                           strides=(2, 2),
                           activation='relu',
                           name="latent")(features)

    # Deconvolution branch, built from the latent tensor.
    decoded = latent
    for n_filters, layer_name in decoder_spec:
        decoded = Conv2DTranspose(filters=n_filters,
                                  kernel_size=(3, 3),
                                  strides=2,
                                  name=layer_name)(decoded)

    # Dense branch, also built from the latent tensor.
    hidden = Flatten(name='flattened')(latent)
    for units in (256, 100, 50):
        hidden = Dense(units, activation='relu')(hidden)
        hidden = Dropout(dropout_rate)(hidden)

    heads = [
        Dense(1, activation='linear', name='n_outputs' + str(i))(hidden)
        for i in range(num_outputs)
    ]
    return Model(inputs=[img_in], outputs=[decoded] + heads)
def default_categorical(input_shape=(120, 160, 3), roi_crop=(0, 0)):
    """Build a CNN with two softmax heads: ``angle_out`` and ``throttle_out``.

    The number and stride of the convolutions adapts to the first dimension
    of ``input_shape`` so that small images are not reduced too aggressively.

    :param input_shape: shape of the input image passed to the ``img_in``
        input layer.
    :param roi_crop: not used by this function; cropping is expected to be
        done elsewhere (kept for interface compatibility).
    :return: a ``Model`` mapping ``img_in`` to ``[angle_out, throttle_out]``
        (15-way and 20-way softmax outputs respectively).
    """
    drop = 0.2

    # We now expect that cropping is done elsewhere, so the expected image
    # size is taken as given:
    # input_shape = adjust_input_shape(input_shape, roi_crop)

    img_in = Input(shape=input_shape, name='img_in')  # input layer
    x = img_in

    # 24 filters, 5x5 kernel, 2x2 stride, relu activation
    x = Convolution2D(24, (5, 5),
                      strides=(2, 2),
                      activation='relu',
                      name="conv2d_1")(x)
    x = Dropout(drop)(x)  # dropout at rate `drop` (prevent overfitting)

    # 32 filters, 5x5 kernel, 2x2 stride, relu activation
    x = Convolution2D(32, (5, 5),
                      strides=(2, 2),
                      activation='relu',
                      name="conv2d_2")(x)
    x = Dropout(drop)(x)

    if input_shape[0] > 32:
        # 64 filters, 5x5 kernel, 2x2 stride, relu
        x = Convolution2D(64, (5, 5),
                          strides=(2, 2),
                          activation='relu',
                          name="conv2d_3")(x)
    else:
        # Small input: 64 filters, 3x3 kernel, 1x1 stride, relu
        x = Convolution2D(64, (3, 3),
                          strides=(1, 1),
                          activation='relu',
                          name="conv2d_3")(x)

    if input_shape[0] > 64:
        # 64 filters, 3x3 kernel, 2x2 stride, relu
        x = Convolution2D(64, (3, 3),
                          strides=(2, 2),
                          activation='relu',
                          name="conv2d_4")(x)
    elif input_shape[0] > 32:
        # 64 filters, 3x3 kernel, 1x1 stride, relu
        x = Convolution2D(64, (3, 3),
                          strides=(1, 1),
                          activation='relu',
                          name="conv2d_4")(x)
    # For input_shape[0] <= 32 no conv2d_4 layer is created.
    x = Dropout(drop)(x)

    # 64 filters, 3x3 kernel, 1x1 stride, relu
    x = Convolution2D(64, (3, 3),
                      strides=(1, 1),
                      activation='relu',
                      name="conv2d_5")(x)
    x = Dropout(drop)(x)

    # Possibly add MaxPooling (will make it less sensitive to position in
    # image). Camera angle fixed, so may not be needed.

    x = Flatten(name='flattened')(x)  # flatten to 1D (fully connected)
    x = Dense(100, activation='relu', name="fc_1")(x)
    x = Dropout(drop)(x)
    x = Dense(50, activation='relu', name="fc_2")(x)
    x = Dropout(drop)(x)

    # Categorical output of the angle: 15 softmax categories.
    angle_out = Dense(15, activation='softmax', name='angle_out')(x)

    # Categorical output of the throttle: 20 softmax categories.
    throttle_out = Dense(20, activation='softmax', name='throttle_out')(x)

    # The previous code passed an undefined name `out` here; the model's
    # outputs are the two heads built above.
    model = Model(inputs=[img_in], outputs=[angle_out, throttle_out])
    return model
def Deeplabv3(weights='pascal_voc',
              input_tensor=None,
              input_shape=(512, 512, 3),
              classes=21,
              backbone='mobilenetv2',
              OS=16,
              alpha=1.,
              activation=None):
    """ Instantiates the Deeplabv3+ architecture

    Optionally loads weights pre-trained
    on PASCAL VOC or Cityscapes. This model is available for TensorFlow only.
    # Arguments
        weights: one of 'pascal_voc' (pre-trained on pascal voc),
            'cityscapes' (pre-trained on cityscape) or None (random initialization)
        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
            to use as image input for the model.
        input_shape: shape of input image. format HxWxC
            PASCAL VOC model was trained on (512,512,3) images. None is allowed as height/width
        classes: number of desired classes. PASCAL VOC has 21 classes, Cityscapes has 19 classes.
            If number of classes not aligned with the weights used, last layer is initialized randomly
        backbone: backbone to use. one of {'xception','mobilenetv2'}
        activation: optional activation to add to the top of the network.
            One of 'softmax', 'sigmoid' or None
        OS: determines input_shape/feature_extractor_output ratio. One of {8,16}.
            Used only for xception backbone.
        alpha: controls the width of the MobileNetV2 network. This is known as the
            width multiplier in the MobileNetV2 paper.
                - If `alpha` < 1.0, proportionally decreases the number
                    of filters in each layer.
                - If `alpha` > 1.0, proportionally increases the number
                    of filters in each layer.
                - If `alpha` = 1, default number of filters from the paper
                    are used at each layer.
            Used only for mobilenetv2 backbone. Pretrained is only available for alpha=1.

    # Returns
        A Keras model instance.

    # Raises
        RuntimeError: If attempting to run this model with a
            backend that does not support separable convolutions.
        ValueError: in case of invalid argument for `weights` or `backbone`

    """
    # Validate arguments before any layer is created.
    if not (weights in {'pascal_voc', 'cityscapes', None}):
        raise ValueError(
            'The `weights` argument should be either '
            '`None` (random initialization), `pascal_voc`, or `cityscapes` '
            '(pre-trained on PASCAL VOC)')

    if not (backbone in {'xception', 'mobilenetv2'}):
        raise ValueError('The `backbone` argument should be either '
                         '`xception`  or `mobilenetv2` ')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        img_input = input_tensor

    if backbone == 'xception':
        if OS == 8:
            entry_block3_stride = 1
            middle_block_rate = 2  # ! Not mentioned in paper, but required
            exit_block_rates = (2, 4)
            atrous_rates = (12, 24, 36)
        else:
            entry_block3_stride = 2
            middle_block_rate = 1
            exit_block_rates = (1, 2)
            atrous_rates = (6, 12, 18)

        x = Conv2D(32, (3, 3),
                   strides=(2, 2),
                   name='entry_flow_conv1_1',
                   use_bias=False,
                   padding='same')(img_input)
        x = BatchNormalization(name='entry_flow_conv1_1_BN')(x)
        x = Activation('relu')(x)

        x = _conv2d_same(x, 64, 'entry_flow_conv1_2', kernel_size=3, stride=1)
        x = BatchNormalization(name='entry_flow_conv1_2_BN')(x)
        x = Activation('relu')(x)

        x = _xception_block(x, [128, 128, 128],
                            'entry_flow_block1',
                            skip_connection_type='conv',
                            stride=2,
                            depth_activation=False)
        # skip1 is reused by the decoder below.
        x, skip1 = _xception_block(x, [256, 256, 256],
                                   'entry_flow_block2',
                                   skip_connection_type='conv',
                                   stride=2,
                                   depth_activation=False,
                                   return_skip=True)

        x = _xception_block(x, [728, 728, 728],
                            'entry_flow_block3',
                            skip_connection_type='conv',
                            stride=entry_block3_stride,
                            depth_activation=False)
        for i in range(16):
            x = _xception_block(x, [728, 728, 728],
                                'middle_flow_unit_{}'.format(i + 1),
                                skip_connection_type='sum',
                                stride=1,
                                rate=middle_block_rate,
                                depth_activation=False)

        x = _xception_block(x, [728, 1024, 1024],
                            'exit_flow_block1',
                            skip_connection_type='conv',
                            stride=1,
                            rate=exit_block_rates[0],
                            depth_activation=False)
        x = _xception_block(x, [1536, 1536, 2048],
                            'exit_flow_block2',
                            skip_connection_type='none',
                            stride=1,
                            rate=exit_block_rates[1],
                            depth_activation=True)

    else:
        # `OS` is ignored for this backbone: the only stride-2 layers below
        # are the first conv and blocks 1 and 3.
        first_block_filters = _make_divisible(32 * alpha, 8)
        x = Conv2D(first_block_filters,
                   kernel_size=3,
                   strides=(2, 2),
                   padding='same',
                   use_bias=False,
                   name='Conv')(img_input)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                               name='Conv_BN')(x)
        x = Activation(relu6, name='Conv_Relu6')(x)

        x = _inverted_res_block(x,
                                filters=16,
                                alpha=alpha,
                                stride=1,
                                expansion=1,
                                block_id=0,
                                skip_connection=False)

        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=1,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=24,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=2,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=2,
                                expansion=6,
                                block_id=3,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=4,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=32,
                                alpha=alpha,
                                stride=1,
                                expansion=6,
                                block_id=5,
                                skip_connection=True)

        # stride in block 6 changed from 2 -> 1, so we need to use rate = 2
        x = _inverted_res_block(
            x,
            filters=64,
            alpha=alpha,
            stride=1,  # 1!
            expansion=6,
            block_id=6,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=7,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=8,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=64,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=9,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=10,
                                skip_connection=False)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=11,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=96,
                                alpha=alpha,
                                stride=1,
                                rate=2,
                                expansion=6,
                                block_id=12,
                                skip_connection=True)

        x = _inverted_res_block(
            x,
            filters=160,
            alpha=alpha,
            stride=1,
            rate=2,  # 1!
            expansion=6,
            block_id=13,
            skip_connection=False)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=14,
                                skip_connection=True)
        x = _inverted_res_block(x,
                                filters=160,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=15,
                                skip_connection=True)

        x = _inverted_res_block(x,
                                filters=320,
                                alpha=alpha,
                                stride=1,
                                rate=4,
                                expansion=6,
                                block_id=16,
                                skip_connection=False)

    # end of feature extractor

    # branching for Atrous Spatial Pyramid Pooling

    # Image Feature branch
    b4 = GlobalAveragePooling2D()(x)
    # from (b_size, channels)->(b_size, 1, 1, channels)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Lambda(lambda x: K.expand_dims(x, 1))(b4)
    b4 = Conv2D(256, (1, 1),
                padding='same',
                use_bias=False,
                name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    # upsample. have to use compat because of the option align_corners
    size_before = tf.keras.backend.int_shape(x)
    b4 = Lambda(lambda x: tf.compat.v1.image.resize(
        x, size_before[1:3], method='bilinear', align_corners=True))(b4)
    # simple 1x1
    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    # there are only 2 branches in mobilenetV2. not sure why
    if backbone == 'xception':
        # rate = 6 (12)
        b1 = SepConv_BN(x,
                        256,
                        'aspp1',
                        rate=atrous_rates[0],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 12 (24)
        b2 = SepConv_BN(x,
                        256,
                        'aspp2',
                        rate=atrous_rates[1],
                        depth_activation=True,
                        epsilon=1e-5)
        # rate = 18 (36)
        b3 = SepConv_BN(x,
                        256,
                        'aspp3',
                        rate=atrous_rates[2],
                        depth_activation=True,
                        epsilon=1e-5)

        # concatenate ASPP branches & project
        x = Concatenate()([b4, b0, b1, b2, b3])
    else:
        x = Concatenate()([b4, b0])

    x = Conv2D(256, (1, 1),
               padding='same',
               use_bias=False,
               name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)
    # DeepLab v.3+ decoder

    if backbone == 'xception':
        # Feature projection
        # x4 (x2) block: resize to the spatial size of the skip connection
        x = Lambda(lambda xx: tf.compat.v1.image.resize(
            xx, skip1.shape[1:3], method='bilinear', align_corners=True))(x)

        dec_skip1 = Conv2D(48, (1, 1),
                           padding='same',
                           use_bias=False,
                           name='feature_projection0')(skip1)
        dec_skip1 = BatchNormalization(name='feature_projection0_BN',
                                       epsilon=1e-5)(dec_skip1)
        dec_skip1 = Activation('relu')(dec_skip1)
        x = Concatenate()([x, dec_skip1])
        x = SepConv_BN(x,
                       256,
                       'decoder_conv0',
                       depth_activation=True,
                       epsilon=1e-5)
        x = SepConv_BN(x,
                       256,
                       'decoder_conv1',
                       depth_activation=True,
                       epsilon=1e-5)

    # you can use it with arbitrary number of classes
    # The layer name decides whether `load_weights(by_name=True)` below will
    # fill this layer from the pre-trained file.
    if (weights == 'pascal_voc' and classes == 21) or (weights == 'cityscapes'
                                                       and classes == 19):
        last_layer_name = 'logits_semantic'
    else:
        last_layer_name = 'custom_logits_semantic'

    # Only the channel count of this last layer needs to change.
    x = Conv2D(classes, (1, 1), padding='same', name=last_layer_name)(x)
    size_before3 = tf.keras.backend.int_shape(img_input)
    x = Lambda(lambda xx: tf.compat.v1.image.resize(
        xx,  # this call resizes back to the input's spatial size
        size_before3[1:3],
        method='bilinear',
        align_corners=True))(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    if activation in {'softmax', 'sigmoid'}:
        x = tf.keras.layers.Activation(activation)(x)

    # The result is classifier-like: `classes` maps at the input's spatial
    # size, one map per class.
    model = Model(inputs, x, name='deeplabv3plus')

    # load weights

    if weights == 'pascal_voc':
        if backbone == 'xception':
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_X,
                cache_subdir='models')
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels.h5',
                WEIGHTS_PATH_MOBILE,
                cache_subdir='models')
        model.load_weights(weights_path, by_name=True)
    elif weights == 'cityscapes':
        if backbone == 'xception':
            # Original author's note: the cache location below had to be an
            # absolute path (the value used here is the relative 'models').
            weights_path = get_file(
                'deeplabv3_xception_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_X_CS,
                cache_dir='models',  # directory that holds the file
                cache_subdir='.')  # no further sub-directory
        else:
            weights_path = get_file(
                'deeplabv3_mobilenetv2_tf_dim_ordering_tf_kernels_cityscapes.h5',
                WEIGHTS_PATH_MOBILE_CS,
                cache_dir='models',  # directory that holds the file
                cache_subdir='.')
        model.load_weights(weights_path, by_name=True)
    return model
def __create_dense_net(nb_classes,
                       img_input,
                       include_top,
                       depth=40,
                       nb_dense_block=3,
                       growth_rate=12,
                       nb_filter=-1,
                       nb_layers_per_block=-1,
                       bottleneck=False,
                       reduction=0.0,
                       dropout_rate=None,
                       weight_decay=1e-4,
                       subsample_initial_block=False,
                       pooling=None,
                       activation='softmax',
                       transition_pooling='avg',
                       aux_depth=None,
                       num_coarse_classes=None,
                       aux_layouts=None):
    ''' Build the DenseNet model

    # Arguments
        nb_classes: number of classes
        img_input: input tensor the network is built on; it is also used as
            the `inputs` of the returned model
        include_top: flag to include the final Dense layer. Only `True` is
            implemented here.
        depth: number or layers. The values 121, 169, 201 and 264 override
            `nb_layers_per_block` with a preset 4-element list (which then
            requires `nb_dense_block=4`).
        nb_dense_block: number of dense blocks to add to end (generally = 3)
        growth_rate: number of filters to add per dense block
        nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate
        nb_layers_per_block: number of layers in each dense block.
                Can be a -1, positive integer or a list.
                If -1, calculates nb_layer_per_block from the depth of the network.
                If positive integer, a set number of layers per dense block.
                If list, nb_layer is used as provided. Note that list size must
                be nb_dense_block
        bottleneck: add bottleneck blocks
        reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression
        dropout_rate: dropout rate
        weight_decay: weight decay rate
        subsample_initial_block: Changes model type to suit different datasets.
            Should be set to True for ImageNet, and False for CIFAR datasets.
            When set to True, the initial convolution will be strided and
            adds a MaxPooling2D before the initial dense block.
        pooling: Optional pooling mode applied before the final Dense layer.
            - `None` means no global pooling layer is added.
            - `avg` means that global average pooling
                will be applied to the output of the
                last convolutional layer.
            - `max` means that global max pooling will
                be applied.
        activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
                Note that if sigmoid is used, classes must be 1.
        transition_pooling: `avg` for avg pooling (default), `max` for max pooling,
            None for no pooling during scale transition blocks. Please note that this
            default differs from the DenseNetFCN paper in accordance with the DenseNet
            paper.
        aux_depth, num_coarse_classes, aux_layouts: forwarded unchanged to
            `__dense_block` and `apply_aux`, which build the auxiliary
            outputs. When `aux_depth` is `None` or empty the returned model
            has a single output.

    # Returns
        a keras Model. Its outputs are `[x] + aux_list` when `aux_depth` is
        non-empty, otherwise just `x`.

    # Raises
        ValueError: in case of invalid argument for `reduction`
            or `nb_dense_block`
        NotImplementedError: if `include_top` is false
    '''
    with K.name_scope('DenseNet'):
        # The original expression compared two string constants and therefore
        # always evaluated to -1, i.e. a channels-last layout is assumed.
        concat_axis = -1

        if reduction != 0.0:
            if not (reduction <= 1.0 and reduction > 0.0):
                raise ValueError('`reduction` value must lie between 0.0 and 1.0')

        # Preset layer layouts for the standard depths.
        if depth == 121:
            nb_layers_per_block = [6, 12, 24, 16]
        elif depth == 169:
            nb_layers_per_block = [6, 12, 32, 32]
        elif depth == 201:
            nb_layers_per_block = [6, 12, 48, 32]
        elif depth == 264:
            nb_layers_per_block = [6, 12, 64, 32]

        # layers in each dense block
        if isinstance(nb_layers_per_block, (list, tuple)):
            nb_layers = list(nb_layers_per_block)  # Convert tuple to list

            if len(nb_layers) != nb_dense_block:
                raise ValueError('If `nb_dense_block` is a list, its length must match '
                                 'the number of layers provided by `nb_layers`.')

            final_nb_layer = nb_layers[-1]
            nb_layers = nb_layers[:-1]
        else:
            if nb_layers_per_block == -1:
                assert (depth - 4) % 3 == 0, ('Depth must be 3 N + 4 '
                                              'if nb_layers_per_block == -1')
                count = int((depth - 4) / 3)

                if bottleneck:
                    count = count // 2

                nb_layers = [count for _ in range(nb_dense_block)]
                final_nb_layer = count
            else:
                final_nb_layer = nb_layers_per_block
                nb_layers = [nb_layers_per_block] * nb_dense_block

        # compute initial nb_filter if -1, else accept users initial nb_filter
        if nb_filter <= 0:
            nb_filter = 2 * growth_rate

        # compute compression factor
        compression = 1.0 - reduction

        # Initial convolution
        if subsample_initial_block:
            initial_kernel = (7, 7)
            initial_strides = (2, 2)
        else:
            initial_kernel = (3, 3)
            initial_strides = (1, 1)

        # NOTE(review): `init` (and `aux_initia` below) are not defined in
        # this function - presumably module-level settings; confirm.
        x = Conv2D(nb_filter, initial_kernel, kernel_initializer=init,
                   padding='same', name='initial_conv2D',
                   strides=initial_strides, use_bias=False,
                   kernel_regularizer=l2(weight_decay))(img_input)

        if subsample_initial_block:
            x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                                   name='initial_bn')(x)
            x = Activation('relu')(x)
            x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)

        curr_id = 0
        aux_list = []

        # Add dense blocks
        for block_idx in range(nb_dense_block - 1):
            # adds AUXi.0 to AUX<nb_dense_block-1>.<nb_layers-1>
            x, nb_filter, aux_list_temp, curr_id = __dense_block(
                x, nb_layers[block_idx], nb_filter, growth_rate,
                bottleneck=bottleneck, dropout_rate=dropout_rate,
                weight_decay=weight_decay,
                block_prefix='dense_%i' % block_idx,
                block_id=block_idx, curr_id=curr_id,
                aux_depth=aux_depth,
                num_coarse_classes=num_coarse_classes,
                aux_layouts=aux_layouts)
            aux_list = aux_list + aux_list_temp

            # adds AUXi.<nb_layers>
            aux_list_temp, curr_id = apply_aux(
                x, major_id=block_idx+1, minor_id=nb_layers[block_idx],
                curr_id=curr_id, aux_depth=aux_depth,
                num_coarse_classes=num_coarse_classes,
                aux_layouts=aux_layouts, initialization=aux_initia)
            aux_list = aux_list + aux_list_temp

            aux_list_temp, curr_id = apply_aux(
                x, major_id=block_idx + 1, minor_id=-1,
                curr_id=curr_id, aux_depth=aux_depth,
                num_coarse_classes=num_coarse_classes,
                aux_layouts=aux_layouts, initialization=aux_initia)
            aux_list = aux_list + aux_list_temp

            # add transition_block
            x = __transition_block(x, nb_filter, compression=compression,
                                   weight_decay=weight_decay,
                                   block_prefix='tr_%i' % block_idx,
                                   transition_pooling=transition_pooling)
            nb_filter = int(nb_filter * compression)

        # add AUX<nb_dense_block>.0 (AUX3.0 for CIFAR)
        aux_list_temp, curr_id = apply_aux(
            x, major_id=nb_dense_block, minor_id=0,
            curr_id=curr_id, aux_depth=aux_depth,
            num_coarse_classes=num_coarse_classes,
            aux_layouts=aux_layouts, initialization=aux_initia)
        aux_list = aux_list + aux_list_temp

        # The last dense_block does not have a transition_block
        # NOTE(review): the auxiliary outputs returned for this last block
        # are not appended to `aux_list` - confirm this is intentional.
        x, nb_filter, aux_list_temp, curr_id = __dense_block(
            x, final_nb_layer, nb_filter, growth_rate,
            bottleneck=bottleneck, dropout_rate=dropout_rate,
            weight_decay=weight_decay,
            block_prefix='dense_%i' % (nb_dense_block - 1),
            block_id=nb_dense_block, curr_id=curr_id,
            aux_depth=aux_depth,
            num_coarse_classes=num_coarse_classes,
            aux_layouts=aux_layouts)

        x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5,
                               name='final_bn')(x)
        x = Activation('relu')(x)

        if include_top:
            if pooling == 'avg':
                x = GlobalAveragePooling2D()(x)
            elif pooling == 'max':
                x = GlobalMaxPooling2D()(x)
            x = Dense(nb_classes, activation=activation, name='output',
                      kernel_regularizer=l2(weight_decay))(x)
            # `aux_depth` defaults to None; guard before taking its length so
            # the default arguments do not crash here.
            if aux_depth is not None and len(aux_depth) > 0:
                model = Model(inputs=img_input, outputs=[x] + aux_list)
            else:
                model = Model(inputs=img_input, outputs=x)
        else:
            raise NotImplementedError

        return model
def WDL_UDG(linear_feature_columns,
            dnn_feature_columns,
            untrain_feature_columns,
            dnn_hidden_units=(128, 128),
            l2_reg_linear=1e-5,
            l2_reg_embedding=1e-5,
            l2_reg_dnn=0,
            seed=1024,
            dnn_dropout=0,
            dnn_activation='relu',
            task='binary',
            uid_feature_name='',
            udg_embedding_size=128):
    """Instantiates a Wide&Deep model whose deep part is a ``DNN_UDG`` block.

    :param linear_feature_columns: An iterable containing all the features used by linear part of the model.
    :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
    :param untrain_feature_columns: feature columns forwarded to ``get_linear_logit`` and
        ``input_from_feature_columns``; their embeddings are appended to the uid embedding list.
    :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
    :param l2_reg_linear: float. L2 regularizer strength applied to wide part
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
    :param seed: integer ,to use as random seed.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param dnn_activation: Activation function to use in DNN
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
    :param uid_feature_name: name of the feature looked up in the built inputs and in
        ``linear_feature_columns`` to form the uid embedding; it must be a key of the built inputs.
    :param udg_embedding_size: per-embedding size; multiplied by
        ``len(untrain_feature_columns) + 1`` before being passed to ``DNN_UDG``.
    :return: A Keras model instance.
    """
    features = build_input_features(linear_feature_columns +
                                    dnn_feature_columns)

    # Isolate the uid input and the linear columns that describe it.
    uid_features = OrderedDict([(uid_feature_name,
                                 features[uid_feature_name])])
    uid_feature_columns = [
        column for column in linear_feature_columns
        if column.name == uid_feature_name
    ]

    inputs_list = list(features.values())

    # Wide part.
    linear_logit = get_linear_logit(features,
                                    linear_feature_columns,
                                    untrain_feature_columns,
                                    seed=seed,
                                    prefix='linear',
                                    l2_reg=l2_reg_linear)

    # Deep part inputs.
    (sparse_embedding_list, dense_value_list,
     untrain_embedding_list) = input_from_feature_columns(
         features, dnn_feature_columns, untrain_feature_columns,
         l2_reg_embedding, seed)

    # Dedicated 'udg_' embedding table for the uid feature.
    uid_embedding_dict = create_embedding_dict(uid_feature_columns, [],
                                               0.00001,
                                               prefix='udg_',
                                               seq_mask_zero=True)
    uid_embeddings = embedding_lookup(uid_embedding_dict,
                                      uid_features,
                                      uid_feature_columns, [],
                                      return_feat_list=[],
                                      to_list=True)
    udg_embeddings = uid_embeddings + untrain_embedding_list

    dnn_input = combined_dnn_input(sparse_embedding_list,
                                   dense_value_list,
                                   udg_label=1,
                                   udg_embedding_list=udg_embeddings)
    print(dnn_input)

    # One slot for the uid embedding plus one per untrained feature column.
    total_udg_size = (len(untrain_feature_columns) + 1) * udg_embedding_size
    deep_block = DNN_UDG(dnn_hidden_units,
                         dnn_activation,
                         l2_reg_dnn,
                         dnn_dropout,
                         False,
                         seed,
                         udg_embedding_size=total_udg_size)
    dnn_out = deep_block(dnn_input)
    dnn_logit = Dense(1, use_bias=False, activation=None)(dnn_out)

    # Sum the deep and wide logits, then apply the task-specific output layer.
    final_logit = add_func([dnn_logit, linear_logit])
    output = PredictionLayer(task)(final_logit)

    return Model(inputs=inputs_list, outputs=output)
def cnn_lstm_ctc_pred_model(height, nclass, width=None):
    """Build a CNN + bidirectional-GRU recogniser with a CTC loss output.

    Two models share the CNN feature extractor:

    * the CTC training model (inputs: image, labels, input length, label
      length; output: the ``ctc`` Lambda layer);
    * ``model_is_datetime``, a sigmoid head on globally averaged features.

    :param height: image height used in the input shape ``(height, width, 1)``.
    :param nclass: number of softmax classes of the ``blstm2_out`` layer.
    :param width: image width; ``None`` leaves that dimension unspecified.
    :return: dict with keys ``'model'``, ``'test_func'`` (backend function
        mapping the image input to ``[y_pred, datetime score]``),
        ``'fn_compile'`` (zero-argument callable that compiles the CTC
        model) and ``'model_is_datetime'``.
    """
    rnn_units = 256

    # Optional backing tensors for the inputs / target; all unset here.
    tensors = dict.fromkeys(('the_input', 'the_labels', 'input_length',
                             'label_length', 'target_tensor'))

    inputs = Input(shape=(height, width, 1),
                   name='the_input',
                   tensor=tensors['the_input'])

    # 1. convolutional features, plus the auxiliary sigmoid head
    features = cnn_feature_extractor(inputs)
    pooled = GlobalAveragePooling2D()(features)
    datetime_score = Activation('sigmoid')(Dense(1)(pooled))
    model_is_datetime = keras.models.Model([inputs], [datetime_score])

    # 2. bi-lstm layers
    seq = Permute((2, 1, 3), name='permute')(features)
    seq = TimeDistributed(Flatten(), name='timedistrib')(seq)
    seq = Bidirectional(GRU(rnn_units, return_sequences=True),
                        name='blstm1')(seq)
    seq = Dense(rnn_units, name='blstm1_out', activation='linear')(seq)
    seq = Bidirectional(GRU(rnn_units, return_sequences=True),
                        name='blstm2')(seq)
    if nclass != 3811:
        seq = BatchNormalization()(seq)
    y_pred = Dense(nclass, name='blstm2_out', activation='softmax')(seq)

    # 3. CTC loss compute
    labels = Input(name='the_labels',
                   shape=[None],
                   dtype='float32',
                   tensor=tensors['the_labels'])
    input_length = Input(name='input_length',
                         shape=[1],
                         dtype='int64',
                         tensor=tensors['input_length'])
    label_length = Input(name='label_length',
                         shape=[1],
                         dtype='int64',
                         tensor=tensors['label_length'])

    ctc_inputs = [y_pred, labels, input_length, label_length]
    loss_out = Lambda(ctc_lambda_func, output_shape=(1, ),
                      name='ctc')(ctc_inputs)

    model = Model(inputs=[inputs, labels, input_length, label_length],
                  outputs=[loss_out])

    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.9)

    def fn_compile():
        # The 'ctc' layer already outputs the loss, so the Keras loss just
        # passes the prediction through.
        return model.compile(loss={'ctc': lambda y_true, y_pred: y_pred},
                             metrics=['acc'],
                             optimizer=adam,
                             target_tensors=[tensors['target_tensor']])

    test_func = K.function([inputs], [y_pred, model_is_datetime.outputs[0]])

    return {
        'model': model,
        'test_func': test_func,
        'fn_compile': fn_compile,
        'model_is_datetime': model_is_datetime,
    }
def create_embedding_model(self):
    """Build and compile the embedding + dense + text-CNN model.

    Inputs, in order: one ``(1,)`` input per entry of
    ``USED_CATEGORY_FEATURES`` (named after the feature), the dense input of
    length ``self.dense_input_len``, and the two sequence inputs of lengths
    ``self.max_len[0]`` and ``self.max_len[1]``.

    Each categorical input is embedded (vocabulary ``self.cat_max[feature]
    + 1``, width ``self.emb_dim[i]``); the embeddings are concatenated with
    the batch-normalised dense input and the two ``self.Create_CNN``
    outputs, passed through ``self.full_connect_layer`` and a single
    sigmoid unit.

    :return: the model, compiled with the ``adam`` optimizer and
        ``mean_squared_error`` loss.
    """
    sparse_emb_list = []
    sparse_input_list = []

    for idx, sparse_feature in enumerate(USED_CATEGORY_FEATURES):
        sparse_input = Input(shape=(1, ), name=sparse_feature)
        sparse_input_list.append(sparse_input)

        max_id = self.cat_max[sparse_feature]
        emb_dim = self.emb_dim[idx]
        sparse_embedding = Embedding(
            max_id + 1,
            emb_dim,
            input_length=1,
            trainable=True,
            name=sparse_feature + "_emb",
        )(sparse_input)
        # Drop the length-1 sequence axis: (1, emb_dim) -> (emb_dim,)
        sparse_embedding = Reshape((emb_dim, ))(sparse_embedding)
        sparse_emb_list.append(sparse_embedding)

    merge_sparse_emb = Concatenate(
        name='merge_sparse_emb_trainable')(sparse_emb_list)

    dense_input = Input(shape=(self.dense_input_len, ))
    norm_dense_input = BatchNormalization(
        name='Dense_BN_trainable')(dense_input)

    desc_seq = Input(shape=(self.max_len[0], ))
    desc_cnn_conc = self.Create_CNN(desc_seq, name_suffix='_desc')

    title_seq = Input(shape=(self.max_len[1], ))
    title_cnn_conc = self.Create_CNN(title_seq, name_suffix='_title')

    merge_input = Concatenate(name='merge_input_trainable')(
        [merge_sparse_emb, norm_dense_input, desc_cnn_conc, title_cnn_conc])

    dense_output = self.full_connect_layer(merge_input)
    deep_pre_sigmoid = Dense(
        1, name='deep_pre_sigmoid_trainable')(dense_output)
    proba = Activation('sigmoid', name='proba_trainable')(deep_pre_sigmoid)

    # desc_seq and title_seq must be listed: `proba` depends on them through
    # the two CNN branches, so the graph would be disconnected otherwise.
    model = Model(sparse_input_list + [dense_input, desc_seq, title_seq],
                  proba)
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model
def process_images_thru_tl(batch_size=32,
                           input1=1024,
                           input2=1024,
                           model_name="vgg"):
    """Train a classification head on top of a frozen ImageNet backbone.

    A pre-trained backbone is selected by ``model_name``, a dense head ending
    in a 128-way softmax is attached, the backbone layers are frozen and the
    combined model is trained with the generators from ``get_train_data``.
    Weights are loaded from / saved to
    ``weights_<model_name>_<input1>_<input2>_<batch_size>.h5`` in the current
    directory, and training metrics are appended to ``log.csv``.

    :param batch_size: batch size of the training generator.
    :param input1: units of the first dense layer ``fc1``; 0 skips it.
    :param input2: units of the second dense layer ``fc2``; 0 skips it.
    :param model_name: one of ``"vgg"``, ``"inceptionv3"``, ``"resnet50"``,
        ``"mobilenet"``, ``"xception"``.
    :raises ValueError: if ``model_name`` is not one of the names above.
    """
    with tf.device("/device:GPU:1"):
        if model_name == "vgg":
            model = VGG16(weights="imagenet",
                          include_top=False,
                          input_shape=[224, 224, 3])
            size = 224
        elif model_name == "inceptionv3":
            model = InceptionV3(weights="imagenet",
                                include_top=False,
                                input_shape=[299, 299, 3])
            size = 299
        elif model_name == "resnet50":
            model = ResNet50(weights="imagenet",
                             include_top=False,
                             input_shape=[224, 224, 3])
            size = 224
        elif model_name == "mobilenet":
            # NOTE(review): this branch builds ResNet50, not a MobileNet -
            # looks like a copy/paste slip; left unchanged because no
            # MobileNet constructor is visibly in scope here.
            model = ResNet50(weights="imagenet",
                             include_top=False,
                             input_shape=[224, 224, 3])
            size = 224
        elif model_name == "xception":
            model = Xception(weights="imagenet", include_top=False)
            size = 299
        else:
            # Previously an unknown name fell through and failed later with
            # an unbound `model`.
            raise ValueError("Unknown model_name: %r" % (model_name, ))

    print("%s %d %d %d" % (model_name, input1, input2, batch_size))
    model.summary()

    weights_file = "weights_%s_%d_%d_%d.h5" % (model_name, input1, input2,
                                               batch_size)

    output_ = model.output
    with tf.device("/device:GPU:1"):
        if model_name == "inceptionv3" or model_name == "xception":
            x = GlobalAveragePooling2D(name='avg_pool')(output_)
        else:
            x = Flatten(name='flatten')(output_)
        if input1 != 0:
            x = Dense(input1, activation='relu', name='fc1')(x)
        if input2 != 0:
            x = Dense(input2, activation='relu', name='fc2')(x)
        x = Dense(128, activation='softmax', name='predictions')(x)

    # Freeze the pre-trained backbone; only the new head is trained.
    for layer in model.layers:
        layer.trainable = False

    # The combined model starts at the backbone's *input*. Using the
    # backbone's output tensor here (as before) would leave the frozen
    # backbone out of the model.
    my_model = Model(inputs=model.input, outputs=x)
    my_model.summary()

    # Resume from a previous run with the same configuration, if any.
    if os.path.exists(weights_file):
        my_model.load_weights(weights_file)

    my_model.compile(loss='categorical_crossentropy',
                     optimizer='adam',
                     metrics=['accuracy'])

    train_generator = get_train_data.train_generator(img_size=size,
                                                     batch_size=batch_size)
    valid_generator = get_train_data.valid_generator(img_size=size,
                                                     batch_size=8)
    csv_logger = CSVLogger('log.csv', append=True, separator=',')
    my_model.fit_generator(
        train_generator,
        steps_per_epoch=2000,  # 1000
        epochs=10,
        validation_data=valid_generator,
        validation_steps=200,  # 200
        callbacks=[csv_logger])
    my_model.save_weights(weights_file)
def YoutubeDNN(
    user_feature_columns,
    item_feature_columns,
    num_sampled=5,
    user_dnn_hidden_units=(64, 16),
    dnn_activation='relu',
    dnn_use_bn=False,
    l2_reg_dnn=0,
    l2_reg_embedding=1e-6,
    dnn_dropout=0,
    init_std=0.0001,
    seed=1024,
):
    """Instantiates the YoutubeDNN Model architecture.

    The user features go through a DNN tower; its output and the single item
    feature input are passed to a ``SampledSoftmaxLayer`` built on the item's
    embedding matrix.

    :param user_feature_columns: An iterable containing user's features used by the model.
    :param item_feature_columns: An iterable containing item's features used by the model.
        Only a single item feature (e.g. the item id) is supported.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param user_dnn_hidden_units: list of positive integers or empty list, the layer number
        and units in each layer of the user tower.
    :param dnn_activation: Activation function to use in deep net.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector.
    :param seed: integer, to use as random seed.
    :return: A Keras model instance with the extra attributes ``user_input``,
        ``user_embedding``, ``item_input`` and ``item_embedding`` attached.
    :raises ValueError: if more than one item feature column is given.
    """
    if len(item_feature_columns) > 1:
        raise ValueError(
            "Now YoutubeNN only support 1 item feature like item_id")
    item_name = item_feature_columns[0].name

    # Create the embedding matrices shared by the user and item features.
    embeddings = create_embedding_matrix(
        user_feature_columns + item_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix="")

    # Build the input features for the user side (returned as a dict).
    user_features = build_input_features(user_feature_columns)
    user_inputs_list = list(user_features.values())
    sparse_embeddings, dense_values = input_from_feature_columns(
        user_features,
        user_feature_columns,
        l2_reg_embedding,
        init_std,
        seed,
        embedding_matrix_dict=embeddings)
    user_dnn_input = combined_dnn_input(sparse_embeddings, dense_values)

    # Item side: only the raw inputs are needed here.
    item_features = build_input_features(item_feature_columns)
    item_inputs_list = list(item_features.values())

    user_tower = DNN(
        user_dnn_hidden_units,
        dnn_activation,
        l2_reg_dnn,
        dnn_dropout,
        dnn_use_bn,
        seed,
    )
    user_dnn_out = user_tower(user_dnn_input)

    item_embedding = embeddings[item_name]
    softmax_layer = SampledSoftmaxLayer(item_embedding, num_sampled=num_sampled)
    output = softmax_layer(inputs=(user_dnn_out, item_features[item_name]))

    model = Model(inputs=user_inputs_list + item_inputs_list, outputs=output)

    # Attach the tensors callers need to pull user / item embeddings later.
    setattr(model, "user_input", user_inputs_list)
    setattr(model, "user_embedding", user_dnn_out)
    setattr(model, "item_input", item_inputs_list)
    setattr(model, "item_embedding",
            get_item_embedding(item_embedding, item_features[item_name]))

    return model
def Create_2DCNN(self):
    """Build and compile the BiGRU + 2D-pooling classifier.

    Pipeline: token ids of length ``self.max_len`` -> embedding initialised
    from ``self.embedding_weight`` and kept frozen -> bidirectional GRU that
    returns the full sequence -> reshape to a single-channel 2D map of shape
    ``(max_len, 2 * context_vector_dim, 1)`` -> two ``self.pooling2d_blend``
    branches with pool sizes ``(10, 1)`` and ``(20, 1)`` -> concatenation
    along axis 1 -> flatten -> Dense layer with 6 sigmoid outputs.

    Reads ``self.max_len``, ``self.max_token``, ``self.embedding_dim``,
    ``self.embedding_weight`` and ``self.context_vector_dim``.

    Returns:
        The Keras ``Model`` compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    inp = Input(shape=(self.max_len, ))

    # Frozen embedding: weights are taken as-is and never updated.
    fixed_embedding = Embedding(self.max_token,
                                self.embedding_dim,
                                weights=[self.embedding_weight],
                                trainable=False)
    fixed_x = fixed_embedding(inp)

    x = Bidirectional(GRU(self.context_vector_dim,
                          return_sequences=True))(fixed_x)

    # Add a trailing channel axis so the 2D pooling helpers can be applied.
    x = Reshape((self.max_len, self.context_vector_dim * 2, 1))(x)

    x1 = self.pooling2d_blend(x,
                              pool_size=(10, 1),
                              strides=None,
                              padding='valid')
    x2 = self.pooling2d_blend(x,
                              pool_size=(20, 1),
                              strides=None,
                              padding='valid')

    conc = concatenate([x1, x2], axis=1)
    flat = Flatten()(conc)
    outp = Dense(6, activation="sigmoid")(flat)

    model = Model(inputs=inp, outputs=outp)
    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()
    model.compile(optimizer="adam",
                  loss="binary_crossentropy",
                  metrics=["accuracy"])
    return model
def build_model(initial_filters, size_final_dense):
    """Build a small VGG-style CNN classifier (uncompiled).

    The network consists of two blocks, each made of two 3x3 'same'-padded
    ReLU convolutions followed by 2x2 max pooling.  The second block uses
    twice as many filters as the first.  The result is flattened and passed
    through one hidden Dense layer and a softmax output layer.

    The input shape ``(image_height, image_width, image_depth)`` and the
    number of output units ``num_classes`` are read from module scope.

    Args:
        initial_filters: Number of filters in the first convolutional block.
        size_final_dense: Units of the hidden Dense layer before the output.

    Returns:
        The Keras ``Model`` mapping the image input to class probabilities.
    """
    # Last element of the shape is the number of channels.
    image_input = Input(shape=(image_height, image_width, image_depth))

    features = image_input
    # Each pass adds: conv -> conv -> pool (pooling halves the spatial size).
    for block_filters in (initial_filters, initial_filters * 2):
        for _ in range(2):
            features = Conv2D(filters=block_filters,
                              kernel_size=(3, 3),
                              activation='relu',
                              padding='same')(features)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Classifier head.
    flattened = Flatten()(features)
    hidden = Dense(size_final_dense, activation='relu')(flattened)
    out = Dense(num_classes, activation='softmax')(hidden)

    return Model(image_input, out)
def NCF(user_feature_columns,
        item_feature_columns,
        user_gmf_embedding_dim=20,
        item_gmf_embedding_dim=20,
        user_mlp_embedding_dim=20,
        item_mlp_embedding_dim=20,
        dnn_use_bn=False,
        dnn_hidden_units=(64, 16),
        dnn_activation='relu',
        l2_reg_dnn=0,
        l2_reg_embedding=1e-6,
        dnn_dropout=0,
        init_std=0.0001,
        seed=1024):
    """Instantiates the NCF Model architecture.

    Two branches are built over the same user/item inputs: a GMF branch that
    multiplies the concatenated user and item embeddings element-wise, and an
    MLP branch that feeds the concatenated user and item embeddings through a
    DNN.  Both branch outputs are concatenated and mapped to a single sigmoid
    unit.

    The GMF embedding sizes passed in are rescaled so that the concatenated
    user and item GMF vectors end up with the same width (the least common
    multiple of the two requested widths), which the element-wise product
    requires.

    :param user_feature_columns: A dict mapping each user feature name to its vocabulary size.
    :param item_feature_columns: A dict mapping each item feature name to its vocabulary size.
    :param user_gmf_embedding_dim: int, requested per-feature user embedding size in the GMF branch.
    :param item_gmf_embedding_dim: int, requested per-feature item embedding size in the GMF branch.
    :param user_mlp_embedding_dim: int, per-feature user embedding size in the MLP branch.
    :param item_mlp_embedding_dim: int, per-feature item embedding size in the MLP branch.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units
        in each layer of deep net.
    :param dnn_activation: Activation function to use in deep net.
    :param l2_reg_dnn: float. L2 regularizer strength applied to DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector.
    :param seed: integer, to use as random seed.
    :return: A Keras model instance.
    """
    n_user_feats = len(user_feature_columns)
    n_item_feats = len(item_feature_columns)

    # Common GMF width = lcm(total user width, total item width).
    user_width = n_user_feats * user_gmf_embedding_dim
    item_width = n_item_feats * item_gmf_embedding_dim
    common_width = (user_width * item_width) / (math.gcd(user_width, item_width))
    user_gmf_dim = int(common_width / n_user_feats)
    item_gmf_dim = int(common_width / n_item_feats)

    # ---- Generalized Matrix Factorization (GMF) part ----
    user_gmf_columns = [
        SparseFeat(name, vocabulary_size=vocab, embedding_dim=user_gmf_dim)
        for name, vocab in user_feature_columns.items()
    ]
    user_features = build_input_features(user_gmf_columns)
    user_inputs_list = list(user_features.values())
    user_gmf_sparse, _ = input_from_feature_columns(user_features,
                                                    user_gmf_columns,
                                                    l2_reg_embedding,
                                                    init_std,
                                                    seed,
                                                    prefix='gmf_')
    # Dense values are deliberately left out of the GMF branch.
    user_gmf_input = combined_dnn_input(user_gmf_sparse, [])
    user_gmf_out = Lambda(lambda x: x,
                          name="user_gmf_embedding")(user_gmf_input)

    item_gmf_columns = [
        SparseFeat(name, vocabulary_size=vocab, embedding_dim=item_gmf_dim)
        for name, vocab in item_feature_columns.items()
    ]
    item_features = build_input_features(item_gmf_columns)
    item_inputs_list = list(item_features.values())
    item_gmf_sparse, _ = input_from_feature_columns(item_features,
                                                    item_gmf_columns,
                                                    l2_reg_embedding,
                                                    init_std,
                                                    seed,
                                                    prefix='gmf_')
    item_gmf_input = combined_dnn_input(item_gmf_sparse, [])
    item_gmf_out = Lambda(lambda x: x,
                          name="item_gmf_embedding")(item_gmf_input)

    gmf_out = Multiply()([user_gmf_out, item_gmf_out])

    # ---- Multi-Layer Perceptron (MLP) part ----
    # Reuses the Input tensors created above; only the embeddings differ.
    user_mlp_columns = [
        SparseFeat(name,
                   vocabulary_size=vocab,
                   embedding_dim=user_mlp_embedding_dim)
        for name, vocab in user_feature_columns.items()
    ]
    user_mlp_sparse, user_mlp_dense = input_from_feature_columns(
        user_features,
        user_mlp_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix='mlp_')
    user_mlp_input = combined_dnn_input(user_mlp_sparse, user_mlp_dense)
    user_mlp_out = Lambda(lambda x: x,
                          name="user_mlp_embedding")(user_mlp_input)

    item_mlp_columns = [
        SparseFeat(name,
                   vocabulary_size=vocab,
                   embedding_dim=item_mlp_embedding_dim)
        for name, vocab in item_feature_columns.items()
    ]
    item_mlp_sparse, item_mlp_dense = input_from_feature_columns(
        item_features,
        item_mlp_columns,
        l2_reg_embedding,
        init_std,
        seed,
        prefix='mlp_')
    item_mlp_input = combined_dnn_input(item_mlp_sparse, item_mlp_dense)
    item_mlp_out = Lambda(lambda x: x,
                          name="item_mlp_embedding")(item_mlp_input)

    mlp_input = Concatenate(axis=1)([user_mlp_out, item_mlp_out])
    mlp_tower = DNN(dnn_hidden_units,
                    dnn_activation,
                    l2_reg_dnn,
                    dnn_dropout,
                    dnn_use_bn,
                    seed,
                    name="mlp_embedding")
    mlp_out = mlp_tower(mlp_input)

    # ---- Fusion of GMF and MLP ----
    neumf_input = Concatenate(axis=1)([gmf_out, mlp_out])
    neumf_out = DNN(hidden_units=[1], activation='sigmoid')(neumf_input)
    output = Lambda(lambda x: x, name='neumf_out')(neumf_out)

    return Model(inputs=user_inputs_list + item_inputs_list, outputs=output)
def _unet512_conv_stack(x, filters, repeats):
    """Apply ``repeats`` rounds of 3x3 'same' Conv2D -> BatchNorm -> ReLU to ``x``."""
    for _ in range(repeats):
        x = Conv2D(filters, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    return x


def unet_512(
        input_shape=(512, 512, 3),
        num_classes=1,
        dropout=0.25,
        weights_path='/home/tonee/.config/spyder-py3/deeplearning practise/line_crop_best_9000.h5'):
    """Build, optionally load weights into, and compile a six-level U-Net.

    Encoder: six stages (16, 32, 64, 128, 256, 512 filters), each made of two
    Conv-BN-ReLU layers followed by 2x2 max pooling; every stage except the
    first applies dropout after pooling.  A 1024-filter bottleneck follows.
    Decoder: six stages mirroring the encoder; each upsamples by 2,
    concatenates the matching encoder output on the channel axis, applies
    dropout (except in the last stage) and three Conv-BN-ReLU layers.  A 1x1
    sigmoid convolution produces ``num_classes`` output channels.

    Layers are created in the same order as in the original hand-unrolled
    version so that order-based weight files remain loadable.

    Args:
        input_shape: Shape of the input images. With the default 512x512
            input the bottleneck operates at 8x8.
        num_classes: Number of channels of the final sigmoid convolution.
        dropout: Dropout rate used throughout the network.
        weights_path: Path of the weights file to load before compiling.
            Defaults to the previously hard-coded location; pass ``None`` to
            skip loading and start from freshly initialised weights.

    Returns:
        The Keras ``Model`` compiled with ``RMSprop(lr=0.0001)``,
        ``bce_dice_loss`` and the ``dice_coeff`` / ``jacard_coef`` metrics.
    """
    inputs = Input(shape=input_shape)

    # (filters, apply dropout after pooling) for each encoder stage.
    encoder_plan = ((16, False), (32, True), (64, True), (128, True),
                    (256, True), (512, True))
    skips = []
    x = inputs
    for filters, use_dropout in encoder_plan:
        skip = _unet512_conv_stack(x, filters, 2)
        skips.append(skip)
        x = MaxPooling2D((2, 2), strides=(2, 2))(skip)
        if use_dropout:
            x = Dropout(dropout)(x)

    # Bottleneck.
    x = _unet512_conv_stack(x, 1024, 2)

    # (filters, apply dropout after concatenation) for each decoder stage.
    decoder_plan = ((512, True), (256, True), (128, True), (64, True),
                    (32, True), (16, False))
    for (filters, use_dropout), skip in zip(decoder_plan, reversed(skips)):
        x = UpSampling2D((2, 2))(x)
        x = concatenate([skip, x], axis=3)
        if use_dropout:
            x = Dropout(dropout)(x)
        x = _unet512_conv_stack(x, filters, 3)

    classify = Conv2D(num_classes, (1, 1), activation='sigmoid')(x)

    model = Model(inputs=[inputs], outputs=[classify])

    # Loading is optional so the builder also works on machines that do not
    # have the original checkpoint.
    if weights_path is not None:
        model.load_weights(weights_path)

    model.compile(optimizer=RMSprop(lr=0.0001),
                  loss=bce_dice_loss,
                  metrics=[dice_coeff, jacard_coef])
    return model
# x.add(Dropout(0.3)) # x.add(Dense(1, activation='sigmoid')) # LSTM x.add(LSTM(n_hidden)) shared_model = x # The visible layer left_input = Input(shape=(max_seq_length, ), dtype=tf.float32) right_input = Input(shape=(max_seq_length, ), dtype=tf.float32) # Pack it all up into a Manhattan Distance model malstm_distance = ManDist()( [shared_model(left_input), shared_model(right_input)]) model = Model(inputs=[left_input, right_input], outputs=[malstm_distance]) model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) model.summary() shared_model.summary() # Start trainings training_start_time = time() malstm_trained = model.fit( [X_train['left'], X_train['right']], Y_train, batch_size=batch_size, epochs=n_epoch, validation_data=([X_validation['left'],
def MIND(user_feature_columns,
         item_feature_columns,
         num_sampled=5,
         k_max=2,
         p=1.0,
         dynamic_k=False,
         user_dnn_hidden_units=(64, 32),
         dnn_activation='relu',
         dnn_use_bn=False,
         l2_reg_dnn=0,
         l2_reg_embedding=1e-6,
         dnn_dropout=0,
         init_std=0.0001,
         seed=1024):
    """Instantiates the MIND Model architecture.

    The item history sequence (the var-len feature named ``"hist_" + item
    feature name``) is passed through a ``CapsuleLayer`` together with the
    ``hist_len`` input; the result is optionally concatenated with the other
    user features, fed to a DNN, combined with the target item embedding by
    ``LabelAwareAttention`` and finally passed to a ``SampledSoftmaxLayer``.

    :param user_feature_columns: An iterable containing user's features used by the model.
        It must include the history feature and a feature named ``hist_len``.
    :param item_feature_columns: An iterable containing item's features used by the model.
        Only a single item feature (e.g. the item id) is supported.
    :param num_sampled: int, the number of classes to randomly sample per batch.
    :param k_max: int, the max size of user interest embedding.
    :param p: float, the parameter for adjusting the attention distribution in LabelAwareAttention.
    :param dynamic_k: bool, whether or not use dynamic interest number.
    :param user_dnn_hidden_units: list of positive integers or empty list, the layer number
        and units in each layer of the user tower.
    :param dnn_activation: Activation function to use in deep net.
    :param dnn_use_bn: bool. Whether use BatchNormalization before activation or not in deep net.
    :param l2_reg_dnn: L2 regularizer strength applied to DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector.
    :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
    :param init_std: float, to use as the initialize std of embedding vector.
    :param seed: integer, to use as random seed.
    :return: A Keras model instance with the extra attributes ``user_input``,
        ``user_embedding``, ``item_input`` and ``item_embedding`` attached.
    :raises ValueError: if more than one item feature column is given.
    """
    if len(item_feature_columns) > 1:
        raise ValueError("Now MIND only support 1 item feature like item_id")

    item_column = item_feature_columns[0]
    item_feature_name = item_column.name
    item_vocabulary_size = item_column.vocabulary_size
    item_embedding_dim = item_column.embedding_dim

    history_feature_list = [item_feature_name]
    history_fc_names = ["hist_" + name for name in history_feature_list]

    features = build_input_features(user_feature_columns)

    # Split the user columns by type.
    if user_feature_columns:
        sparse_feature_columns = [
            col for col in user_feature_columns if isinstance(col, SparseFeat)
        ]
        dense_feature_columns = [
            col for col in user_feature_columns if isinstance(col, DenseFeat)
        ]
        varlen_sparse_feature_columns = [
            col for col in user_feature_columns
            if isinstance(col, VarLenSparseFeat)
        ]
    else:
        sparse_feature_columns = []
        dense_feature_columns = []
        varlen_sparse_feature_columns = []

    # Var-len columns are either the item history or ordinary sequences.
    history_feature_columns = [
        col for col in varlen_sparse_feature_columns
        if col.name in history_fc_names
    ]
    sparse_varlen_feature_columns = [
        col for col in varlen_sparse_feature_columns
        if col.name not in history_fc_names
    ]

    seq_max_len = history_feature_columns[0].maxlen
    inputs_list = list(features.values())

    embedding_matrix_dict = create_embedding_matrix(user_feature_columns +
                                                    item_feature_columns,
                                                    l2_reg_embedding,
                                                    init_std,
                                                    seed,
                                                    prefix="")

    item_features = build_input_features(item_feature_columns)
    item_input = item_features[item_feature_name]

    # Target item (query), history items (keys) and remaining sparse features.
    query_emb_list = embedding_lookup(embedding_matrix_dict,
                                      item_features,
                                      item_feature_columns,
                                      history_feature_list,
                                      history_feature_list,
                                      to_list=True)
    keys_emb_list = embedding_lookup(embedding_matrix_dict,
                                     features,
                                     history_feature_columns,
                                     history_fc_names,
                                     history_fc_names,
                                     to_list=True)
    dnn_input_emb_list = embedding_lookup(embedding_matrix_dict,
                                          features,
                                          sparse_feature_columns,
                                          mask_feat_list=history_feature_list,
                                          to_list=True)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    sequence_embed_dict = varlen_embedding_lookup(
        embedding_matrix_dict, features, sparse_varlen_feature_columns)
    sequence_embed_list = get_varlen_pooling_list(
        sequence_embed_dict,
        features,
        sparse_varlen_feature_columns,
        to_list=True)
    dnn_input_emb_list += sequence_embed_list

    history_emb = PoolingLayer()(NoMask()(keys_emb_list))
    target_emb = PoolingLayer()(NoMask()(query_emb_list))

    hist_len = features['hist_len']

    capsule = CapsuleLayer(input_units=item_embedding_dim,
                           out_units=item_embedding_dim,
                           max_len=seq_max_len,
                           k_max=k_max)
    high_capsule = capsule((history_emb, hist_len))

    if len(dnn_input_emb_list) == 0 and len(dense_value_list) == 0:
        # No side features: the capsules alone feed the user tower.
        user_deep_input = high_capsule
    else:
        user_other_feature = combined_dnn_input(dnn_input_emb_list,
                                                dense_value_list)
        other_feature_tile = tf.keras.layers.Lambda(
            tile_user_otherfeat,
            arguments={'k_max': k_max})(user_other_feature)
        user_deep_input = Concatenate()(
            [NoMask()(other_feature_tile), high_capsule])

    user_tower = DNN(user_dnn_hidden_units,
                     dnn_activation,
                     l2_reg_dnn,
                     dnn_dropout,
                     dnn_use_bn,
                     seed,
                     name="user_embedding")
    user_embeddings = user_tower(user_deep_input)

    item_inputs_list = list(item_features.values())
    item_embedding_matrix = embedding_matrix_dict[item_feature_name]

    # Look up the embedding of every item index in the vocabulary.
    item_index = EmbeddingIndex(list(range(item_vocabulary_size)))(item_input)
    item_embedding_weight = NoMask()(item_embedding_matrix(item_index))
    pooling_item_embedding_weight = PoolingLayer()([item_embedding_weight])

    # With dynamic_k the attention layer additionally receives hist_len.
    if dynamic_k:
        attention_inputs = (user_embeddings, target_emb, hist_len)
    else:
        attention_inputs = (user_embeddings, target_emb)
    user_embedding_final = LabelAwareAttention(
        k_max=k_max,
        pow_p=p,
    )(attention_inputs)

    output = SampledSoftmaxLayer(num_sampled=num_sampled)(
        inputs=(pooling_item_embedding_weight, user_embedding_final,
                item_input))

    model = Model(inputs=inputs_list + item_inputs_list, outputs=output)

    # Attach the tensors callers need to pull user / item embeddings later.
    setattr(model, "user_input", inputs_list)
    setattr(model, "user_embedding", user_embeddings)
    setattr(model, "item_input", item_inputs_list)
    setattr(model, "item_embedding",
            get_item_embedding(pooling_item_embedding_weight, item_input))

    return model
def specialUnet(width, height, chann, nc, n_filters = 16, bn = True, dilation_rate = 1):
    '''Build a four-level U-Net with optional batch normalization.

    The input is divided by 255 inside the model. The encoder has four
    stages (1x, 2x, 4x and 8x n_filters), each made of two 3x3 ReLU
    convolutions followed by 2x2 max pooling, then a 16x n_filters
    bottleneck. The decoder mirrors the encoder: each stage upsamples by 2,
    concatenates the matching encoder output on the channel axis and applies
    two more convolutions. A final 1x1 softmax convolution yields nc
    channels.

    Inputs:
        width, height, chann - input image width, height and channel count
        nc - number of channels of the final softmax convolution
        n_filters - base convolution filters
        bn - flag to set batch normalization after every 3x3 convolution
        dilation_rate - convolution dilation rate (used by every convolution)
    Output:
        Unet keras Model (not compiled)
    '''
    def double_conv(tensor, multiplier):
        # Two 3x3 'same' ReLU convolutions, each optionally followed by BN.
        for _ in range(2):
            tensor = Conv2D(n_filters * multiplier, (3, 3), activation='relu',
                            padding = 'same',
                            dilation_rate = dilation_rate)(tensor)
            if bn:
                tensor = BatchNormalization()(tensor)
        return tensor

    inputs = Input((height, width, chann))
    # image normalization between 0 and 1
    current = Lambda(lambda x: x / 255) (inputs)

    # Contracting path: remember each stage's output for the skip links.
    skips = []
    for multiplier in (1, 2, 4, 8):
        stage_out = double_conv(current, multiplier)
        skips.append(stage_out)
        current = MaxPooling2D(pool_size=(2, 2),
                               data_format='channels_last')(stage_out)

    # Bottleneck.
    current = double_conv(current, 16)

    # Expanding path: deepest skip connection is consumed first.
    for multiplier in (8, 4, 2, 1):
        upsampled = UpSampling2D(size=(2, 2))(current)
        merged = concatenate([upsampled, skips.pop()], axis=3)
        current = double_conv(merged, multiplier)

    outputs = Conv2D(nc, (1, 1), activation='softmax', padding = 'same',
                     dilation_rate = dilation_rate)(current)

    return Model(inputs=inputs, outputs=outputs)
plt.grid = False plt.xticks([]) plt.yticks([]) plt.imshow(img) return #display_img(preprocess_input_image('style2.jpg')) #display_img(preprocess_input_image('content.jpeg')) #layers we will use from VGG19 model for training content_layer = 'block5_conv2' style_layers = ['block1_conv1', 'block3_conv1', 'block5_conv1'] #takes output of activation for content image from content_model = Model(model.input, outputs=model.get_layer(content_layer).output) #takes output of activation for style image style_models = [ Model(inputs=model.input, outputs=model.get_layer(layer).output) for layer in style_layers ] #function for generating content cost def content_cost(content, generated): a_C = content_model(content) a_G = content_model(generated) cost = tf.reduce_mean(tf.square(a_C - a_G)) return cost
def DIN(dnn_feature_columns, history_feature_list, embedding_size=8, hist_len_max=16, dnn_use_bn=False,
        dnn_hidden_units=(200, 80), dnn_activation='relu', att_hidden_size=(80, 40), att_activation="dice",
        att_weight_normalization=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, dnn_dropout=0, init_std=0.0001, seed=1024,
        task='binary'):
    """Build the Deep Interest Network (DIN) model.

    Variable-length columns named ``"hist_" + name`` (for each ``name`` in
    ``history_feature_list``) are treated as behaviour-history keys and are
    pooled by an attention layer against the query embeddings; every other
    variable-length column goes through the regular sequence pooling path.

    :param dnn_feature_columns: An iterable containing all the features used by the deep part of the model.
    :param history_feature_list: list, names of the sparse fields that have a ``hist_``-prefixed sequence column.
    :param embedding_size: positive integer, sparse feature embedding size.
    :param hist_len_max: positive int, the max length of the sequence input.
        Not referenced in this function body; kept for interface compatibility.
    :param dnn_use_bn: bool. Whether to use BatchNormalization before activation in the deep net.
    :param dnn_hidden_units: list of positive integers or empty list, the layer number and units in each layer of the deep net.
    :param dnn_activation: Activation function to use in the deep net.
    :param att_hidden_size: list of positive integers, the layer number and units in each layer of the attention net.
    :param att_activation: Activation function to use in the attention net.
    :param att_weight_normalization: bool. Whether to normalize the attention score of the local activation unit.
    :param l2_reg_dnn: float. L2 regularizer strength applied to the DNN.
    :param l2_reg_embedding: float. L2 regularizer strength applied to the embedding vectors.
    :param dnn_dropout: float in [0,1), the probability of dropping out a given DNN coordinate.
    :param init_std: float, the std used to initialize the embedding vectors.
    :param seed: integer, to use as random seed.
    :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss.
    :return: A Keras model instance.
    """
    features = build_input_features(dnn_feature_columns)

    def columns_of(column_type):
        # A falsy `dnn_feature_columns` (None / empty) yields an empty list.
        if not dnn_feature_columns:
            return []
        return [fc for fc in dnn_feature_columns if isinstance(fc, column_type)]

    sparse_feature_columns = columns_of(SparseFeat)
    dense_feature_columns = columns_of(DenseFeat)
    varlen_sparse_feature_columns = columns_of(VarLenSparseFeat)

    # Split the variable-length columns into history keys and ordinary
    # sequence features, based on the "hist_" naming convention.
    history_fc_names = ["hist_" + name for name in history_feature_list]
    history_feature_columns = []
    sparse_varlen_feature_columns = []
    for fc in varlen_sparse_feature_columns:
        if fc.name in history_fc_names:
            bucket = history_feature_columns
        else:
            bucket = sparse_varlen_feature_columns
        bucket.append(fc)

    inputs_list = list(features.values())

    embedding_dict = create_embedding_matrix(
        dnn_feature_columns, l2_reg_embedding, init_std, seed, embedding_size,
        prefix="")

    # The query embeddings are looked up separately from the DNN inputs.
    query_emb_list = embedding_lookup(
        embedding_dict, features, sparse_feature_columns,
        history_feature_list, history_feature_list)
    keys_emb_list = embedding_lookup(
        embedding_dict, features, history_feature_columns,
        history_fc_names, history_fc_names)
    dnn_input_emb_list = embedding_lookup(
        embedding_dict, features, sparse_feature_columns,
        mask_feat_list=history_feature_list)
    dense_value_list = get_dense_input(features, dense_feature_columns)

    # Pool the non-history sequence features and append them to the DNN inputs.
    dnn_input_emb_list += get_varlen_pooling_list(
        varlen_embedding_lookup(
            embedding_dict, features, sparse_varlen_feature_columns),
        features, sparse_varlen_feature_columns)

    keys_emb = concat_fun(keys_emb_list)
    deep_input_emb = concat_fun(dnn_input_emb_list)
    query_emb = concat_fun(query_emb_list)

    # Attention pooling of the history keys, conditioned on the query.
    attention_pooling = AttentionSequencePoolingLayer(
        att_hidden_size, att_activation,
        weight_normalization=att_weight_normalization, supports_masking=True)
    hist = attention_pooling([query_emb, keys_emb])

    deep_input_emb = Concatenate()([NoMask()(deep_input_emb), hist])
    deep_input_emb = Flatten()(deep_input_emb)
    dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)

    dnn_output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
                     dnn_dropout, dnn_use_bn, seed)(dnn_input)
    final_logit = Dense(1, use_bias=False)(dnn_output)
    prediction = PredictionLayer(task)(final_logit)

    return Model(inputs=inputs_list, outputs=prediction)
# 捨棄 ResNet50 頂層的 fully connected layers net = ResNet50(include_top=False, weights='imagenet', input_tensor=None, input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3)) x = net.output x = Flatten()(x) # 增加 DropOut layer x = Dropout(0.5)(x) # 增加 Dense layer,以 softmax 產生個類別的機率值 output_layer = Dense(NUM_CLASSES, activation='softmax', name='softmax')(x) # 設定凍結與要進行訓練的網路層 net_final = Model(inputs=net.input, outputs=output_layer) for layer in net_final.layers[:FREEZE_LAYERS]: layer.trainable = False for layer in net_final.layers[FREEZE_LAYERS:]: layer.trainable = True # 使用 Adam optimizer,以較低的 learning rate 進行 fine-tuning net_final.compile(optimizer=Adam(lr=1e-5), loss='categorical_crossentropy', metrics=['accuracy']) # 輸出整個網路結構 print(net_final.summary()) # 訓練模型 hist = net_final.fit_generator(