def test_model_trainability_switch():
    # a non-trainable model has no trainable weights
    x = Input(shape=(1,))
    y = Dense(2)(x)
    model = Model(x, y)
    model.trainable = False
    assert model.trainable_weights == []

    # same for Sequential
    model = Sequential()
    model.add(Dense(2, input_dim=1))
    model.trainable = False
    assert model.trainable_weights == []
def test_trainable_weights():
    a = Input(shape=(2,))
    b = Dense(1)(a)
    model = Model(a, b)

    weights = model.weights
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights

    model.trainable = True
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.layers[1].trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights

    # sequential model
    model = Sequential()
    model.add(Dense(1, input_dim=2))

    weights = model.weights
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights

    model.trainable = True
    assert model.trainable_weights == weights
    assert model.non_trainable_weights == []

    model.layers[0].trainable = False
    assert model.trainable_weights == []
    assert model.non_trainable_weights == weights
class RationaleCNN: def __init__(self, preprocessor, filters=None, n_filters=100, dropout=0.0): ''' parameters --- preprocessor: an instance of the Preprocessor class, defined below ''' self.preprocessor = preprocessor if filters is None: self.ngram_filters = [3, 4, 5] else: self.ngram_filters = filters self.nb_filter = n_filters self.dropout = dropout self.sentence_model_trained = False #self.build_model() # build model #self.train_sentence_model() @staticmethod def weighted_sum(X): # @TODO.. add sentence preds! return K.sum(X, axis=0) # I *think* axis 0 is correct... @staticmethod def weighted_sum_output_shape(input_shape): # expects something like (None, max_doc_len, num_features) shape = list(input_shape) #assert len(shape) == 2 # not sure if correct... #print len(shape) print("shape: %s" % shape) # (1 x num_features) return tuple((1, shape[-1])) @staticmethod def balanced_sample(X, y): _, pos_rationale_indices = np.where([y[:,0] > 0]) _, neg_rationale_indices = np.where([y[:,1] > 0]) _, non_rationale_indices = np.where([y[:,2] > 0]) # sample a number of non-rationales equal to the total # number of pos/neg rationales. m = pos_rationale_indices.shape[0] + neg_rationale_indices.shape[0] sampled_non_rationale_indices = np.array(random.sample(non_rationale_indices, m)) train_indices = np.concatenate([pos_rationale_indices, neg_rationale_indices, sampled_non_rationale_indices]) np.random.shuffle(train_indices) # why not return X[train_indices,:], y[train_indices] # r_CNN.sentence_model.predict(X[:10], batch_size=128) def train_sentence_model(self, train_documents, nb_epoch=5, downsample=True, batch_size=128, optimizer='adam'): # assumes sentence sequences have been generated! assert(train_documents[0].sentence_sequences is not None) X, y= [], [] # flatten sentences/sentence labels for d in train_documents: X.extend(d.sentence_sequences) y.extend(d.sentences_y) # @TODO sub-sample magic? X, y = np.asarray(X), np.asarray(y) # downsample if downsample: X, y = RationaleCNN.balanced_sample(X, y) #self.train(X[:1000], y[:1000]) self.train(X, y) self.sentence_model_trained = True def train(self, X_train, y_train, X_val=None, y_val=None, nb_epoch=5, batch_size=32, optimizer='adam'): ''' Accepts an X matrix (presumably some slice of self.X) and corresponding vector of labels. May want to revisit this. X_val and y_val are to be used to validate during training. ''' checkpointer = ModelCheckpoint(filepath="weights.hdf5", verbose=1, save_best_only=(X_val is not None)) if X_val is not None: self.sentence_model.fit({'input': X_train, 'output': y_train}, batch_size=batch_size, nb_epoch=nb_epoch, validation_data={'input': X_val, 'output': y_val}, verbose=2, callbacks=[checkpointer]) else: print("no validation data provided!") #self.sentence_model.fit({'input': X_train, 'output': y_train}, # batch_size=batch_size, nb_epoch=nb_epoch, # verbose=2, callbacks=[checkpointer]) self.sentence_model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=2, callbacks=[checkpointer]) ''' def predict(self, X_test, batch_size=32, binarize=False): raw_preds = self.model.predict({'input': X_test}, batch_size=batch_size)['output'] #np.array(self.model.predict({'input': X_test}, # batch_size=batch_size)['output']) if binarize: return np.round(raw_preds) return raw_preds ''' def build_sentence_model(self): ''' Build the *sentence* level model, which operates over, erm, sentences. The task is to predict which sentences are pos/neg rationales. 
''' tokens_input = Input(name='input', shape=(self.preprocessor.max_sent_len,), dtype='int32') x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, input_length=self.preprocessor.max_sent_len, weights=self.preprocessor.init_vectors)(tokens_input) x = Dropout(0.1)(x) convolutions = [] for n_gram in self.ngram_filters: cur_conv = Convolution1D(nb_filter=self.nb_filter, filter_length=n_gram, border_mode='valid', activation='relu', subsample_length=1, input_dim=self.preprocessor.embedding_dims, input_length=self.preprocessor.max_sent_len)(x) # pool one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv) flattened = Flatten()(one_max) convolutions.append(flattened) sentence_vector = merge(convolutions, name="sentence_vector") # hang on to this layer! output = Dense(3, activation="softmax")(sentence_vector) self.sentence_model = Model(input=tokens_input, output=output) print("model built") print(self.sentence_model.summary()) self.sentence_model.compile(loss='categorical_crossentropy', optimizer="adam") self.sentence_embedding_dim = self.sentence_model.layers[-2].output_shape[1] return self.sentence_model def build_doc_model_fixed(self): # no magic here. #input_layer = Dense(1, batch_input_shape=(None, self.sentence_embedding_dim))#input_shape=(self.sentence_embedding_dim, )) #output_layer = Activation('sigmoid')(input_layer) self.document_model = Sequential() self.document_model.add(Dense(1, input_dim=self.sentence_embedding_dim)) self.document_model.add(Activation("sigmoid")) #self.document_model = Model(input=tokens_input, output=output) self.document_model.compile(loss='binary_crossentropy', optimizer="adam") def train_doc_model_fixed(self, train_documents): conv_f = K.function( [self.sentence_model.layers[0].input, K.learning_phase()], [self.sentence_model.layers[-2].output]) X, y = [], [] for d in train_documents: sentence_vectors = np.matrix([conv_f([np.matrix(sent_seq),1])[0][0] for sent_seq in d.sentence_sequences]) #sentence_predictions = self.sentence_model.predict(d.sentence_sequences) sentence_predictions = self.sentence_model.predict(d.sentence_sequences) weights = np.amax(sentence_predictions[:,0:2],axis=1) weighted = np.dot(weights, sentence_vectors) X.append(weighted) y.append(d.doc_y) #train_sequences = X = np.vstack(X) y = np.array(y) #import pdb; pdb.set_trace() self.document_model.fit(X, y) #return np.matrix(np.dot(weights, vecs)) def train_document_model(self, train_documents, nb_epoch=5, downsample=True, batch_size=128, optimizer='adam'): # assumes sentence sequences have been generated! assert(train_documents[0].sentence_sequences is not None) X, y= [], [] # flatten sentences/sentence labels for d in train_documents: X.extend(d.sentence_sequences) y.extend(d.sentences_y) # @TODO sub-sample magic? 
X, y = np.asarray(X), np.asarray(y) # downsample if downsample: X, y = RationaleCNN.balanced_sample(X, y) #self.train(X[:1000], y[:1000]) self.train(X, y) self.sentence_model_trained = True def build_doc_model_concat(self): # the idea is here is to concatenate the sentence inputs; so represent each document # by one very long row doc_len = self.preprocessor.max_sent_len * self.preprocessor.max_doc_len tokens_input = Input(name='input', shape=(doc_len,), dtype='int32') x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, input_length=doc_len, weights=self.preprocessor.init_vectors)(tokens_input) def build_sequential_doc_model(self): #self.document_model = Sequential() m = Sequential() # input layer. this is a matrix with dimensions: # (max_doc_length x max_sent_length) # m.add(Dense(100, input_shape=(p.max_sent_len,))) #pass def build_doc_model3(self): model = Sequential() # 32 is just n_filters; 1 is n_gram nb_feature_maps = n_filters = 32 maxlen = self.preprocessor.max_sent_len conv_filters = [] for n_gram in self.ngram_filters: sequential = Sequential() conv_filters.append(sequential) sequential.add(Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims)) sequential.add(Reshape(1, maxlen, self.preprocessor.embedding_dims)) sequential.add(Convolution2D(nb_feature_maps, 1, n_gram, self.preprocessor.embedding_dims)) sequential.add(Activation("relu")) sequential.add(MaxPooling2D(poolsize=(maxlen - n_gram + 1, 1))) sequential.add(Flatten()) model = Sequential() model.add(Merge(conv_filters, mode='concat')) model.add(Dropout(0.5)) model.add(Dense(nb_feature_maps * len(conv_filters), 1)) model.add(Activation("sigmoid")) ''' convolutions = [] for n_gram in self.ngram_filters: cur_conv = Convolution2D(n_filters, 1, n_gram, input_shape=(1, p.max_doc_len, p.max_sent_len), activation='relu', border_mode='valid') #Convolution1D(nb_filter=self.nb_filter, # filter_length=n_gram, # border_mode='valid', # activation='relu', # subsample_length=1, # input_dim=self.preprocessor.embedding_dims, # input_length=self.preprocessor.max_sent_len)(x) # pool one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv) flattened = Flatten()(one_max) convolutions.append(flattened) ''' #model.add( # Convolution2D(n_filters, 1, n_gram, # input_shape=(1, p.max_doc_len, p.max_sent_len)) # get vectors for each sentence #MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1) #one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv) ''' document_input = Input(name='input', shape=(None, self.preprocessor.max_doc_len, self.preprocessor.max_sent_len), dtype='int32') # filter, nb_rows, nb_cols n_gram = 1 cur_conv = Convolution2D(32, n_gram, self.preprocessor.embedding_dims, activation='relu', # samples, channels, rows, cols input_shape=(1, self.preprocessor.max_doc_len, self.preprocessor.embedding_dims, ))(document_input) ''' def build_doc_model2(self): document_input = Input(name='input', shape=(self.preprocessor.max_doc_len, self.preprocessor.max_sent_len,), dtype='int32') document_vector = WeightedSumSentenceVector(self.sentence_model)(document_input) # sentence_vectors = # #conv_f = K.function([self.sentence_model.layers[0].input, K.learning_phase()], # [self.sentence_model.layers[-2].output]) # test_sent.shape # (1,50) ### this is the list of token indices! # sentence_v = conv_f([test_sent,1])[0] ''' Re-construct the (start of) the *sentence* level model, which operates over, erm, sentences. 
The task is to predict which sentences are pos/neg rationales. ''' # ''' tokens_input = Input(name='input', shape=(self.preprocessor.max_sent_len,), dtype='int32') x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, input_length=self.preprocessor.max_sent_len, weights=self.preprocessor.init_vectors)(tokens_input) x = Dropout(0.1)(x) convolutions = [] for n_gram in self.ngram_filters: cur_conv = Convolution1D(nb_filter=self.nb_filter, filter_length=n_gram, border_mode='valid', activation='relu', subsample_length=1, input_dim=self.preprocessor.embedding_dims, input_length=self.preprocessor.max_sent_len)(x) # pool one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv) flattened = Flatten()(one_max) convolutions.append(flattened) sentence_vector = merge(convolutions, name="sentence_vector") # hang on to this layer! ''' # ok initialize each layer with parameters! ### # ''' output = Dense(3, activation="softmax")(self.penultimate_layer) self.sentence_model = Model(input=tokens_input, output=output) ''' ''' In [137]: model.summary() ____________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ==================================================================================================== input (InputLayer) (None, 500, 50) 0 ____________________________________________________________________________________________________ reshape_16 (Reshape) (None, 25000) 0 input[0][0] ____________________________________________________________________________________________________ embedding_12 (Embedding) (None, 25000, 200) 2000000 reshape_16[0][0] ____________________________________________________________________________________________________ reshape_17 (Reshape) (None, 500, 10000) 0 embedding_12[0][0] ____________________________________________________________________________________________________ reshape_18 (Reshape) (None, 1, 500, 100000 reshape_17[0][0] ____________________________________________________________________________________________________ convolution2d_4 (Convolution2D) (None, 32, 500, 50) 6432 reshape_18[0][0] ____________________________________________________________________________________________________ maxpooling2d_1 (MaxPooling2D) (None, 32, 500, 1) 0 convolution2d_4[0][0] ____________________________________________________________________________________________________ permute_2 (Permute) (None, 1, 500, 32) 0 maxpooling2d_1[0][0] ____________________________________________________________________________________________________ reshape_19 (Reshape) (None, 500, 32) 0 permute_2[0][0] ===================================================================================== ''' def build_doc_model_clean(self, n_filters=32): # input dim is (max_doc_len x max_sent_len) -- eliding the batch size tokens_input = Input(name='input', shape=(self.preprocessor.max_doc_len, self.preprocessor.max_sent_len), dtype='int32') # flatten; create a very wide matrix to hand to embedding layer tokens_reshaped = Reshape([self.preprocessor.max_doc_len*self.preprocessor.max_sent_len])(tokens_input) # embed the tokens; output will be (p.max_doc_len*p.max_sent_len x embedding_dims) x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, weights=self.preprocessor.init_vectors)(tokens_reshaped) # reshape to preserve document structure; each doc will now be a # a row in this matrix x = Reshape((1, self.preprocessor.max_doc_len, 
self.preprocessor.max_sent_len*self.preprocessor.embedding_dims))(x) #x = Reshape((1, p.max_doc_len, p.max_sent_len*p.embedding_dims))(x) x = Dropout(0.1)(x) #### # @TODO wrap in loop to include all n_grams! n_gram = 1 # tmp cur_conv = Convolution2D(n_filters, 1, n_gram*self.preprocessor.embedding_dims, subsample=(1, self.preprocessor.embedding_dims))(x) # model = Model(input=tokens_input, output=cur_conv) # this output (n_filters x max_doc_len x 1) one_max = MaxPooling2D(pool_size=(1, self.preprocessor.max_sent_len - n_gram + 1))(cur_conv) # flip around, to get (1 x max_doc_len x n_filters) permuted = Permute((3,2,1)) (one_max) # drop extra dimension r = Reshape((self.preprocessor.max_doc_len, n_filters))(permuted) # now we want to average the sentence vectors! x_doc = Lambda(RationaleCNN.weighted_sum, output_shape=RationaleCNN.weighted_sum_output_shape)(r) # finally, the sigmoid layer for classification y_hat = Dense(1, activation="softmax")(x_doc) model = Model(input=tokens_input, output=x_doc) return model #model.summary() def build_doc_model(self): ''' Builds the *document* level model, which uses the sentence level model to inform its predictions. ''' #tokens_input = Input(name='input', shape=(None, # self.preprocessor.max_doc_len, # self.preprocessor.max_sent_len), dtype='int32') tokens_input = Input(name='input', shape=(p.max_doc_len, p.max_sent_len), dtype='int32') tokens_reshaped = Reshape([p.max_doc_len*p.max_sent_len])(tokens_input) x = Embedding(p.max_features, p.embedding_dims, weights=p.init_vectors)(tokens_reshaped) #tokens_reshaped = Reshape((self.preprocessor.max_doc_len, # self.preprocessor.max_sent_len*self.preprocessor.embedding_dims))(tokens_input) # so this will be (max_doc_len, max_sent_len, wv_size), i think #x = Embedding(self.preprocessor.max_features, self.preprocessor.embedding_dims, # weights=self.preprocessor.init_vectors)(tokens_input) #input_length=self.preprocessor.max_sent_len, #weights=self.preprocessor.init_vectors)(tokens_input) x = Reshape((p.max_doc_len, p.max_sent_len*p.embedding_dims))(x) x = Dropout(0.1)(x) # (max_doc_len, max_sent_len, wv_size) -> (max_doc_len, max_sent_len * wv_size) #r = Reshape(self.preprocessor.max_doc_len, # self.preprocessor.max_sent_len * self.preprocessor.embedding_dims)(x) convolutions = [] for n_gram in self.ngram_filters: #cur_conv = Convolution1D(nb_filter=self.nb_filter, filter_length=n_gram) ''' # filter, nb_rows, nb_cols cur_conv = Convolution2D(self.nb_filter, 1, self.preprocessor.embedding_dims, filter_length=n_gram, activation='relu', input_dim=self.preprocessor.embedding_dims, input_length=self.preprocessor.max_sent_len)(x) ''' # cur_conv = Convolution2D(32, p.embedding_dims, n_gram, input_shape=(1, p.embedding_dims, p.max_sent_len))(x) cur_conv = Convolution1D(nb_filter=self.nb_filter, filter_length=n_gram, border_mode='valid', activation='relu', subsample_length=1, input_dim=self.preprocessor.embedding_dims, input_length=self.preprocessor.max_sent_len)(x) # pool #one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv) one_max = MaxPooling1D(pool_length=self.preprocessor.max_sent_len - n_gram + 1)(cur_conv) flattened = Flatten()(one_max) convolutions.append(flattened) penultimate_layer = merge(convolutions) output = Dense(1, activation="sigmoid")(penultimate_layer) self.document_model = Model(input=tokens_input, output=output) print(self.document_model.summary()) self.document_model.compile(loss='binary_crossentropy', optimizer="adam") return self.document_model '''
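# Hedged usage sketch (not from the original source): `preprocessor` is assumed to be
# a Preprocessor instance exposing max_sent_len / max_features / embedding_dims /
# init_vectors, and `train_docs` a list of documents with sentence_sequences,
# sentences_y and doc_y populated, matching the interface used above.
#     r_cnn = RationaleCNN(preprocessor, filters=[3, 4, 5], n_filters=100, dropout=0.1)
#     r_cnn.build_sentence_model()                 # sentence-level rationale classifier
#     r_cnn.train_sentence_model(train_docs, nb_epoch=5, downsample=True)
#     r_cnn.build_doc_model_fixed()                # document model over weighted sentence vectors
#     r_cnn.train_doc_model_fixed(train_docs)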
def test_sequential_regression():
    from keras.models import Sequential, Model
    from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input

    # start with a basic example of using a Sequential model
    # inside the functional API
    seq = Sequential()
    seq.add(Dense(input_dim=10, output_dim=10))

    x = Input(shape=(10,))
    y = seq(x)
    model = Model(x, y)
    model.compile('rmsprop', 'mse')
    weights = model.get_weights()

    # test serialization
    config = model.get_config()
    model = Model.from_config(config)
    model.compile('rmsprop', 'mse')
    model.set_weights(weights)

    # more advanced model with multiple branches
    branch_1 = Sequential(name='branch_1')
    branch_1.add(Embedding(input_dim=100, output_dim=10, input_length=2, name='embed_1'))
    branch_1.add(LSTM(32, name='lstm_1'))

    branch_2 = Sequential(name='branch_2')
    branch_2.add(Dense(32, input_shape=(8,), name='dense_2'))

    branch_3 = Sequential(name='branch_3')
    branch_3.add(Dense(32, input_shape=(6,), name='dense_3'))

    branch_1_2 = Sequential([Merge([branch_1, branch_2], mode='concat')], name='branch_1_2')
    branch_1_2.add(Dense(16, name='dense_1_2-0'))
    # test whether an impromptu input_shape breaks the model
    branch_1_2.add(Dense(16, input_shape=(16,), name='dense_1_2-1'))

    model = Sequential([Merge([branch_1_2, branch_3], mode='concat')], name='final')
    model.add(Dense(16, name='dense_final'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    x = (100 * np.random.random((100, 2))).astype('int32')
    y = np.random.random((100, 8))
    z = np.random.random((100, 6))
    labels = np.random.random((100, 16))
    model.fit([x, y, z], labels, nb_epoch=1)

    # test if Sequential can be called in the functional API
    a = Input(shape=(2,), dtype='int32')
    b = Input(shape=(8,))
    c = Input(shape=(6,))
    o = model([a, b, c])
    outer_model = Model([a, b, c], o)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)

    # test serialization
    config = outer_model.get_config()
    outer_model = Model.from_config(config)
    outer_model.compile(optimizer='rmsprop',
                        loss='categorical_crossentropy',
                        metrics=['accuracy'])
    outer_model.fit([x, y, z], labels, nb_epoch=1)
class KerasMnist(object): def __init__(self, hidden_layers, skips, epochs, batch_size): assert len(hidden_layers) > 0 self.hidden_layer_dims = hidden_layers self.skips = skips self.num_classes = 10 self.input_dim = 784 self.epochs = epochs self.batch_size = batch_size self.model = None self.x_train = None self.y_train = None self.x_test = None self.y_test = None def load_data(self): # the data, split between train and test sets (self.x_train, self.y_train), (self.x_test, self.y_test) = mnist.load_data() self.x_train = self.x_train.reshape(60000, 784) self.x_test = self.x_test.reshape(10000, 784) self.x_train = self.x_train.astype('float32') self.x_test = self.x_test.astype('float32') self.x_train /= 255 self.x_test /= 255 # convert class vectors to binary class matrices self.y_train = keras.utils.to_categorical(self.y_train, self.num_classes) self.y_test = keras.utils.to_categorical(self.y_test, self.num_classes) def build_model(self): if self.skips > 1: self.build_model_skip() else: self.build_model_no_skip() def build_model_no_skip(self): ''' MLP network with ReLU activations. For the last layer use the softmax activation. Initialize self.model as a Sequential model and add layers to it according to the class variables input_dim, hidden_layer_dims and num_classes. ''' self.model = Sequential() input_dim = self.input_dim for layer in self.hidden_layer_dims: self.model.add( Dense(units=layer, activation='relu', input_dim=input_dim)) input_dim = layer self.model.add(Dense(units=self.num_classes, activation='softmax')) self.model.compile(loss='categorical_crossentropy', optimizer=SGD(), metrics=['accuracy']) self.model.summary() def build_model_skip(self): ''' MLP with skip connections. Using the Model functional API, create layers as before, with ReLU as the activation function, and softmax for the last layer. In addition, create skip connections between every n layers, where n is defined by the class parameter skips. Make sure to: 1) Define the variable x as the input to the network. 2) Define the variable out as the output of the network. ''' x = Input(shape=(self.input_dim, )) prev = x tensors = [x] for index, layer in enumerate(self.hidden_layer_dims): if index >= self.skips and index % self.skips == 0: hidden_layer = Dense(units=layer, activation='relu')(prev) n_skip_back_layer = tensors[index - self.skips + 1] prev = keras.layers.add([hidden_layer, n_skip_back_layer]) else: hidden_layer = Dense(units=layer, activation='relu')(prev) prev = hidden_layer tensors.append(prev) out = Dense(units=self.num_classes, activation='softmax')(prev) self.model = Model([x], out) self.model.compile(loss='categorical_crossentropy', optimizer=SGD(), metrics=['accuracy']) self.model.summary() def train_eval_model(self): history = self.model.fit(self.x_train, self.y_train, batch_size=self.batch_size, epochs=self.epochs, verbose=0, validation_data=(self.x_test, self.y_test)) score_train = self.model.evaluate(self.x_train, self.y_train, verbose=0) score_test = self.model.evaluate(self.x_test, self.y_test, verbose=0) return history, score_train, score_test @staticmethod def plot_curves(history, figpath): history_dict = history.history for metric in ['loss', 'acc']: plt.clf() metric_values = history_dict[metric] val_metric_values = history_dict['val_' + metric] epochs = range(1, len(metric_values) + 1) plt.plot(epochs, metric_values, 'bo') plt.plot(epochs, val_metric_values, 'b+') plt.xlabel('epochs') plt.ylabel(metric) plt.savefig(figpath + '_' + metric + '.png')
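# A minimal usage sketch (not part of the original source): it exercises the
# KerasMnist class above end-to-end. The hyperparameters are illustrative
# assumptions; the hidden widths are kept equal so the residual add() in
# build_model_skip has matching shapes.
if __name__ == '__main__':
    net = KerasMnist(hidden_layers=[128, 128, 128], skips=2, epochs=5, batch_size=128)
    net.load_data()
    net.build_model()  # skips > 1, so the functional skip-connection variant is built
    history, score_train, score_test = net.train_eval_model()
    print('train loss/accuracy:', score_train)
    print('test loss/accuracy:', score_test)
    KerasMnist.plot_curves(history, 'mnist_mlp')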
# In[72]:

# fetch the output of a late batch-normalisation layer (third from the end)
print(base_model.layers[-3].output)

# In[97]:

import tensorflow
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import InputLayer

base_model = tensorflow.keras.applications.resnet50.ResNet50(
    weights='imagenet', pooling='max', include_top=False)
#base_model = ResNet50(weights='imagenet', pooling='max', include_top=False)
#input = Input(shape=(32, 32, 3), name='img')

model = tensorflow.keras.Sequential()
model.add(InputLayer(input_shape=(32, 32, 3), name='img'))
for layer in base_model.layers[0:176]:
    model.add(layer)
#intermediate_layer_model = Model(inputs=input, outputs=base_model.layers[-3].output)

# In[ ]:

features = model.predict(img)
features

# In[74]:

#from keras.models import Model
#base_model = ResNet50(weights='imagenet', pooling='max', include_top=False)
#model = base_model  # include here your original model
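# Hedged alternative sketch (not from the original notebook): copying ResNet50
# layers one-by-one into a Sequential model drops the residual skip connections,
# so a functional sub-model is the safer way to read an intermediate activation.
# The layer index (-3) mirrors the cell above; `img` is assumed to be a batch of
# preprocessed images.
from tensorflow.keras.models import Model

feature_extractor = Model(inputs=base_model.input,
                          outputs=base_model.layers[-3].output)
# features = feature_extractor.predict(img)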
preds = model.predict_generator(test2)

pred = []
for i in preds:
    if i > 0.45:
        pred.append(1)
    else:
        pred.append(0)

print(classification_report(test2.classes, pred))
print(confusion_matrix(test2.classes, pred))

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(64, activation="relu"))
    EPOCS = 50
    input_tensor = Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
    base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(N_CATEGORIES, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers[:15]:
        layer.trainable = False

elif (MODELS == 'small_cnn'):
    IMAGE_SIZE = 32
    EPOCS = 50
    model = Sequential()
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
    model.add(InputLayer(input_shape=input_shape))
    model.add(Convolution2D(96, 3, 3, border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(128, 3, 3))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(N_CATEGORIES))
    model.add(Activation('softmax', name='predictions'))

elif (MODELS == 'simple_cnn'):
    IMAGE_SIZE = 48
    EPOCS = 50
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
class QNetwork(): # This class essentially defines the network architecture. # The network should take in state of the world as an input, # and output Q values of the actions available to the agent as the output. def __init__(self, env, replay, deep, duel): # Define your network architecture here. It is also a good idea to define any training operations # and optimizers here, initialize your variables, or alternately compile your model here. self.learning_rate = 0.001 #HYPERPARAMETER1 #linear network if (deep == False and duel == False): print("Setting up linear network....") self.model = Sequential() # self.model.add(Dense(env.action_space.n, input_dim = env.observation_space.shape[0], activation='linear', kernel_initializer='he_uniform', use_bias = True)) self.model.add( Dense(32, input_dim=env.observation_space.shape[0] * 2, activation='linear', kernel_initializer='he_uniform', use_bias=True)) self.model.add( Dense(env.action_space.n, input_dim=32, activation='linear', kernel_initializer='he_uniform', use_bias=True)) self.model.compile(optimizer=Adam(lr=self.learning_rate), loss='mse') # plot_model(self.model, to_file='graphs/Linear.png', show_shapes = True) self.model.summary() #deep network elif (deep == True): print("Setting up DDQN network....") self.model = Sequential() self.model.add( Dense(32, input_dim=env.observation_space.shape[0] * 2, activation='relu', kernel_initializer='he_uniform', use_bias=True)) # self.model.add(BatchNormalization()) self.model.add( Dense(32, input_dim=32, activation='relu', kernel_initializer='he_uniform', use_bias=True)) # self.model.add(BatchNormalization()) self.model.add( Dense(32, input_dim=32, activation='relu', kernel_initializer='he_uniform', use_bias=True)) # self.model.add(BatchNormalization()) self.model.add( Dense(env.action_space.n, input_dim=32, activation='linear', kernel_initializer='he_uniform', use_bias=True)) print("Q-Network initialized.... 
:)\n") self.model.compile(optimizer=Adam(lr=self.learning_rate), loss='mse') # plot_model(self.model, to_file='graphs/DDQN.png', show_shapes = True) self.model.summary() #dueling network elif (duel == True): print("Setting up Dueling DDQN network....") inp = Input(shape=(env.observation_space.shape[0] * 2, )) layer_shared1 = Dense(32, activation='relu', kernel_initializer='he_uniform', use_bias=True)(inp) # layer_shared1 = BatchNormalization()(layer_shared1) layer_shared2 = Dense(32, activation='relu', kernel_initializer='he_uniform', use_bias=True)(layer_shared1) # layer_shared2 = BatchNormalization()(layer_shared2) print("Shared layers initialized....") # layer_v1 = Dense(16,activation='relu',kernel_initializer='he_uniform',use_bias = True)(layer_shared2) # # layer_v1 = BatchNormalization()(layer_v1) # layer_a1 = Dense(16,activation='relu',kernel_initializer='he_uniform',use_bias = True)(layer_shared2) # layer_a1 = BatchNormalization()(layer_a1) layer_v2 = Dense(1, activation='linear', kernel_initializer='he_uniform', use_bias=True)(layer_shared2) layer_a2 = Dense(env.action_space.n, activation='linear', kernel_initializer='he_uniform', use_bias=True)(layer_shared2) print("Value and Advantage Layers initialised....") layer_mean = Lambda(lambda x: K.mean(x, axis=-1, keepdims=True))( layer_a2) temp = layer_v2 temp2 = layer_mean for i in range(env.action_space.n - 1): layer_v2 = keras.layers.concatenate([layer_v2, temp], axis=-1) layer_mean = keras.layers.concatenate([layer_mean, temp2], axis=-1) # layer_q = Lambda(lambda x: K.expand_dims(x[0],axis=-1) + x[1] - K.mean(x[1],keepdims=True), output_shape=(env.action_space.n,))([layer_v2, layer_a2]) layer_q = Subtract()([layer_a2, layer_mean]) layer_q = Add()([layer_q, layer_v2]) print("Q-function layer initialized.... :)\n") self.model = Model(inp, layer_q) self.model.summary() self.model.compile(optimizer=Adam(lr=self.learning_rate), loss='mse') # plot_model(self.model, to_file='graphs/Duel_DQN.png', show_shapes = True) def save_model_weights(self, suffix): # Helper function to save your model / weights. self.model.save_weights(suffix) def load_model(self, model_file): # Helper function to load an existing model. self.model = keras.models.load_model(model_file) def load_model_weights(self, weight_file): # Helper funciton to load model weights. self.model.set_weights(weight_file) def visualise_weights(self): print("Current Weights\n") for layer in self.model.layers: temp = layer.get_weights() print(temp)
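# Note (not part of the original class): the concatenate/Subtract/Add construction
# in the dueling branch above implements the standard aggregation
#     Q(s, a) = V(s) + A(s, a) - mean_a A(s, a).
# Assuming V has shape (batch, 1) and A has shape (batch, n_actions), the same
# result can be sketched with a single broadcasting Lambda:
#     layer_q = Lambda(lambda va: va[0] + va[1] - K.mean(va[1], axis=-1, keepdims=True),
#                      output_shape=(env.action_space.n,))([layer_v2, layer_a2])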
# bx = GlobalMaxPooling1D()(bx)
# bx = Dense(128, activation='relu')(bx)

# seq_features = merge([ax, bx], mode='concat')
# # seq_features = Dense(128, activation='relu')(seq_features)
# preds = Dense(len(ys_index), activation='softmax')(seq_features)
# model = Model(inputs=[asequence_input, bsequence_input], outputs=preds)
# model.compile(loss='categorical_crossentropy',
#               optimizer='rmsprop',
#               metrics=['acc'])

cx = Merge([ax, bx], mode='mul')

model = Sequential()
model.add(Merge([ax, bx, cx], mode='concat'))
# model.add(Dense(len(ys_index), activation='softmax'))
model.add(Dense(2, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

print(model.summary())
# print y_train.shape

# happy learning!
count = 0
while count < EPOCH:
    model.fit([x_train_a, x_train_b], y_train_prov,
input_shape = (img_rows, img_cols, 1)

x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

activation = Activation('relu')
activation = my_nl

nw_input = Input(input_shape)
nw = nw_input
nw = Dropout(0.25)(nw)
nw = Conv2D(32, kernel_size=(3, 3), activation='linear')(nw)
class DEEPVSA(object): def __init__(self, inst, label, train_label, seq_len, inst_len, model_option, use_attention): self.train_label = train_label self.seq_len = seq_len self.model_option = model_option self.inst_len = inst_len self.X, self.Y, self.Y_one_hot, self.n_class = self.list2np( inst, label, seq_len, model_option) self.build_model(use_attention) self.use_attention = use_attention def predict_classes(self, proba): if proba.shape[-1] > 1: return proba.argmax(axis=-1) else: return (proba > 0.5).astype('int32') def list2np(self, inst, label, seq_len, model_option): label_all = [10, 20, 30, 40] label_right = int((self.train_label + 1) * 10) label[label == label_right] = 1 label_all.remove(label_right) for ii in label_all: label[label == ii] = 0 n_class = 2 num_sample = inst.shape[0] / seq_len if model_option == 3: X = inst[0:(num_sample * seq_len), ].reshape( num_sample, seq_len, inst.shape[1]) else: X = inst[0:(num_sample * seq_len), ].reshape(num_sample, seq_len) Y = label[0:(num_sample * seq_len), ].reshape(num_sample, seq_len) Y_one_hot = to_categorical(Y).astype('int32') return X, Y, Y_one_hot, n_class def build_model(self, use_attention): if self.model_option == 0: print "Using Bi-SimpleRNN >>>>>>>>>>>>>>>>>>" self.model = Sequential() self.model.add( Embedding(input_dim=256, output_dim=64, input_length=self.seq_len)) self.model.add( Bidirectional( SimpleRNN(units=32, activation='tanh', return_sequences=True))) self.model.add(Dropout(0.5)) self.model.add( Bidirectional( SimpleRNN(units=16, activation='tanh', return_sequences=True))) self.model.add(Dropout(0.5)) self.model.add( Bidirectional( SimpleRNN(units=8, activation='tanh', return_sequences=True))) self.model.add( TimeDistributed(Dense(self.n_class, activation='softmax'), input_shape=(self.seq_len, 16))) self.model.summary() elif self.model_option == 1: print "Using Bi-GRU >>>>>>>>>>>>>>>>>>>>>>>>" self.model = Sequential() self.model.add( Embedding(input_dim=256, output_dim=64, input_length=self.seq_len)) self.model.add( Bidirectional( GRU(units=32, activation='tanh', return_sequences=True))) self.model.add(Dropout(0.5)) self.model.add( Bidirectional( GRU(units=16, activation='tanh', return_sequences=True))) self.model.add(Dropout(0.5)) self.model.add( Bidirectional( GRU(units=8, activation='tanh', return_sequences=True))) self.model.add( TimeDistributed(Dense(self.n_class, activation='softmax'), input_shape=(self.seq_len, 16))) self.model.summary() elif self.model_option == 2: print "Using Bi-LSTM >>>>>>>>>>>>>>>>>>>>>>>" self.model = Sequential() self.model.add( Embedding(input_dim=256, output_dim=64, input_length=self.seq_len)) self.model.add( Bidirectional( LSTM(units=32, activation='tanh', return_sequences=True))) self.model.add(Dropout(0.5)) self.model.add( Bidirectional( LSTM(units=16, activation='tanh', return_sequences=True))) self.model.add(Dropout(0.5)) self.model.add( Bidirectional( LSTM(units=8, activation='tanh', return_sequences=True))) self.model.add( TimeDistributed(Dense(self.n_class, activation='softmax'), input_shape=(self.seq_len, 16))) self.model.summary() elif self.model_option == 3: print "Using hierarchical attention networks >>>>>>>>>>" X_input = Input(shape=(self.seq_len, self.inst_len), name='X_input') inst_input = Input(shape=(self.inst_len, ), name='inst_input') bin_embedded = Embedding(input_dim=257, output_dim=64, input_length=self.inst_len)(inst_input) inst_embedded = Bidirectional( LSTM(units=32, dropout=0.5, return_sequences=True))(bin_embedded) if use_attention: inst_embedded = 
Bidirectional( LSTM(units=16, dropout=0.5, return_sequences=True))(inst_embedded) inst_embedded = AttLayer(16)(inst_embedded) else: inst_embedded = Bidirectional(LSTM(units=16, dropout=0.5))(inst_embedded) inst_model = Model(inst_input, inst_embedded) seq_embedded = TimeDistributed(inst_model)(X_input) if use_attention and False: lstm_out_f = (AttentionLSTM( units=8, seq_len=self.seq_len, seq_input=seq_embedded, dropout=0.25, recurrent_dropout=0.25, return_sequences=True))(seq_embedded) lstm_out_b = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=seq_embedded, dropout=0.25, recurrent_dropout=0.25, return_sequences=True, go_backwards=True))(seq_embedded) lstm_out = concatenate([lstm_out_f, lstm_out_b]) lstm_out_f = (AttentionLSTM(units=32, seq_len=self.seq_len, seq_input=lstm_out, dropout=0.25, recurrent_dropout=0.25, return_sequences=True))(lstm_out) lstm_out_b = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=lstm_out, dropout=0.25, recurrent_dropout=0.25, return_sequences=True, go_backwards=True))(lstm_out) lstm_out = concatenate([lstm_out_f, lstm_out_b]) else: lstm_out = Bidirectional( LSTM(units=32, dropout=0.5, return_sequences=True))(seq_embedded) lstm_out = Bidirectional( LSTM(units=16, dropout=0.5, return_sequences=True))(lstm_out) model_out = TimeDistributed(Dense(self.n_class, activation='softmax'), name='model_out')(lstm_out) self.model = Model([X_input], model_out) self.model.summary() # inst_embedded = Bidirectional(GRU(units=32, dropout=0.5, return_sequences=True))(bin_embedded) # if use_attention: # inst_embedded = Bidirectional(GRU(units=16, dropout=0.5, return_sequences=True))(inst_embedded) # inst_embedded = AttLayer(16)(inst_embedded) # else: # inst_embedded = Bidirectional(GRU(units=16, dropout=0.5))(inst_embedded) # # inst_model = Model(inst_input, inst_embedded) # # seq_embedded = TimeDistributed(inst_model)(X_input) # if use_attention and False: # lstm_out_f = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=seq_embedded, dropout=0.25, # recurrent_dropout=0.25, return_sequences=True))(seq_embedded) # lstm_out_b = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=seq_embedded, dropout=0.25, # recurrent_dropout=0.25, return_sequences=True, # go_backwards=True))(seq_embedded) # lstm_out = concatenate([lstm_out_f, lstm_out_b]) # # lstm_out_f = (AttentionLSTM(units=32, seq_len=self.seq_len, seq_input=lstm_out, dropout=0.25, # recurrent_dropout=0.25, return_sequences=True))(lstm_out) # lstm_out_b = (AttentionLSTM(units=8, seq_len=self.seq_len, seq_input=lstm_out, dropout=0.25, # recurrent_dropout=0.25, return_sequences=True, # go_backwards=True))(lstm_out) # lstm_out = concatenate([lstm_out_f, lstm_out_b]) # # else: # lstm_out = Bidirectional(GRU(units=32, dropout=0.5, return_sequences=True))(seq_embedded) # lstm_out = Bidirectional(GRU(units=16, dropout=0.5, return_sequences=True))(lstm_out) # # model_out = TimeDistributed(Dense(self.n_class, activation='softmax'), name='model_out')(lstm_out) # # self.model = Model([X_input], model_out) # self.model.summary() def fit(self, batch_size, epoch_1, epoch_2, save_model, save_dir, truncate): # self.X = self.X[0:1000, ] # self.Y = self.Y[0:1000, ] # self.Y_one_hot = self.Y_one_hot[0:1000, ] print '================================================' print "Data shape..." print self.X.shape print self.Y_one_hot.shape print "Counting the number of data in each category..." print collections.Counter(self.Y.flatten()) print '================================================' print 'Starting training...' 
if self.train_label == 0 or self.train_label == 1: sample_weights = class_weight.compute_sample_weight( 'balanced', self.Y.flatten()).reshape(self.Y.shape) self.model.compile(optimizer=Adam(lr=0.005), loss='categorical_crossentropy', metrics=['accuracy'], sample_weight_mode="temporal") self.model.fit(self.X, self.Y_one_hot, batch_size=batch_size, epochs=epoch_1, verbose=1, sample_weight=sample_weights) # if save_model: # if truncate: # if self.model_option == 0: # name = str(self.train_label) + '_bi_rnn_truncate_1.h5' # elif self.model_option == 1: # name = str(self.train_label) + '_bigru_truncate_1.h5' # elif self.model_option == 2: # name = str(self.train_label) + '_bilstm_truncate_1.h5' # else: # name = str(self.train_label) + '_han_truncate_1.h5' # else: # if self.model_option == 0: # name = str(self.train_label) + 'bi_rnn_1.h5' # elif self.model_option == 1: # name = str(self.train_label) + '_bigru_1.h5' # elif self.model_option == 2: # name = str(self.train_label) + '_bilstm_1.h5' # else: # name = str(self.train_label) + '_han_1.h5' # save_dir_1 = os.path.join(save_dir, name) # if model_option==3 and self.use_attention: # weights = self.model.get_weights() # io.savemat(save_dir_1, {'weights':weights}) # else: # self.model.save(save_dir_1) if self.train_label == 3 or self.train_label == 2: self.model.compile(optimizer=Adam(lr=0.005), loss='categorical_crossentropy', metrics=['accuracy'], sample_weight_mode="temporal") self.model.fit(self.X, self.Y_one_hot, batch_size=batch_size, epochs=epoch_2, verbose=1) if save_model: if truncate: if self.model_option == 0: name = str(self.train_label) + '_birnn_truncate.h5' elif self.model_option == 1: name = str(self.train_label) + '_bigru_truncate.h5' elif self.model_option == 2: name = str(self.train_label) + '_bilstm_truncate.h5' else: name = str(self.train_label) + '_han_truncate.h5' else: if self.model_option == 0: name = str(self.train_label) + '_birnn.h5' elif self.model_option == 1: name = str(self.train_label) + '_bigru.h5' elif self.model_option == 2: name = str(self.train_label) + '_bilstm.h5' else: name = str(self.train_label) + '_han.h5' save_dir = os.path.join(save_dir, name) if model_option == 3 and self.use_attention: weights = self.model.get_weights() io.savemat(save_dir, {'weights': weights}) else: self.model.save(save_dir) return 0 def evaluate(self): y_pred = self.predict_classes( self.model.predict(self.X, batch_size=batch_size)) print 'Evaluating training results' precision, recall, f1, _ = precision_recall_fscore_support( self.Y.flatten(), y_pred.flatten(), labels=[0, 1], average='weighted') print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1)) print '================================================' for i in xrange(2): print 'Evaluating training results of positive labels at region ' + str( i) precision, recall, f1, _ = precision_recall_fscore_support( self.Y.flatten(), y_pred.flatten(), labels=[i], average='weighted') print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1)) print '================================================' return 0 def predict(self, inst_test, label_test): X_test, Y_test, _, _, = self.list2np(inst_test, label_test, self.seq_len, self.model_option) y_pred = self.predict_classes( self.model.predict(X_test, batch_size=batch_size)) print 'Evaluating testing results' precision, recall, f1, _ = precision_recall_fscore_support( Y_test.flatten(), y_pred.flatten(), labels=[0, 1], average='weighted') print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1)) print 
'================================================' for i in xrange(2): print 'Evaluating testing results of positive labels at region ' + str( i) precision, recall, f1, _ = precision_recall_fscore_support( Y_test.flatten(), y_pred.flatten(), labels=[i], average='weighted') print("Precision: %s Recall: %s F1: %s" % (precision, recall, f1)) print '================================================' return y_pred
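# Hedged usage sketch (not from the original source): `inst` and `label` are
# assumed to be numpy arrays of instruction bytes and region labels in
# {10, 20, 30, 40}, as expected by list2np(); all hyperparameters are illustrative.
#     net = DEEPVSA(inst, label, train_label=1, seq_len=200, inst_len=16,
#                   model_option=2, use_attention=False)
#     net.fit(batch_size=64, epoch_1=10, epoch_2=10, save_model=False,
#             save_dir='models', truncate=False)
#     net.evaluate()
#     y_pred = net.predict(inst_test, label_test)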
# -

# ### Visualize the model architecture

# +
import sys
sys.path.append('../../python_lib/convnet-drawer')

# +
from keras.models import Sequential
from convnet_drawer import Model, Conv2D, MaxPooling2D, Flatten, Dense

drawer_model = Model(input_shape=(28, 28, 3))
drawer_model.add(Flatten())
drawer_model.add(Dense(1500))
drawer_model.add(Dense(1000))
drawer_model.add(Dense(500))
drawer_model.add(Dense(10))
drawer_model.add(Dense(500))
drawer_model.add(Dense(1000))
drawer_model.add(Dense(1500))
drawer_model.add(Dense(784))
drawer_model.save_fig('alexnet.svg')

# + {"colab_type": "text", "id": "mvFdHRtk2GGY", "cell_type": "markdown"}
# ### Display the model summary

# + {"colab": {"base_uri": "https://localhost:8080/", "height": 442}, "colab_type": "code", "id": "cr3WlpfZBd7a", "outputId": "75a74782-6a78-4ed2-bbb1-34dc3d770b83"}
class BiblioEater: # Class to design, train and validate two topologies of NN DROPOUT_PROB = 0.2 DROPOUT_PROB_OUT = 0.3 NUM_WRITERS = 2 NUM_FILTERS_1 = 8 NUM_FILTERS_2 = 16 NUM_FILTERS_3 = 32 HIDDEN_DIMS = 16 RECEPTIVE_FIELD = 4 STRIDES = 1 KERNEL_SIZE_2 = 3 KERNEL_SIZE_3 = 2 POOL_SIZE = 2 POOL_SIZE_2 = 2 POOL_SIZE_3 = 2 NUM_EPOCHS = 12 BATCH_SIZE = 8 def __init__(self): self.model = None self.max_tokens_per_paragraph = 0 self.pos_vector_length = 0 def design_sequential_net(self, max_tokens_per_paragraph, pos_vector_length): # This is the sequential model featured in the article self.max_tokens_per_paragraph = max_tokens_per_paragraph self.pos_vector_length = pos_vector_length input_shape = (max_tokens_per_paragraph, pos_vector_length) self.model = Sequential() # Block 1 self.model.add( Conv1D(filters=self.NUM_FILTERS_1, kernel_size=self.RECEPTIVE_FIELD, strides=self.STRIDES, input_shape=input_shape, activation='relu')) self.model.add(Dropout(self.DROPOUT_PROB)) self.model.add(MaxPooling1D(pool_size=self.POOL_SIZE)) # Block 2 self.model.add( Conv1D(filters=self.NUM_FILTERS_2, kernel_size=self.KERNEL_SIZE_2, activation='relu')) self.model.add(MaxPooling1D(pool_size=self.POOL_SIZE_2)) # Block 3 self.model.add( Conv1D(filters=self.NUM_FILTERS_3, kernel_size=self.KERNEL_SIZE_3, activation='relu')) self.model.add(MaxPooling1D(pool_size=self.POOL_SIZE_3)) # Final block self.model.add(Flatten()) self.model.add(Dropout(self.DROPOUT_PROB_OUT)) self.model.add(Dense(self.HIDDEN_DIMS, activation="relu")) self.model.add(Dense(self.NUM_WRITERS, activation='softmax')) self.model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc']) print(self.model.summary()) # plot_model(self.model, to_file=os.path.join(OUT_FOLDER, "model.png")) def train_sequential_net(self, pos_training_set, writer_labels): # Sequential model training # fit_generator not really needed as the dataset is small. It works all the same. # We train with the whole set. We will validate with other books model_steps = round(len(pos_training_set) / self.BATCH_SIZE) self.model.fit_generator(self.generate_training_batch( pos_training_set, writer_labels, self.BATCH_SIZE), nb_epoch=self.NUM_EPOCHS, steps_per_epoch=model_steps, verbose=2) # serialize to disk with open(MODEL_FILE, "wb") as outfile: pickle.dump(self.model, outfile) nlp_logger.info("Sequential model written to file") def design_multi_sentence_net(self, max_tokens_per_sentence, pos_vector_length): # Alternative design. 
Although more complex, it does not yield better results consistently # Keras Functional API is required for non sequential networks sentence_input_1 = Input(shape=( max_tokens_per_sentence, pos_vector_length, ), name='sentence_input_1') sentence_input_2 = Input(shape=( max_tokens_per_sentence, pos_vector_length, ), name='sentence_input_2') sentence_input_3 = Input(shape=( max_tokens_per_sentence, pos_vector_length, ), name='sentence_input_3') shared_conv = Conv1D(filters=self.NUM_FILTERS_1, kernel_size=self.RECEPTIVE_FIELD, strides=2, activation='relu') shared_max_pooling = MaxPooling1D(pool_size=self.POOL_SIZE) x1 = shared_conv(sentence_input_1) x1 = shared_max_pooling(x1) x2 = shared_conv(sentence_input_2) x2 = shared_max_pooling(x2) x3 = shared_conv(sentence_input_3) x3 = shared_max_pooling(x3) # Now we concatenate the 3 outputs as input to the next layer x = concatenate([ shared_max_pooling.get_output_at(0), shared_max_pooling.get_output_at(1), shared_max_pooling.get_output_at(2) ], axis=-1) # Block 2 x = Conv1D(filters=self.NUM_FILTERS_3, kernel_size=self.KERNEL_SIZE_2, activation='relu')(x) x = Dropout(self.DROPOUT_PROB)(x) x = MaxPooling1D(pool_size=self.POOL_SIZE_2)(x) # Final block x = Flatten()(x) x = Dense(self.HIDDEN_DIMS, activation="relu")(x) main_output = Dense(1, activation='sigmoid', name='main_output')(x) self.model = Model( inputs=[sentence_input_1, sentence_input_2, sentence_input_3], outputs=[main_output]) self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) print(self.model.summary()) def train_multi_sentence_net(self, pos_training_set, writer_labels): # Non-sequential model training # fit_generator not used this time self.model.fit(pos_training_set, writer_labels, batch_size=self.BATCH_SIZE, epochs=self.NUM_EPOCHS, verbose=2) # serialize to disk with open(MULTI_MODEL_FILE, "wb") as outfile: pickle.dump(self.model, outfile) def generate_training_batch(self, training_set, labels, batch_size): # generator function avoid memory problems with big training sets - not adding much value in this case batch_features = np.zeros((batch_size, self.max_tokens_per_paragraph, self.pos_vector_length)) batch_labels = np.zeros((batch_size, self.NUM_WRITERS)) while True: for i in range(batch_size): # choose random index in features index = randint(0, len(training_set) - 1) batch_features[i] = training_set[index] if labels[index] == 1: batch_labels[i] = [0, 1] else: batch_labels[i] = [1, 0] yield batch_features, batch_labels
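# Hedged usage sketch (not part of the original class): `pos_training_set` is
# assumed to be an array shaped (n_paragraphs, max_tokens_per_paragraph,
# pos_vector_length) and `writer_labels` a parallel list of 0/1 author labels,
# matching what generate_training_batch() consumes.
#     eater = BiblioEater()
#     eater.design_sequential_net(max_tokens_per_paragraph=120, pos_vector_length=20)
#     eater.train_sequential_net(pos_training_set, writer_labels)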
model = t  # allows us to create the model knowing only the inputs
model = Conv2D(64, kernel_size=3, activation='relu', strides=2)(model)
model = Conv2D(128, kernel_size=3, activation='relu', strides=2)(model)
model = Conv2D(256, kernel_size=3, activation='relu', strides=2)(model)

# Generate latent layer
model = Flatten()(model)
latent = Dense(latent_dim, name="latent_vector")(model)

# make encoder model
encoder = Model(t, latent, name="encoder_layers")
encoder.summary()

# encoder model
'''encoder = Sequential()
encoder.add(Conv2D(64, kernel_size=3, activation='relu', input_shape=(400, 350, 1), strides=2))
encoder.add(Conv2D(128, kernel_size=3, activation='relu', strides=2))
encoder.add(Conv2D(256, kernel_size=3, activation='relu', strides=2))
encoder.add(Flatten())
encoder.add(Dense(latent_dim, name="latent_vector"))

# make encoder model
#encoder = Model(t, latent, name="encoder_layers")
encoder.summary()

# Decoder Model
decoder = Sequential()
decoder.add(Dense(400*350*1))
decoder.add(Reshape((400, 350, 1)))
    base_model = VGG16(weights='imagenet', include_top=False, input_tensor=input_tensor)
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    predictions = Dense(N_CATEGORIES, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    for layer in base_model.layers[:15]:
        layer.trainable = False

elif (MODELS == 'small_cnn'):
    IMAGE_SIZE = 32
    EPOCS = 50
    model = Sequential()
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
    model.add(InputLayer(input_shape=input_shape))
    model.add(Convolution2D(96, 3, 3, border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(128, 3, 3))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(N_CATEGORIES))
    model.add(Activation('softmax', name='predictions'))

elif (MODELS == 'simple_cnn'):
    IMAGE_SIZE = 48
    EPOCS = 50
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)
TensorBoard panel: the code below runs, but once histogram_freq=1 is set the
callback also asks for embeddings_data, and it was not clear what data to supply.

Let's demonstrate these features on a simple example: you'll train a 1D convnet
on the IMDB sentiment-analysis task.
'''

# text-classification model to use with TensorBoard
max_features = 2000
max_len = 500

(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
x_train = sequence.pad_sequences(x_train, maxlen=max_len)  # pad every sequence to length 500
x_test = sequence.pad_sequences(x_test, maxlen=max_len)

model = keras.models.Sequential()
model.add(layers.Embedding(max_features, 128, input_length=max_len, name='embed'))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.MaxPool1D(5))
model.add(layers.Conv1D(32, 7, activation='relu'))
model.add(layers.GlobalMaxPool1D())
model.add(layers.Dense(1))
model.summary()

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

# creating a directory for TensorBoard log files
# training the model with a TensorBoard callback
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir='/Users/zhaolei/Desktop/dataset/my_log_dir',
        # histogram_freq=1,   # records activation histograms every epoch
        # embeddings_freq=1,  # records embedding data every epoch
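# Hedged completion sketch (not in the original notebook): with the Keras 2.x
# TensorBoard callback, `embeddings_data` is a batch of model inputs to project
# (for example, a slice of the padded x_train sequences), and histogram_freq=1
# additionally requires validation data during fit(). The log directory name
# below is an assumed placeholder.
callbacks = [
    keras.callbacks.TensorBoard(
        log_dir='my_log_dir',
        histogram_freq=1,               # activation histograms once per epoch
        embeddings_freq=1,              # embedding projections once per epoch
        embeddings_data=x_train[:500],  # inputs whose embeddings are visualized
    )
]
history = model.fit(x_train, y_train,
                    epochs=20,
                    batch_size=128,
                    validation_split=0.2,
                    callbacks=callbacks)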
# print(base_model.summary())

last_layer = base_model.get_layer('mixed7')
last_output = last_layer.output

x = Flatten()(last_output)
x = Dense(1024, activation='relu')(x)
x = Dense(1, activation='sigmoid')(x)
model = Model(base_model.input, x)

"""
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(512, activation='relu')(x)
preds = Dense(2, activation='softmax')(x)
model = Model(inputs=base_model.input, outputs=preds)
"""

"""
# Initialising the CNN
model = Sequential()

# Create convolutional layer. There are 3 dimensions for input shape
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(299, 299, 3)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(2, activation='softmax'))
"""

"""
# Pooling layer
model.add(MaxPooling2D((2, 2)))

# Convolutional layer
def create_network(self, architecture=Architecture.DIRECT, algorithm=Algorithm.DDQN): if algorithm == Algorithm.DRQN: network_type = "recurrent" else: network_type = "sequential" if architecture == Architecture.DIRECT: if network_type == "inception": print("Built an inception DQN") input_img = Input(shape=(self.history_length, self.state_height, self.state_width)) tower_1 = Convolution2D(16, 1, 1, border_mode='same', activation='relu')(input_img) tower_1 = Convolution2D(16, 3, 3, border_mode='same', activation='relu')(tower_1) tower_2 = Convolution2D(16, 1, 1, border_mode='same', activation='relu')(input_img) tower_2 = Convolution2D(16, 5, 5, border_mode='same', activation='relu')(tower_2) tower_3 = MaxPooling2D((3, 3), strides=(1, 1), border_mode='same')(input_img) tower_3 = Convolution2D(16, 1, 1, border_mode='same', activation='relu')(tower_3) output1 = merge([tower_1, tower_2, tower_3], mode='concat', concat_axis=1) avgpool = AveragePooling2D((7, 7), strides=(8, 8))(output1) flatten = Flatten()(avgpool) output = Dense(len(self.environment.actions))(flatten) model = Model(input=input_img, output=output) model.compile(rmsprop(lr=self.learning_rate), "mse") #model.summary() elif network_type == "sequential": print("Built a sequential DQN") model = Sequential() # print self.history_length, self.state_height, self.state_width # model.add(Convolution2D(16, 3, 3, subsample=(2,2), activation='relu', input_shape=(self.history_length, self.state_height, self.state_width), init='uniform', trainable=True)) # model.add(Convolution2D(32, 3, 3, subsample=(2,2), activation='relu', init='uniform', trainable=True)) # model.add(Convolution2D(64, 3, 3, subsample=(2,2), activation='relu', init='uniform', trainable=True)) # model.add(Convolution2D(128, 3, 3, subsample=(1,1), activation='relu', init='uniform')) # model.add(Convolution2D(256, 3, 3, subsample=(1,1), activation='relu', init='uniform')) model.add( Convolution2D(16, 8, 8, subsample=(4, 4), activation='relu', name='conv1_agent', input_shape=(self.history_length, self.state_height, self.state_width), init='uniform', trainable=True)) model.add( Convolution2D(32, 4, 4, subsample=(2, 2), activation='relu', init='conv2_agent', trainable=True)) model.add(Flatten()) model.add( Dense(512, activation='relu', name='FC1_agent', init='uniform')) model.add(Dense(len(self.environment.actions), init='uniform')) model.compile(rmsprop(lr=self.learning_rate), "mse") elif network_type == "recurrent": print("Built a recurrent DQN") model = Sequential() model.add( TimeDistributed(Convolution2D(16, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True), input_shape=(self.history_length, 1, self.state_height, self.state_width))) model.add( TimeDistributed( Convolution2D(32, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True))) model.add( TimeDistributed( Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True))) model.add( TimeDistributed( Convolution2D(128, 3, 3, subsample=(1, 1), activation='relu', init='uniform'))) model.add( TimeDistributed( Convolution2D(256, 3, 3, subsample=(1, 1), activation='relu', init='uniform'))) model.add(TimeDistributed(Flatten())) model.add( LSTM(512, activation='relu', init='uniform', unroll=True)) model.add(Dense(len(self.environment.actions), init='uniform')) model.compile(rmsprop(lr=self.learning_rate), "mse") #model.summary() elif architecture == Architecture.DUELING: if network_type == "sequential": print("Built a dueling sequential DQN") input = 
Input(shape=(self.history_length, self.state_height, self.state_width)) x = Convolution2D(16, 3, 3, subsample=(2, 2), activation='relu', input_shape=(self.history_length, image_height, image_width), init='uniform', trainable=True)(input) x = Convolution2D(32, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True)(x) x = Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True)(x) x = Convolution2D(128, 3, 3, subsample=(1, 1), activation='relu', init='uniform')(x) x = Convolution2D(256, 3, 3, subsample=(1, 1), activation='relu', init='uniform')(x) x = Flatten()(x) # state value tower - V state_value = Dense(256, activation='relu', init='uniform')(x) state_value = Dense(1, init='uniform')(state_value) state_value = Lambda( lambda s: K.expand_dims(s[:, 0], dim=-1), output_shape=(len( self.environment.actions), ))(state_value) # action advantage tower - A action_advantage = Dense(256, activation='relu', init='uniform')(x) action_advantage = Dense(len(self.environment.actions), init='uniform')(action_advantage) action_advantage = Lambda( lambda a: a[:, :] - K.mean(a[:, :], keepdims=True), output_shape=(len( self.environment.actions), ))(action_advantage) # merge to state-action value function Q state_action_value = merge([state_value, action_advantage], mode='sum') model = Model(input=input, output=state_action_value) model.compile(rmsprop(lr=self.learning_rate), "mse") #model.summary() else: print("ERROR: not implemented") exit() elif architecture == Architecture.SEQUENCE: print("Built a recurrent DQN") """ state_model = Sequential() state_model.add(Convolution2D(16, 3, 3, subsample=(2, 2), activation='relu', input_shape=(self.history_length, self.state_height, self.state_width), init='uniform', trainable=True)) state_model.add(Convolution2D(32, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True)) state_model.add(Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True)) state_model.add(Convolution2D(128, 3, 3, subsample=(1, 1), activation='relu', init='uniform')) state_model.add(Convolution2D(256, 3, 3, subsample=(1, 1), activation='relu', init='uniform')) state_model.add(Flatten()) state_model.add(Dense(512, activation='relu', init='uniform')) state_model.add(RepeatVector(self.max_action_sequence_length)) action_model = Sequential() action_model.add(Masking(mask_value=self.end_token, input_shape=(self.max_action_sequence_length,))) action_model.add(Embedding(input_dim=self.input_action_space_size, output_dim=100, init='uniform', input_length=self.max_action_sequence_length)) action_model.add(TimeDistributed(Dense(100, init='uniform', activation='relu'))) model = Sequential() model.add(Merge([state_model, action_model], mode='concat', concat_axis=-1)) model.add(LSTM(512, return_sequences=True, activation='relu', init='uniform')) model.add(TimeDistributed(Dense(len(self.environment.actions), init='uniform'))) model.compile(rmsprop(lr=self.learning_rate), "mse") model.summary() """ state_model_input = Input(shape=(self.history_length, self.state_height, self.state_width)) state_model = Convolution2D(16, 3, 3, subsample=(2, 2), activation='relu', input_shape=(self.history_length, self.state_height, self.state_width), init='uniform', trainable=True)(state_model_input) state_model = Convolution2D(32, 3, 3, subsample=(2, 2), activation='relu', init='uniform', trainable=True)(state_model) state_model = Convolution2D(64, 3, 3, subsample=(2, 2), activation='relu', init='uniform', 
trainable=True)(state_model) state_model = Convolution2D(128, 3, 3, subsample=(1, 1), activation='relu', init='uniform')(state_model) state_model = Convolution2D(256, 3, 3, subsample=(1, 1), activation='relu', init='uniform')(state_model) state_model = Flatten()(state_model) state_model = Dense(512, activation='relu', init='uniform')(state_model) state_model = RepeatVector( self.max_action_sequence_length)(state_model) action_model_input = Input( shape=(self.max_action_sequence_length, )) action_model = Masking( mask_value=self.end_token, input_shape=( self.max_action_sequence_length, ))(action_model_input) action_model = Embedding( input_dim=self.input_action_space_size, output_dim=100, init='uniform', input_length=self.max_action_sequence_length)(action_model) action_model = TimeDistributed( Dense(100, init='uniform', activation='relu'))(action_model) x = merge([state_model, action_model], mode='concat', concat_axis=-1) x = LSTM(512, return_sequences=True, activation='relu', init='uniform')(x) # state value tower - V state_value = TimeDistributed( Dense(256, activation='relu', init='uniform'))(x) state_value = TimeDistributed(Dense(1, init='uniform'))(state_value) state_value = Lambda(lambda s: K.repeat_elements( s, rep=len(self.environment.actions), axis=2))(state_value) # action advantage tower - A action_advantage = TimeDistributed( Dense(256, activation='relu', init='uniform'))(x) action_advantage = TimeDistributed( Dense(len(self.environment.actions), init='uniform'))(action_advantage) action_advantage = TimeDistributed( Lambda(lambda a: a - K.mean(a, keepdims=True, axis=-1)))( action_advantage) # merge to state-action value function Q state_action_value = merge([state_value, action_advantage], mode='sum') model = Model(input=[state_model_input, action_model_input], output=state_action_value) model.compile(rmsprop(lr=self.learning_rate), "mse") model.summary() return model
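# Standalone sketch (Keras 2 API, assumed shapes) of the dueling-head idea used
# in the DUELING/SEQUENCE branches above: Q(s, a) = V(s) + (A(s, a) - mean_a A(s, a)).
# The feature size and action count are illustrative, not taken from the agent.
import numpy as np
import keras.backend as K
from keras.layers import Input, Dense, Lambda
from keras.models import Model

n_actions = 4                       # assumed number of environment actions
features = Input(shape=(256,))      # assumed flattened state features

state_value = Dense(64, activation='relu')(features)
state_value = Dense(1)(state_value)

advantage = Dense(64, activation='relu')(features)
advantage = Dense(n_actions)(advantage)
advantage = Lambda(lambda a: a - K.mean(a, axis=-1, keepdims=True))(advantage)

# Broadcast V(s) over the action axis and add the centred advantages.
q_values = Lambda(lambda t: t[0] + t[1],
                  output_shape=(n_actions,))([state_value, advantage])

dueling_head = Model(features, q_values)
dueling_head.compile(optimizer='rmsprop', loss='mse')
print(dueling_head.predict(np.random.rand(2, 256)).shape)   # (2, n_actions)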
def DoubleCNNWordEmbed(nb_labels, wvmodel=None, nb_filters_1=1200, nb_filters_2=600, n_gram=2, filter_length_2=10, maxlen=15, vecsize=100, cnn_dropout_1=0.0, cnn_dropout_2=0.0, final_activation='softmax', dense_wl2reg=0.0, dense_bl2reg=0.0, optimizer='adam', with_gensim=False): """ Returns the double-layered convolutional neural network (CNN/ConvNet) for word-embedded vectors. :param nb_labels: number of class labels :param wvmodel: pre-trained Gensim word2vec model :param nb_filters_1: number of filters for the first CNN/ConvNet layer (Default: 1200) :param nb_filters_2: number of filters for the second CNN/ConvNet layer (Default: 600) :param n_gram: n-gram, or window size of first CNN/ConvNet (Default: 2) :param filter_length_2: window size for second CNN/ConvNet layer (Default: 10) :param maxlen: maximum number of words in a sentence (Default: 15) :param vecsize: length of the embedded vectors in the model (Default: 100) :param cnn_dropout_1: dropout rate for the first CNN/ConvNet layer (Default: 0.0) :param cnn_dropout_2: dropout rate for the second CNN/ConvNet layer (Default: 0.0) :param final_activation: activation function. Options: softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear. (Default: 'softmax') :param dense_wl2reg: L2 regularization coefficient (Default: 0.0) :param dense_bl2reg: L2 regularization coefficient for bias (Default: 0.0) :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam) :return: keras sequantial model for CNN/ConvNet for Word-Embeddings :type nb_labels: int :type wvmodel: gensim.models.keyedvectors.KeyedVectors :type nb_filters_1: int :type nb_filters_2: int :type n_gram: int :type filter_length_2: int :type maxlen: int :type vecsize: int :type cnn_dropout_1: float :type cnn_dropout_2: float :type final_activation: str :type dense_wl2reg: float :type dense_bl2reg: float :type optimizer: str :type with_gensim: bool :rtype: keras.models.Sequential or keras.models.Model """ if with_gensim == True: embedding_layer = wvmodel.get_embedding_layer() sequence_input = Input(shape=(maxlen, ), dtype='int32') x = embedding_layer(sequence_input) x = Conv1D(filters=nb_filters_1, kernel_size=n_gram, padding='valid', activation='relu', input_shape=(maxlen, vecsize))(x) if cnn_dropout_1 > 0.0: x = Dropout(cnn_dropout_1)(x) x = Conv1D(filters=nb_filters_2, kernel_size=filter_length_2, padding='valid', activation='relu')(x) if cnn_dropout_2 > 0.0: x = Dropout(cnn_dropout_2)(x) x = MaxPooling1D(pool_size=maxlen - n_gram - filter_length_2 + 1)(x) x = Flatten()(x) x = Dense(nb_labels, activation=final_activation, kernel_regularizer=l2(dense_wl2reg), bias_regularizer=l2(dense_bl2reg))(x) model = Model(sequence_input, x) model.compile(loss='categorical_crossentropy', optimizer=optimizer) else: model = Sequential() model.add( Conv1D(filters=nb_filters_1, kernel_size=n_gram, padding='valid', activation='relu', input_shape=(maxlen, vecsize))) if cnn_dropout_1 > 0.0: model.add(Dropout(cnn_dropout_1)) model.add( Conv1D(filters=nb_filters_2, kernel_size=filter_length_2, padding='valid', activation='relu')) if cnn_dropout_2 > 0.0: model.add(Dropout(cnn_dropout_2)) model.add(MaxPooling1D(pool_size=maxlen - n_gram - filter_length_2 + 1)) model.add(Flatten()) model.add( Dense(nb_labels, activation=final_activation, kernel_regularizer=l2(dense_wl2reg), bias_regularizer=l2(dense_bl2reg))) model.compile(loss='categorical_crossentropy', optimizer=optimizer) return model
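# Hypothetical usage of DoubleCNNWordEmbed on pre-embedded input (the
# with_gensim=False branch). Filter counts, label count and the random data
# below are illustrative only.
import numpy as np

nb_labels, maxlen, vecsize = 4, 15, 100
double_cnn = DoubleCNNWordEmbed(nb_labels, nb_filters_1=64, nb_filters_2=32,
                                maxlen=maxlen, vecsize=vecsize)

X = np.random.rand(32, maxlen, vecsize)                        # 32 embedded sentences
y = np.eye(nb_labels)[np.random.randint(nb_labels, size=32)]   # one-hot labels
double_cnn.fit(X, y, epochs=2, batch_size=8)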
def test_convolutional_recurrent(): num_row = 3 num_col = 3 filters = 2 num_samples = 1 input_channel = 2 input_num_row = 5 input_num_col = 5 sequence_len = 2 for data_format in ['channels_first', 'channels_last']: if data_format == 'channels_first': inputs = np.random.rand(num_samples, sequence_len, input_channel, input_num_row, input_num_col) else: inputs = np.random.rand(num_samples, sequence_len, input_num_row, input_num_col, input_channel) for return_sequences in [True, False]: # test for return state: x = Input(batch_shape=inputs.shape) kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'return_state': True, 'stateful': True, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'valid'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(inputs.shape) outputs = layer(x) output, states = outputs[0], outputs[1:] assert len(states) == 2 model = Model(x, states[0]) state = model.predict(inputs) np.testing.assert_allclose( K.eval(layer.states[0]), state, atol=1e-4) # test for output shape: output = layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'padding': 'valid'}, input_shape=inputs.shape) # No need to check following tests for both data formats if data_format == 'channels_first' or return_sequences: continue # Tests for statefulness model = Sequential() kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, num_col), 'stateful': True, 'batch_input_shape': inputs.shape, 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) model.add(layer) model.compile(optimizer='sgd', loss='mse') out1 = model.predict(np.ones_like(inputs)) # train once so that the states change model.train_on_batch(np.ones_like(inputs), np.random.random(out1.shape)) out2 = model.predict(np.ones_like(inputs)) # if the state is not reset, output should be different assert(out1.max() != out2.max()) # check that output changes after states are reset # (even though the model itself didn't change) layer.reset_states() out3 = model.predict(np.ones_like(inputs)) assert(out2.max() != out3.max()) # check that container-level reset_states() works model.reset_states() out4 = model.predict(np.ones_like(inputs)) assert_allclose(out3, out4, atol=1e-5) # check that the call to `predict` updated the states out5 = model.predict(np.ones_like(inputs)) assert(out4.max() != out5.max()) # cntk doesn't support eval convolution with static # variable, will enable it later if K.backend() != 'cntk': # check regularizers kwargs = {'data_format': data_format, 'return_sequences': return_sequences, 'kernel_size': (num_row, num_col), 'stateful': True, 'filters': filters, 'batch_input_shape': inputs.shape, 'kernel_regularizer': regularizers.L1L2(l1=0.01), 'recurrent_regularizer': regularizers.L1L2(l1=0.01), 'bias_regularizer': 'l2', 'activity_regularizer': 'l2', 'kernel_constraint': 'max_norm', 'recurrent_constraint': 'max_norm', 'bias_constraint': 'max_norm', 'padding': 'same'} layer = convolutional_recurrent.ConvLSTM2D(**kwargs) layer.build(inputs.shape) assert len(layer.losses) == 3 assert layer.activity_regularizer output = layer(K.variable(np.ones(inputs.shape))) assert len(layer.losses) == 4 K.eval(output) # check dropout layer_test(convolutional_recurrent.ConvLSTM2D, kwargs={'data_format': data_format, 'return_sequences': return_sequences, 'filters': filters, 'kernel_size': (num_row, 
num_col), 'padding': 'same', 'dropout': 0.1, 'recurrent_dropout': 0.1}, input_shape=inputs.shape) # check state initialization layer = convolutional_recurrent.ConvLSTM2D(filters=filters, kernel_size=(num_row, num_col), data_format=data_format, return_sequences=return_sequences) layer.build(inputs.shape) x = Input(batch_shape=inputs.shape) initial_state = layer.get_initial_state(x) y = layer(x, initial_state=initial_state) model = Model(x, y) assert model.predict(inputs).shape == layer.compute_output_shape(inputs.shape)
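# A small self-contained ConvLSTM2D sketch, separate from the test above: it maps
# a sequence of two 5x5 two-channel frames to a single feature map (shapes are
# illustrative).
import numpy as np
from keras.models import Sequential
from keras.layers import ConvLSTM2D

seq_model = Sequential()
seq_model.add(ConvLSTM2D(filters=2, kernel_size=(3, 3),
                         input_shape=(2, 5, 5, 2),   # (time, rows, cols, channels)
                         padding='valid',
                         return_sequences=False,
                         data_format='channels_last'))
seq_model.compile(optimizer='sgd', loss='mse')

frames = np.random.rand(1, 2, 5, 5, 2)
print(seq_model.predict(frames).shape)   # (1, 3, 3, 2) with 'valid' padding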
def CNNWordEmbed(nb_labels, wvmodel=None, nb_filters=1200, n_gram=2, maxlen=15, vecsize=100, cnn_dropout=0.0, final_activation='softmax', dense_wl2reg=0.0, dense_bl2reg=0.0, optimizer='adam', with_gensim=False): """ Returns the convolutional neural network (CNN/ConvNet) for word-embedded vectors. Reference: Yoon Kim, "Convolutional Neural Networks for Sentence Classification," *EMNLP* 2014, 1746-1751 (arXiv:1408.5882). [`arXiv <https://arxiv.org/abs/1408.5882>`_] :param nb_labels: number of class labels :param wvmodel: pre-trained Gensim word2vec model :param nb_filters: number of filters (Default: 1200) :param n_gram: n-gram, or window size of CNN/ConvNet (Default: 2) :param maxlen: maximum number of words in a sentence (Default: 15) :param vecsize: length of the embedded vectors in the model (Default: 100) :param cnn_dropout: dropout rate for CNN/ConvNet (Default: 0.0) :param final_activation: activation function. Options: softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear. (Default: 'softmax') :param dense_wl2reg: L2 regularization coefficient (Default: 0.0) :param dense_bl2reg: L2 regularization coefficient for bias (Default: 0.0) :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam) :param with_gensim: boolean variable to indicate if the word-embeddings being used derived from a Gensim's Word2Vec model. (Default: True) :return: keras model (`Sequential` or`Model`) for CNN/ConvNet for Word-Embeddings :type nb_labels: int :type wvmodel: gensim.models.keyedvectors.KeyedVectors :type nb_filters: int :type n_gram: int :type maxlen: int :type vecsize: int :type cnn_dropout: float :type final_activation: str :type dense_wl2reg: float :type dense_bl2reg: float :type optimizer: str :type with_gensim: bool :rtype: keras.models.Sequential or keras.models.Model """ if with_gensim == True: embedding_layer = wvmodel.get_embedding_layer() sequence_input = Input(shape=(maxlen, ), dtype='int32') x = embedding_layer(sequence_input) x = Conv1D(filters=nb_filters, kernel_size=n_gram, padding='valid', activation='relu', input_shape=(maxlen, vecsize))(x) if cnn_dropout > 0.0: x = Dropout(cnn_dropout)(x) x = MaxPooling1D(pool_size=maxlen - n_gram + 1)(x) x = Flatten()(x) x = Dense(nb_labels, activation=final_activation, kernel_regularizer=l2(dense_wl2reg), bias_regularizer=l2(dense_bl2reg))(x) model = Model(sequence_input, x) model.compile(loss='categorical_crossentropy', optimizer=optimizer) else: model = Sequential() model.add( Conv1D(filters=nb_filters, kernel_size=n_gram, padding='valid', activation='relu', input_shape=(maxlen, vecsize))) if cnn_dropout > 0.0: model.add(Dropout(cnn_dropout)) model.add(MaxPooling1D(pool_size=maxlen - n_gram + 1)) model.add(Flatten()) model.add( Dense(nb_labels, activation=final_activation, kernel_regularizer=l2(dense_wl2reg), bias_regularizer=l2(dense_bl2reg))) model.compile(loss='categorical_crossentropy', optimizer=optimizer) return model
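# Illustrative call to CNNWordEmbed on pre-embedded sentences (with_gensim=False);
# the toy data, filter count and label count are assumptions for the sketch.
import numpy as np

cnn_clf = CNNWordEmbed(nb_labels=3, nb_filters=100, maxlen=15, vecsize=100)
X = np.random.rand(16, 15, 100)                  # 16 word-embedded sentences
y = np.eye(3)[np.random.randint(3, size=16)]     # one-hot labels
cnn_clf.fit(X, y, epochs=1, batch_size=4)
predicted_classes = cnn_clf.predict(X[:2]).argmax(axis=-1)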
def BuildDiscriminator(summary=True, spectral_normalization=True, batch_normalization=False, bn_momentum=0.9, bn_epsilon=0.00002, resnet=True, name='Discriminator', plot=False): if resnet: model_input = Input(shape=(32, 32, 3)) resblock_1 = ResBlock(input_shape=(32, 32, 3), channels=128, sampling='down', batch_normalization=True, spectral_normalization=spectral_normalization, name='Discriminator_resblock_Down_1') h = resblock_1(model_input) resblock_2 = ResBlock(input_shape=(16, 16, 128), channels=128, sampling='down', batch_normalization=True, spectral_normalization=spectral_normalization, name='Discriminator_resblock_Down_2') h = resblock_2(h) resblock_3 = ResBlock(input_shape=(8, 8, 128), channels=128, sampling=None, batch_normalization=True, spectral_normalization=spectral_normalization, trainable_sortcut=False, name='Discriminator_resblock_1') h = resblock_3(h) resblock_4 = ResBlock(input_shape=(8, 8, 128), channels=128, sampling=None, batch_normalization=True, spectral_normalization=spectral_normalization, trainable_sortcut=False, name='Discriminator_resblock_2') h = resblock_4(h) h = Activation('relu')(h) h = GlobalSumPooling2D()(h) model_output = DenseSN(1, kernel_initializer='glorot_uniform')(h) model = Model(model_input, model_output, name=name) else: if spectral_normalization: model = Sequential(name=name) model.add( ConvSN2D(64, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same', input_shape=(32, 32, 3))) model.add(LeakyReLU(0.1)) model.add( ConvSN2D(64, kernel_size=4, strides=2, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( ConvSN2D(128, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( ConvSN2D(128, kernel_size=4, strides=2, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( ConvSN2D(256, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( ConvSN2D(256, kernel_size=4, strides=2, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( ConvSN2D(512, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add(GlobalSumPooling2D()) model.add(DenseSN(1, kernel_initializer='glorot_uniform')) else: model = Sequential(name=name) model.add( Conv2D(64, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same', input_shape=(32, 32, 3))) model.add(LeakyReLU(0.1)) model.add( Conv2D(64, kernel_size=4, strides=2, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( Conv2D(128, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( Conv2D(128, kernel_size=4, strides=2, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( Conv2D(256, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( Conv2D(256, kernel_size=4, strides=2, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add( Conv2D(512, kernel_size=3, strides=1, kernel_initializer='glorot_uniform', padding='same')) model.add(LeakyReLU(0.1)) model.add(GlobalSumPooling2D()) model.add(Dense(1, kernel_initializer='glorot_uniform')) if plot: plot_model(model, name + '.png', show_layer_names=True) if summary: print('Discriminator') print('Spectral Normalization: 
{}'.format(spectral_normalization)) model.summary() return model
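# Hedged usage sketch: instantiate the ResNet discriminator defined above and
# score a random batch of 32x32 RGB images. ResBlock, DenseSN, ConvSN2D and
# GlobalSumPooling2D are project-specific layers assumed to be importable here.
import numpy as np

D = BuildDiscriminator(summary=False, resnet=True)
fake_images = np.random.uniform(-1.0, 1.0, size=(8, 32, 32, 3)).astype('float32')
scores = D.predict(fake_images)        # shape (8, 1): unbounded critic scores
print(scores.ravel())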
def CLSTMWordEmbed(nb_labels, wvmodel=None, nb_filters=1200, n_gram=2, maxlen=15, vecsize=100, cnn_dropout=0.0, nb_rnnoutdim=1200, rnn_dropout=0.2, final_activation='softmax', dense_wl2reg=0.0, dense_bl2reg=0.0, optimizer='adam', with_gensim=False): """ Returns the C-LSTM neural networks for word-embedded vectors. Reference: Chunting Zhou, Chonglin Sun, Zhiyuan Liu, Francis Lau, "A C-LSTM Neural Network for Text Classification," (arXiv:1511.08630). [`arXiv <https://arxiv.org/abs/1511.08630>`_] :param nb_labels: number of class labels :param wvmodel: pre-trained Gensim word2vec model :param nb_filters: number of filters (Default: 1200) :param n_gram: n-gram, or window size of CNN/ConvNet (Default: 2) :param maxlen: maximum number of words in a sentence (Default: 15) :param vecsize: length of the embedded vectors in the model (Default: 100) :param cnn_dropout: dropout rate for CNN/ConvNet (Default: 0.0) :param nb_rnnoutdim: output dimension for the LSTM networks (Default: 1200) :param rnn_dropout: dropout rate for LSTM (Default: 0.2) :param final_activation: activation function. Options: softplus, softsign, relu, tanh, sigmoid, hard_sigmoid, linear. (Default: 'softmax') :param dense_wl2reg: L2 regularization coefficient (Default: 0.0) :param dense_bl2reg: L2 regularization coefficient for bias (Default: 0.0) :param optimizer: optimizer for gradient descent. Options: sgd, rmsprop, adagrad, adadelta, adam, adamax, nadam. (Default: adam) :return: keras sequantial model for CNN/ConvNet for Word-Embeddings :type nb_labels: int :type wvmodel: gensim.models.keyedvectors.KeyedVectors :type nb_filters: int :type n_gram: int :type maxlen: int :type vecsize: int :type cnn_dropout: float :type nb_rnnoutdim: int :type rnn_dropout: float :type final_activation: str :type dense_wl2reg: float :type dense_bl2reg: float :type optimizer: str :type with_gensim: bool :rtype: keras.models.Sequential or keras.models.Model """ if with_gensim == True: embedding_layer = wvmodel.get_embedding_layer() sequence_input = Input(shape=(maxlen, ), dtype='int32') x = embedding_layer(sequence_input) x = Conv1D(filters=nb_filters, kernel_size=n_gram, padding='valid', activation='relu', input_shape=(maxlen, vecsize))(x) if cnn_dropout > 0.0: x = Dropout(cnn_dropout)(x) x = MaxPooling1D(pool_size=maxlen - n_gram + 1)(x) x = LSTM(nb_rnnoutdim)(x) if rnn_dropout > 0.0: x = Dropout(rnn_dropout)(x) x = Dense( nb_labels, activation=final_activation, kernel_regularizer=l2(dense_wl2reg), bias_regularizer=l2(dense_bl2reg), )(x) model = Model(sequence_input, x) model.compile(loss='categorical_crossentropy', optimizer=optimizer) else: model = Sequential() model.add( Conv1D(filters=nb_filters, kernel_size=n_gram, padding='valid', activation='relu', input_shape=(maxlen, vecsize))) if cnn_dropout > 0.0: model.add(Dropout(cnn_dropout)) model.add(MaxPooling1D(pool_size=maxlen - n_gram + 1)) model.add(LSTM(nb_rnnoutdim)) if rnn_dropout > 0.0: model.add(Dropout(rnn_dropout)) model.add( Dense( nb_labels, activation=final_activation, kernel_regularizer=l2(dense_wl2reg), bias_regularizer=l2(dense_bl2reg), )) model.compile(loss='categorical_crossentropy', optimizer=optimizer) return model
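# Illustrative call to CLSTMWordEmbed (with_gensim=False); the reduced filter and
# LSTM sizes and the random data are assumptions for the sketch.
import numpy as np

clstm = CLSTMWordEmbed(nb_labels=2, nb_filters=64, nb_rnnoutdim=32,
                       maxlen=15, vecsize=100)
X = np.random.rand(8, 15, 100)
y = np.eye(2)[np.random.randint(2, size=8)]
clstm.fit(X, y, epochs=1, batch_size=4)
predicted_labels = clstm.predict(X).argmax(axis=-1)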
class Model(ModelHelper): """ Class that encapsulates an LSTM model that we have been building. This class makes it easy to work with the different functions used to work with the model. Parameters ---------- path: str Location to load model from. data: pandas DataFrame Pandas dataframe with the variable from `variable` privided. This is used to eventually train and run the model. variable: str Variable to use from `data`. predicted_period_size: int Number of predicted time periods predictions to make. holdout: int, default 0 Number of periods to hold-out from the training set. """ def __init__(self, data, variable, predicted_period_size, path=None, holdout=0, normalize=True, model_type='sequential'): self.path = path self.data = data self.variable = variable self.predicted_period_size = predicted_period_size self.holdout = holdout if model_type in ['sequential', 'functional']: self.model_type = model_type else: raise ValueError( 'Wrong model type: It can be either "sequential" or "functional"' ) if path: self.model = load_model(self.path, custom_objects={'loss': tilted_loss(0.5)}) self.X, self.Y = self.__prepare_data(normalize=normalize) self.__extract_last_series_value() super().__init__() def __extract_last_series_value(self): """ Method for extracting the last value from a series prior to normalization. This value is then used for denormalizing the set. """ if self.remainder: self.last_value = self.data.sort_values('date', ascending=False)\ [:-self.remainder][self.variable].values[0] self.last_date = self.data.sort_values('date', ascending=False)\ [:-self.remainder]['date'].values[0] else: self.last_value = self.data.sort_values('date', ascending=False)\ [self.variable].values[0] self.last_date = self.data.sort_values('date', ascending=False)\ ['date'].values[0] def __prepare_data(self, normalize): """ Prepares data for model. Parameters ---------- normalize: bool If the method should normalize data or not. Normalization is done using normalizations.point_relative_normalization() Returns ------- X and Y prepared for training. """ series = self.data[self.variable].values self.remainder = len(series) % self.predicted_period_size groups = self.create_groups(data=series, group_size=self.predicted_period_size, normalize=normalize) if self.holdout == 0: self.holdout_groups = [] else: self.holdout_groups = groups[::-self.holdout] groups = groups[::-self.holdout] self.default_number_of_periods = groups.shape[1] - 1 return self.split_lstm_input(groups) def build(self, number_of_periods=None, period_length=7, batch_size=1, loss="mse"): """ Builds an LSTM model using Keras. This function works as a simple wrapper for a manually created model. Parameters ---------- period_length: int The size of each observation used as input. number_of_periods: int, default None The number of periods available in the dataset. If None, the model will be built using all available periods - 1 (used for validation). batch_size: int The size of the batch used in each training period. Returns ------- model: Keras model Compiled Keras model that can be trained and stored in disk. 
""" if not number_of_periods: number_of_periods = self.default_number_of_periods if self.model_type == 'sequential': self.model = Sequential() self.model.add( LSTM(units=period_length, batch_input_shape=(batch_size, number_of_periods, period_length), input_shape=(number_of_periods, period_length), return_sequences=False, stateful=False)) self.model.add(Dense(units=period_length)) self.model.add(Activation("linear")) self.model.compile(loss=loss, optimizer="rmsprop") else: input = Input(shape=(number_of_periods, period_length)) x = LSTM(units=period_length, batch_input_shape=(batch_size, number_of_periods, period_length), input_shape=(number_of_periods, period_length), return_sequences=False, stateful=False)(input) x0 = Dense(units=period_length, activation='linear')(x) x1 = Dense(units=period_length, activation='linear')(x) x2 = Dense(units=period_length, activation='linear')(x) self.model = Functional_Model(input, [x0, x1, x2]) self.model.compile(loss=loss, optimizer="rmsprop") return self.model def save(self, path): """ Stores trained model in disk. Useful for storing trained models. Parameters ---------- path: str Location of where to store model. """ return self.model.save(path) def predict(self, output=None, denormalized=False, return_dict=False): """ Makes a prediction based on input data. Parameters ---------- output: int, default None Output index in a multi-output model. It is unused in a single-output model denormalized: bool, default True If method should denormalize data. Method will use the normalizations.point_relative_normalization() return_dict: bool, default False If should return dict that can be serializable as JSON. Useful for returning prediction results with dates as keys. """ if self.model_type == 'sequential': predictions = self.model.predict(x=self.X) else: predictions = self.model.predict(x=self.X)[output] if denormalized: predictions = point_relative_normalization( series=predictions, reverse=True, last_value=self.last_value) dates = [] base_date = datetime.strptime(self.last_date, '%Y-%m-%d') for i in range(1, len(predictions[0]) + 1): d = (base_date + timedelta(days=i)).strftime('%Y-%m-%d') dates.append(d) results = [] for d, p in zip(dates, predictions[0].tolist()): results.append({'date': d, 'prediction': round(p, 2)}) if return_dict: return results else: return predictions[0] def train(self, data=None, epochs=300, verbose=0): """ Trains model using data from class. Parameters ---------- X: pandas DataFrame Pandas dataframe with `variable` used to fir model for the fist time. epochs: int Number of epochs to train model for. verbose: int, default 0 Verbosity level to use. The default (0) means that nothing is printed on the screen. Returns ------- Metrics from the model history. """ if data is not None: self.data = data self.X, self.Y = self.__prepare_data(normalize=self.normalize) self.__extract_last_series_value() if self.model_type == 'sequential': self.train_history = self.model.fit(x=self.X, y=self.Y, batch_size=1, epochs=epochs, verbose=verbose, shuffle=False) else: self.train_history = self.model.fit(x=self.X, y=[self.Y, self.Y, self.Y], batch_size=1, epochs=epochs, verbose=verbose, shuffle=False) self.last_trained = datetime.now().strftime('%Y-%m-%d %H:%M:%S') return self.train_history def evaluate(self, metrics=['mse', 'rmse', 'mape']): """ Evaluates model using provided metrics. 
        The evaluation compares the model's predictions against the training
        targets: MSE is computed on the normalized series, while the other
        metrics are computed on the denormalized series.
        """
        y = point_relative_normalization(series=self.Y[0],
                                         reverse=True,
                                         last_value=self.last_value)
        results = {}
        for metric in metrics:
            if metric == 'mse':
                r = round(self.mse(A=self.Y[0], B=self.predict()), 2)
            else:
                r = round(
                    getattr(self, metric)(A=self.predict(denormalized=True)[0],
                                          B=y), 2)
            results[metric] = r
        return results
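# Hedged end-to-end sketch of the LSTM wrapper above (the Model class shadowing
# keras.models.Model). Column names, the period size and epoch count are
# illustrative; create_groups/split_lstm_input/mse come from the ModelHelper base
# class and point_relative_normalization from this project's normalizations module.
import numpy as np
import pandas as pd

df = pd.DataFrame({
    'date': pd.date_range('2019-01-01', periods=70).strftime('%Y-%m-%d'),
    'close': np.linspace(100.0, 120.0, 70),
})

wrapper = Model(data=df, variable='close', predicted_period_size=7)
wrapper.build(period_length=7)    # sequential variant by default
wrapper.train(epochs=10, verbose=0)
print(wrapper.predict(denormalized=True, return_dict=True))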
def test_sequential_regression(): from keras.models import Sequential, Model from keras.layers import Merge, Embedding, BatchNormalization, LSTM, InputLayer, Input # start with a basic example of using a Sequential model # inside the functional API seq = Sequential() seq.add(Dense(input_dim=10, output_dim=10)) x = Input(shape=(10,)) y = seq(x) model = Model(x, y) model.compile('rmsprop', 'mse') weights = model.get_weights() # test serialization config = model.get_config() model = Model.from_config(config) model.compile('rmsprop', 'mse') model.set_weights(weights) # more advanced model with multiple branches branch_1 = Sequential(name='branch_1') branch_1.add(Embedding(input_dim=100, output_dim=10, input_length=2, name='embed_1')) branch_1.add(LSTM(32, name='lstm_1')) branch_2 = Sequential(name='branch_2') branch_2.add(Dense(32, input_shape=(8,), name='dense_2')) branch_3 = Sequential(name='branch_3') branch_3.add(Dense(32, input_shape=(6,), name='dense_3')) branch_1_2 = Sequential([Merge([branch_1, branch_2], mode='concat')], name='branch_1_2') branch_1_2.add(Dense(16, name='dense_1_2-0')) # test whether impromtu input_shape breaks the model branch_1_2.add(Dense(16, input_shape=(16,), name='dense_1_2-1')) model = Sequential([Merge([branch_1_2, branch_3], mode='concat')], name='final') model.add(Dense(16, name='dense_final')) model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) model.summary() x = (100 * np.random.random((100, 2))).astype('int32') y = np.random.random((100, 8)) z = np.random.random((100, 6)) labels = np.random.random((100, 16)) model.fit([x, y, z], labels, nb_epoch=1) # test if Sequential can be called in the functional API a = Input(shape=(2,), dtype='int32') b = Input(shape=(8,)) c = Input(shape=(6,)) o = model([a, b, c]) outer_model = Model([a, b, c], o) outer_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) outer_model.fit([x, y, z], labels, nb_epoch=1) # test serialization config = outer_model.get_config() outer_model = Model.from_config(config) outer_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy']) outer_model.fit([x, y, z], labels, nb_epoch=1)
def make_cifar10_model(**args): nb_classes = 10 img_rows, img_cols = 32, 32 # use 1 kernel size for all convolutional layers ks = args.get('kernel_size', 3) # tune the number of filters for each convolution layer nb_filters1 = args.get('nb_filters1', 48) nb_filters2 = args.get('nb_filters2', 96) nb_filters3 = args.get('nb_filters3', 192) # tune the pool size once ps = args.get('pool_size', 2) pool_size = (ps,ps) # tune the dropout rates independently #do1 = args.get('dropout1', 0.25) #do2 = args.get('dropout2', 0.25) #do3 = args.get('dropout3', 0.25) do4 = args.get('dropout1', 0.25) do5 = args.get('dropout2', 0.5) #do1 = args.get('dropout1', 0.) #do2 = args.get('dropout2', 0.) #do3 = args.get('dropout3', 0.) #do4 = args.get('dropout4', 0.) #do5 = args.get('dropout5', 0.) # tune the dense layers independently dense1 = args.get('dense1', 512) dense2 = args.get('dense2', 256) if K.image_dim_ordering() == 'th': input_shape = (3, img_rows, img_cols) else: input_shape = (img_rows, img_cols, 3) #act = 'sigmoid' act = 'relu' i = Input( input_shape) l = Conv2D(nb_filters1,( ks, ks), padding='same', activation = act)(i) #l = Conv2D(nb_filters1, (ks, ks), activation=act)(l) l = MaxPooling2D(pool_size=pool_size)(l) #l = Dropout(do1)(l) l = Conv2D(nb_filters2, (ks, ks), padding='same',activation=act)(l) #l = Conv2D(nb_filters2, (ks, ks))(l) l = MaxPooling2D(pool_size=pool_size)(l) #l = Dropout(do2)(l) l = Conv2D(nb_filters3, (ks, ks), padding='same',activation=act)(l) #l = Conv2D(nb_filters3, (ks, ks))(l) l = MaxPooling2D(pool_size=pool_size)(l) #l = Dropout(do3)(l) l = Flatten()(l) l = Dense(dense1,activation=act)(l) l = Dropout(do4)(l) l = Dense(dense2,activation=act)(l) l =Dropout(do5)(l) o = Dense(nb_classes, activation='softmax')(l) model = Model(inputs=i, outputs=o) model.summary() return model model = Sequential() model.add(Convolution2D(nb_filters1, ks, ks, border_mode='same', input_shape=input_shape)) model.add(Activation(act)) model.add(Convolution2D(nb_filters1, ks, ks)) model.add(Activation(act)) model.add(MaxPooling2D(pool_size=pool_size)) model.add(Dropout(do1)) model.add(Convolution2D(nb_filters2, ks, ks, border_mode='same')) model.add(Activation(act)) model.add(Convolution2D(nb_filters2, ks, ks)) model.add(Activation(act)) model.add(MaxPooling2D(pool_size=pool_size)) model.add(Dropout(do2)) model.add(Convolution2D(nb_filters3, ks, ks, border_mode='same')) model.add(Activation(act)) model.add(Convolution2D(nb_filters3, ks, ks)) model.add(Activation(act)) model.add(MaxPooling2D(pool_size=pool_size)) model.add(Dropout(do3)) model.add(Flatten()) model.add(Dense(dense1)) model.add(Activation(act)) model.add(Dropout(do4)) model.add(Dense(dense2)) model.add(Activation(act)) model.add(Dropout(do5)) model.add(Dense(nb_classes, activation='softmax')) return model
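# Quick illustrative call to make_cifar10_model, overriding a few of its tunable
# hyper-parameters via **args (the values and the commented training call are arbitrary).
cifar_model = make_cifar10_model(kernel_size=3, nb_filters1=32,
                                 dense1=256, dropout1=0.3)
cifar_model.compile(loss='categorical_crossentropy',
                    optimizer='adam', metrics=['accuracy'])
# cifar_model.fit(x_train, y_train, batch_size=128, epochs=10,
#                 validation_data=(x_test, y_test))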
def buildModel(self): B = Input(shape=(3, )) b = Dense(5, activation="relu")(B) inputs = [B] merges = [b] S = Input(shape=[2, 60, 1]) inputs.append(S) h = Conv2D(2048, (3, 1), padding="same")(S) h = LeakyReLU(0.001)(h) merges.append(h) m = concatenate(merges, axis=1) m = Dense(1024)(m) m = LeakyReLU(0.001)(m) V = Dense(2, activation='softmax')(m) model = Model(input=inputs, output=V) model.summary() model = Sequential() model.add( Dense(70, input_shape=(42, ), kernel_initializer="lecun_uniform")) model.add(Activation('relu')) model.add(Dense(28)) model.add(Activation('relu')) model.add(Dense(12)) model.add(Activation('relu')) # model.add(Dense(2000)) # model.add(Dropout(0.2)) # model.add(Activation('relu')) # model.add(Dense(5000)) # model.add(Dropout(0.2)) # model.add(Activation('relu')) model.add(Dense(3, kernel_initializer="lecun_uniform")) model.add(Activation('linear')) return model
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs


def weather(feats, anchors, num_classes, input_shape, calc_loss=False):
    # Classification head over the feature tensor `feats`, written with the
    # functional API (the original mixed Model() with Sequential-style .add()
    # calls). The filter count below is a placeholder for the original's
    # ambiguous value.
    x = Conv2D(64, (3, 3), padding='same')(feats)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same')(x)
    x = Flatten()(x)
    x = Dense(512, kernel_constraint=maxnorm(3))(x)
    x = Activation('relu')(x)
    x = Dense(num_classes)(x)
    prob = Activation('softmax')(x)
    return prob


def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    '''Get corrected boxes'''
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
class UrlDetector: def __init__(self, model="simple_nn", vocab_size=87, max_length=200): """ Initiates URL detector model. Default parameters values are taken from J. Saxe et al. - eXpose: A Character- Level Convolutional Neural Network with Embeddings For Detecting Malicious URLs, File Paths and Registry Keys Parameters ---------- model: {"simple_nn", "big_conv_nn"} Path of csv file containing the dataset. max_length: Maximum length of considered URL (crops longer URL). vocab_size: Size of alphabet (letters, digits, symbols...). """ self.max_length = max_length self.vocab_size = vocab_size self.model = Model() self.build_model(model) def build_model(self, model: str): """ Builds given model. Parameters ---------- model: {"simple_nn", "big_conv_nn"} Path of csv file containing the dataset. """ if model == "simple_nn": self._build_simple_nn() elif model == "big_conv_nn": self._build_big_conv_nn() def _build_simple_nn(self): """Defines and compiles a simple NN.""" self.model = Sequential() self.model.add( Embedding(self.vocab_size, 32, input_length=self.max_length)) self.model.add(Flatten()) self.model.add(Dense(1, activation='sigmoid')) self.model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc']) print(self.model.summary()) def _get_complete_conv_layer(self, filter_length, nb_filter): """Wrap up for convolutional layer followed with a summing pool layer, batch normalization and dropout.""" model = Sequential() model.add( Convolution1D(nb_filter=nb_filter, input_shape=(self.max_length, 32), filter_length=filter_length, border_mode='same', activation='relu', subsample_length=1)) # model.add(BatchNormalization()) model.add(Lambda(self._sum_1d, output_shape=(nb_filter, ))) # model.add(BatchNormalization(mode=0)) model.add(Dropout(0.5)) return model @staticmethod def _sum_1d(x): """Sum layers on column axis.""" return K.sum(x, axis=1) def _build_big_conv_nn(self): """Defines and compiles same CNN as J. Saxe et al. 
- eXpose: A Character-Level Convolutional Neural Network with Embeddings For Detecting Malicious URLs, File Paths and Registry Keys.""" main_input = Input(shape=(self.max_length, ), dtype='int32', name='main_input') embedding = Embedding(input_dim=self.vocab_size, output_dim=32, input_length=self.max_length, dropout=0)(main_input) conv1 = self._get_complete_conv_layer(2, 256)(embedding) conv2 = self._get_complete_conv_layer(3, 256)(embedding) conv3 = self._get_complete_conv_layer(4, 256)(embedding) conv4 = self._get_complete_conv_layer(5, 256)(embedding) merged = merge.Concatenate()([conv1, conv2, conv3, conv4]) merged = BatchNormalization()(merged) middle = Dense(1024, activation='relu')(merged) middle = BatchNormalization()(middle) middle = Dropout(0.5)(middle) middle = Dense(1024, activation='relu')(middle) middle = BatchNormalization()(middle) middle = Dropout(0.5)(middle) middle = Dense(1024, activation='relu')(middle) middle = BatchNormalization()(middle) middle = Dropout(0.5)(middle) output = Dense(1, activation='sigmoid')(middle) self.model = Model(input=main_input, output=output) optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False) self.model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc', self.f1]) self.model.summary() def _get_padded_docs(self, encoded_docs: list) -> list: """Makes the data readable for the model.""" padded_docs = pad_sequences(encoded_docs, maxlen=self.max_length, padding='post') return padded_docs def fit(self, encoded_docs: list, labels: list, batch_size=128, epochs=5, verbose=1, training_logs="training_logs", validation_data=None, validation_split=0.2): """ Trains the model with Tensorboard monitoring. Data should be shuffled before calling this function because the validation set is taken from the last samples of the provided dataset. Parameters ---------- encoded_docs One-hot encoded URLs. labels Labels (0/1) of URLs. batch_size Number of samples per gradient update. epochs Number of epochs to train on. verbose Whether to display information (loss, accuracy...) during training. training_logs Directory where to store Tensorboard logs. validation_data Tuple with the validation data (X_val, y_val) validation_split % of data to put in the validation set. Only used if 'validation_data=None'. 
""" if not os.path.exists(training_logs): os.makedirs(training_logs) tensorboard = TensorBoard(log_dir=training_logs) padded_docs = self._get_padded_docs(encoded_docs) if validation_data is None: self.model.fit(padded_docs, labels, batch_size=batch_size, epochs=epochs, validation_split=validation_split, verbose=verbose, callbacks=[tensorboard]) else: one_hot_val_urls, y_val = validation_data X_val = self._get_padded_docs(one_hot_val_urls) self.model.fit(padded_docs, labels, batch_size=batch_size, epochs=epochs, validation_data=(X_val, y_val), verbose=verbose, callbacks=[tensorboard]) def evaluate(self, encoded_docs: list, labels: list): """Computes the accuracy of given data.""" padded_docs = self._get_padded_docs(encoded_docs) loss, accuracy, f1score = self.model.evaluate(padded_docs, labels, verbose=0) print('Accuracy: %f' % (accuracy * 100)) print('F1-score: %f' % f1score) def predict_proba(self, encoded_docs: list) -> np.ndarray: """Predicts the probabilities of given data.""" padded_docs = self._get_padded_docs(encoded_docs) probabilities = self.model.predict(padded_docs) return probabilities def plot_roc_curve(self, encoded_docs: list, labels: list): """Plots the ROC curve and computes its AUC.""" probabilities = self.predict_proba(encoded_docs) fpr, tpr, thresholds = roc_curve(labels, probabilities) roc_auc = auc(fpr, tpr) # Figure plt.figure() lw = 2 plt.plot(fpr, tpr, color='darkorange', lw=lw, label='ROC curve (AUC = %0.2f)' % roc_auc) plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--') plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic example') plt.legend(loc="lower right") @staticmethod def f1(y_true: np.array, y_pred: np.array): """Computes F1-score metric. Code taken from: https://stackoverflow.com/a/45305384""" def compute_recall(y_true, y_pred): """Recall metric. Only computes a batch-wise average of recall. Computes the recall, a metric for multi-label classification of how many relevant items are selected. """ true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) recall_score = true_positives / (possible_positives + K.epsilon()) return recall_score def compute_precision(y_true, y_pred): """Precision metric. Only computes a batch-wise average of precision. Computes the precision, a metric for multi-label classification of how many selected items are relevant. """ true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) precision_score = true_positives / (predicted_positives + K.epsilon()) return precision_score precision = compute_precision(y_true, y_pred) recall = compute_recall(y_true, y_pred) return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
    test_size = 0.2, stratify=labels)
yTrain = np_utils.to_categorical(yTrain, 2)
yTest = np_utils.to_categorical(yTest, 2)

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD

model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# In the first layer, you must specify the expected input data shape:
# here, image tensors of shape (w, h, 3), flattened before the dense layers
# so the final softmax matches the 2-dimensional one-hot targets.
model.add(Flatten(input_shape=(w, h, 3)))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

model.fit(Xtrain, yTrain, batch_size=w, nb_epoch=10, verbose=1)
score = model.evaluate(Xtest, yTest, batch_size=128)
noise_dim = EMBEDDING_DIM * 2 pretrain(G, D, lstm_output, y_train, n_samples, noise_dim) d_loss, g_loss = train(GAN, G, D, lstm_output, y_train, n_samples, noise_dim, verbose=True) data_and_gen, _ = sample_data_and_gen(G, lstm_output, y_train, n_samples, noise_dim) X_train = np.concatenate((lstm_output, data_and_gen)) new_y_train = [] for i in range(n_samples*2): new_y_train.append([0,1]) y_train = np.concatenate((y_train, np.array(new_y_train))) model = Sequential() model.add(Dense(2, activation='softmax', name = 'Dense_5')) model.compile(loss=[focal_loss(gamma=2., alpha=.25)], optimizer='rmsprop', metrics=['acc']) model.fit(X_train, y_train, epochs=5, batch_size=1024) X_test = lstm_test_output """ from sklearn.manifold import TSNE import matplotlib.pyplot as plt import random random.seed(45) index = random.sample(range(len(lstm_output)), 10000) X = lstm_output[index,:] X = np.concatenate((X, np.array(data_and_gen)[n_samples:,].astype(int))) Y = y_train[index,1] for i in range(n_samples): Y = np.concatenate((Y, [2]))
pickle.dump(hidden_rep, reps) reps.close() weights = np.array(((model.layers[1]).get_weights())[0]) new_file = open('weights.txt', 'wb') pickle.dump(weights, new_file) new_file.close() #print data val = model.predict(data) #print emotion #print np.max(hidden_rep), np.min(hidden_rep) #print hidden_rep.shape #print val.shape #print data.shape #print e #imsave('original.jpg', np.reshape(data[0, :], (64, 64))) #imsave('predicted.jpg', np.reshape(val[0, :], (64, 64))) #print np.array(model.predict(data)).shape #new = open('AN_10_rep.txt', 'rb') #print pickle.load(new) ''' model = Sequential() model.add(Dense(30, input_dim = 4096, init = 'uniform')) model.add(Activation('linear')) model.add(Dense(4096, init = data[0])) model.add(Activation('linear')) '''
def get_model(name, X_train, y_train, embeddings, batch_size, nb_epoch, max_len, max_features, nb_classes): print('Building model', name) # get correct loss loss_function = 'categorical_crossentropy' if name == 'LSTM+ATT': # this is the placeholder tensor for the input sequences sequence = Input(shape=(max_len,), dtype='int32') # this embedding layer will transform the sequences of integers # into vectors of size 128 embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=max_len, weights=[embeddings])( sequence) # 4 convolution layers (each 1000 filters) cnn = [Convolution1D(filter_length=filters, nb_filter=1000, border_mode='same') for filters in [2, 3, 5, 7]] # concatenate question = merge([cnn(embedded) for cnn in cnn], mode='concat') # create attention vector from max-pooled convoluted maxpool = Lambda(lambda x: keras_backend.max(x, axis=1, keepdims=False), output_shape=lambda x: (x[0], x[2])) attention_vector = maxpool(question) forwards = AttentionLSTM(64, attention_vector)(embedded) backwards = AttentionLSTM(64, attention_vector, go_backwards=True)(embedded) # concatenate the outputs of the 2 LSTMs answer_rnn = merge([forwards, backwards], mode='concat', concat_axis=-1) after_dropout = Dropout(0.5)(answer_rnn) # we have 17 classes output = Dense(nb_classes, activation='softmax')(after_dropout) model = Model(input=sequence, output=output) # try using different optimizers and different optimizer configs model.compile('adam', loss_function, metrics=['accuracy']) # model.compile('adam', 'hinge', metrics=['hinge']) model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_split=0.1, verbose=0) return model if name == 'LSTM': # this is the placeholder tensor for the input sequences sequence = Input(shape=(max_len,), dtype='int32') # this embedding layer will transform the sequences of integers # into vectors of size 128 embedded = Embedding(embeddings.shape[0], embeddings.shape[1], input_length=max_len, weights=[embeddings])( sequence) # apply forwards and backward LSTM forwards = LSTM(64)(embedded) backwards = LSTM(64, go_backwards=True)(embedded) # concatenate the outputs of the 2 LSTMs answer_rnn = merge([forwards, backwards], mode='concat', concat_axis=-1) after_dropout = Dropout(0.5)(answer_rnn) # we have 17 classes output = Dense(nb_classes, activation='softmax')(after_dropout) model = Model(input=sequence, output=output) # try using different optimizers and different optimizer configs model.compile('adam', loss_function, metrics=['accuracy']) model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, validation_split=0.1, verbose=0) return model if name == 'MLP': model = Sequential() model.add(Dense(512, input_shape=(max_len,))) model.add(Activation('relu')) model.add(Dropout(0.5)) model.add(Dense(nb_classes)) model.add(Activation('softmax')) model.compile(loss=loss_function, optimizer='adam', metrics=['accuracy']) model.fit(X_train, y_train, nb_epoch=nb_epoch, batch_size=batch_size, validation_split=0.1, verbose=0) return model
# dim in BatchNormalization
# For a multi-dimensional np array, specifying an axis means we collapse that axis
# while keeping all the other axes the same.
# When we compute a BatchNormalization along an axis, we preserve the dimensions of the array,
# and we normalize with respect to the mean and standard deviation over every other axis,
# meaning normalisation is carried out within separate channels.
X = BatchNormalization(axis=3, name=bn_name_base + '2a')(X)

# ----------------------------------------------------------------------------------------
# Udacity
from keras.models import Sequential

# Create the Sequential model (the layers below are independent examples, not one coherent stack)
model = Sequential()
model.add(Flatten(input_shape=(32, 32, 3)))
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Compile the model: specify how to train it
# compile(optimizer, loss=None, metrics=None, loss_weights=None, sample_weight_mode=None,
#         weighted_metrics=None, target_tensors=None)
# e.g. model.compile('adam', 'categorical_crossentropy', ['accuracy'])

# batch_size is set to 32 by default
# model.evaluate(x=None, y=None, batch_size=None, verbose=1, sample_weight=None, steps=None, callbacks=None)
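# A small self-contained example of the compile/fit/evaluate calls referenced in
# the notes above, run on random stand-in data (shapes chosen arbitrarily).
import numpy as np
from keras.models import Sequential
from keras.layers import Flatten, Dense

toy = Sequential()
toy.add(Flatten(input_shape=(32, 32, 3)))
toy.add(Dense(100, activation='relu'))
toy.add(Dense(10, activation='softmax'))
toy.compile('adam', 'categorical_crossentropy', ['accuracy'])

x = np.random.rand(64, 32, 32, 3)
y = np.eye(10)[np.random.randint(10, size=64)]
toy.fit(x, y, batch_size=32, epochs=1, verbose=1)
print(toy.evaluate(x, y, batch_size=32, verbose=0))   # [loss, accuracy]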
class SeqModel:
    # network model as described by happynoom
    def __init__(self, input_shape=None, learning_rate=0.001, n_layers=2,
                 n_hidden=8, rate_dropout=0.2, loss=risk_estimation):
        self.input_shape = input_shape
        self.learning_rate = learning_rate
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.rate_dropout = rate_dropout
        self.loss = loss
        self.model = None

    def lstmModel(self):
        self.model = Sequential()
        self.model.add(GaussianNoise(stddev=0.01, input_shape=self.input_shape))
        for i in range(0, self.n_layers - 1):
            self.model.add(
                LSTM(self.n_hidden * 4,
                     return_sequences=True,
                     activation='softsign',
                     recurrent_activation='hard_sigmoid',
                     kernel_initializer='glorot_uniform',
                     recurrent_initializer='orthogonal',
                     bias_initializer=initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None),
                     dropout=self.rate_dropout,
                     recurrent_dropout=self.rate_dropout))
        self.model.add(
            LSTM(self.n_hidden,
                 return_sequences=False,
                 activation='softsign',
                 recurrent_activation='hard_sigmoid',
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer=initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None),
                 dropout=self.rate_dropout,
                 recurrent_dropout=self.rate_dropout))
        self.model.add(Dense(256, activation='relu'))
        self.model.add(Dropout(self.rate_dropout))
        self.model.add(
            BatchNormalization(axis=-1,
                               beta_initializer=initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)))
        self.model.add(Dense(256, activation='relu'))
        self.model.add(Dropout(self.rate_dropout))
        self.model.add(
            BatchNormalization(axis=-1,
                               beta_initializer=initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)))
        self.model.add(Dense(output_size, activation='softmax'))
        opt = RMSprop(lr=self.learning_rate)
        self.model.compile(loss=risk_estimation_sum, optimizer=opt, metrics=['accuracy'])
        self.model.summary()
        return self.model

    def attention_3d_block(self, inputs):
        input_dim = int(inputs.shape[2])
        a = Permute((2, 1))(inputs)
        a = Reshape((input_dim, time_step))(a)
        a = Dense(time_step, activation='softmax')(a)
        # single_attention_vector
        # a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
        # a = RepeatVector(input_dim)(a)
        a_probs = Permute((2, 1), name='attention_vec')(a)
        output_attention_mul = Multiply()([inputs, a_probs])
        return output_attention_mul

    def lstmAttentionModel(self):
        K.clear_session()  # clear the previous model to avoid filling up memory
        inputs = Input(shape=(time_step, input_size, ))
        attention_mul = self.attention_3d_block(inputs)
        for i in range(0, self.n_layers - 1):
            attention_mul = LSTM(
                self.n_hidden * 4,
                return_sequences=True,
                activation='softsign',
                recurrent_activation='hard_sigmoid',
                kernel_initializer='glorot_uniform',
                recurrent_initializer='orthogonal',
                bias_initializer='zeros',
                dropout=self.rate_dropout,
                recurrent_dropout=self.rate_dropout)(attention_mul)
        attention_mul = LSTM(
            self.n_hidden,
            return_sequences=False,
            activation='softsign',
            recurrent_activation='hard_sigmoid',
            kernel_initializer='glorot_uniform',
            recurrent_initializer='orthogonal',
            bias_initializer='zeros',
            dropout=self.rate_dropout,
            recurrent_dropout=self.rate_dropout)(attention_mul)
        attention_mul = Dense(256,
                              kernel_initializer=initializers.glorot_uniform(),
                              activation='relu')(attention_mul)
        attention_mul = Dropout(self.rate_dropout)(attention_mul)
        attention_mul = BatchNormalization(
            axis=-1, beta_initializer='ones')(attention_mul)
        attention_mul = Dense(256,
                              kernel_initializer=initializers.glorot_uniform(),
                              activation='relu')(attention_mul)
        attention_mul = Dropout(self.rate_dropout)(attention_mul)
        attention_mul = BatchNormalization(
            axis=-1,
beta_initializer='ones')(attention_mul) outputs = Dense(output_size, activation='softmax')(attention_mul) self.model = Model(input=[inputs], output=outputs) opt = RMSprop(lr=self.learning_rate) self.model.compile(loss=risk_estimation, optimizer=opt, metrics=['accuracy']) self.model.summary() return self.model def train(self): # fit network history = self.model.fit(train_x, train_y, epochs=2000, batch_size=2048, verbose=1, shuffle=True, validation_data=(test_x, test_y)) # plot history plt.plot(history.history['loss'], label='train') plt.legend() plt.show() def save(self, path=model_path, type='evaluate', name=None): if name: self.model.save(path + name) return if type == 'evaluate': file = 'lstm_evaluate_' + timestamp + '.h5' else: file = 'lstm_' + timestamp + '.h5' self.model.save(path + file) return def load(self, path=model_path, type='evaluate', version='lastest', model_name=None): if model_name: self.model = load_model(path + model_name, custom_objects={ 'risk_estimation': risk_estimation, 'risk_estimation_sum': risk_estimation_sum }) else: file_names = os.listdir(path) model_files = [] eval_files = [] if version == 'lastest': for file in file_names: if re.search('eval', file) is not None: eval_files.append(file) else: model_files.append(file) if type == 'evaluate': eval_files.sort(reverse=True) model_name = eval_files[0] else: model_files.sort(reverse=True) model_name = model_files[0] print(model_name, 'has loaded') self.model = load_model(path + model_name, custom_objects={ 'risk_estimation': risk_estimation, 'risk_estimation_sum': risk_estimation_sum }) elif version == 'softmax': for file in file_names: if re.search('softmax', file) is not None: model_files.append(file) model_files.sort(reverse=True) model_name = model_files[0] self.model = load_model(path + model_name, custom_objects={ 'risk_estimation': risk_estimation, 'risk_estimation_sum': risk_estimation_sum }) else: self.model = load_model(path + version, custom_objects={ 'risk_estimation': risk_estimation, 'risk_estimation_sum': risk_estimation_sum }) def predict(self, test): predict = [] for sample_index in range(test.shape[0]): test_data = test[sample_index].reshape(1, time_step, input_size) prev = self.model.predict(test_data) predict.append(prev) return np.array(predict)
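# Hedged usage sketch for SeqModel. time_step, input_size, train_x/train_y,
# test_x and the risk_estimation losses are module-level names in this project;
# nothing below is taken from an actual training run.
seq = SeqModel(input_shape=(time_step, input_size),
               learning_rate=0.001, n_layers=2, n_hidden=8)
seq.lstmModel()            # or seq.lstmAttentionModel()
seq.train()                # fits on the module-level train_x / train_y
seq.save(type='evaluate')  # writes lstm_evaluate_<timestamp>.h5 under model_path
predictions = seq.predict(test_x)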
for l in model.layers[:12]: lrs.append(l) a = Input(shape = input_shape) x = lrs[0](a) for l in lrs[1:]: x = l(x) x = conv2d_bn_cpd(x, 512, 10, 6, 6, name="blabla") x = Conv2D(num_classes, kernel_size=(1,1), activation=None, name="final_conv")(x) x = Flatten()(x) x = (Activation('softmax', name="final_softmax"))(x) model = Model(inputs=a, outputs=x) ''' model.add(Conv2D(num_classes, kernel_size=(1,1))) model.add(Flatten()) model.add(Activation('softmax')) ''' # load model weights, if saved # model.load_weights("weights.best.hdf5") # print("loadad weights!") model.summary() for l in model.layers[:12]: l.trainable = False print("Freezeing: " + l.name) # initiate RMSprop optimizer # opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
def BuildGenerator(summary=True, resnet=True, bn_momentum=0.9, bn_epsilon=0.00002,
                   name='Generator', plot=False):
    if resnet:
        model_input = Input(shape=(128,))
        h = Dense(4 * 4 * 256, kernel_initializer='glorot_uniform')(model_input)
        h = Reshape((4, 4, 256))(h)
        resblock_1 = ResBlock(input_shape=(4, 4, 256), sampling='up',
                              bn_epsilon=bn_epsilon, bn_momentum=bn_momentum,
                              name='Generator_resblock_1')
        h = resblock_1(h)
        resblock_2 = ResBlock(input_shape=(8, 8, 256), sampling='up',
                              bn_epsilon=bn_epsilon, bn_momentum=bn_momentum,
                              name='Generator_resblock_2')
        h = resblock_2(h)
        resblock_3 = ResBlock(input_shape=(16, 16, 256), sampling='up',
                              bn_epsilon=bn_epsilon, bn_momentum=bn_momentum,
                              name='Generator_resblock_3')
        h = resblock_3(h)
        h = BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum)(h)
        h = Activation('relu')(h)
        model_output = Conv2D(3, kernel_size=3, strides=1, padding='same',
                              activation='tanh')(h)
        model = Model(model_input, model_output, name=name)
    else:
        model = Sequential(name=name)
        model.add(Dense(4 * 4 * 512, kernel_initializer='glorot_uniform', input_dim=128))
        model.add(Reshape((4, 4, 512)))
        model.add(Conv2DTranspose(256, kernel_size=4, strides=2, padding='same',
                                  activation='relu', kernel_initializer='glorot_uniform'))
        model.add(BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum))
        model.add(Conv2DTranspose(128, kernel_size=4, strides=2, padding='same',
                                  activation='relu', kernel_initializer='glorot_uniform'))
        model.add(BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum))
        model.add(Conv2DTranspose(64, kernel_size=4, strides=2, padding='same',
                                  activation='relu', kernel_initializer='glorot_uniform'))
        model.add(BatchNormalization(epsilon=bn_epsilon, momentum=bn_momentum))
        model.add(Conv2DTranspose(3, kernel_size=3, strides=1, padding='same',
                                  activation='tanh'))

    if plot:
        plot_model(model, name + '.png', show_layer_names=True)
    if summary:
        print("Generator")
        model.summary()
    return model
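# Hedged usage sketch (not part of the original source): the generator maps a
# 128-dimensional latent vector to a 32x32x3 image in [-1, 1] (4x4 is upsampled to
# 8, 16, then 32 by the three up-sampling blocks / transposed convolutions).
if __name__ == '__main__':
    import numpy as np
    generator = BuildGenerator(summary=False)
    z = np.random.normal(size=(8, 128))
    fake_images = generator.predict(z)
    print(fake_images.shape)  # expected: (8, 32, 32, 3)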
out_a = vision_model(digit_a)
out_b = vision_model(digit_b)

concatenated = keras.layers.concatenate([out_a, out_b])
out = Dense(1, activation='sigmoid')(concatenated)

classification_model = Model([digit_a, digit_b], out)

"""
visual question answering model
"""
from keras.layers import Conv2D, MaxPooling2D, Flatten
from keras.layers import Input, LSTM, Embedding, Dense
from keras.models import Model, Sequential

# define a vision model that encodes an image into a vector
vision_model = Sequential()
vision_model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3)))
vision_model.add(Conv2D(64, (3, 3), activation='relu'))
vision_model.add(MaxPooling2D((2, 2)))
vision_model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
vision_model.add(Conv2D(128, (3, 3), activation='relu'))
vision_model.add(MaxPooling2D((2, 2)))
vision_model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
vision_model.add(Conv2D(256, (3, 3), activation='relu'))
vision_model.add(Conv2D(256, (3, 3), activation='relu'))
vision_model.add(MaxPooling2D((2, 2)))
vision_model.add(Flatten())

# let's get a tensor with the output of our vision model
image_input = Input(shape=(224, 224, 3))
encoded_image = vision_model(image_input)
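# Hedged sketch of how the encoded image is typically combined with a question branch
# (the snippet stops at encoded_image); the vocabulary size, question length and the
# 1000-way answer softmax below are illustrative assumptions, not values from the
# original code.
from keras.layers import concatenate

question_input = Input(shape=(100,), dtype='int32')
embedded_question = Embedding(input_dim=10000, output_dim=256, input_length=100)(question_input)
encoded_question = LSTM(256)(embedded_question)

# concatenate the question vector with the image vector and predict an answer
merged = concatenate([encoded_question, encoded_image])
output = Dense(1000, activation='softmax')(merged)
vqa_model = Model(inputs=[image_input, question_input], outputs=output)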
def test_nested_model_trainability():
    # a Sequential inside a Model
    inner_model = Sequential()
    inner_model.add(Dense(2, input_dim=1))
    x = Input(shape=(1,))
    y = inner_model(x)
    outer_model = Model(x, y)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []

    # a Sequential inside a Sequential
    inner_model = Sequential()
    inner_model.add(Dense(2, input_dim=1))
    outer_model = Sequential()
    outer_model.add(inner_model)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []

    # a Model inside a Model
    x = Input(shape=(1,))
    y = Dense(2)(x)
    inner_model = Model(x, y)
    x = Input(shape=(1,))
    y = inner_model(x)
    outer_model = Model(x, y)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []

    # a Model inside a Sequential
    x = Input(shape=(1,))
    y = Dense(2)(x)
    inner_model = Model(x, y)
    outer_model = Sequential()
    outer_model.add(inner_model)
    assert outer_model.trainable_weights == inner_model.trainable_weights
    inner_model.trainable = False
    assert outer_model.trainable_weights == []
    inner_model.trainable = True
    inner_model.layers[-1].trainable = False
    assert outer_model.trainable_weights == []