def main():
    """Train and evaluate a small Keras CNN on the data from ``load_dataset``.

    The training set is shuffled once, both image sets are divided by 255
    and given a trailing channel axis, and a two-convolution network is
    trained for 30 epochs. The test set doubles as the validation set
    during training and is evaluated once more at the end.

    Prints the training tensor shape, the model summary, the per-epoch Keras
    progress output and the final ``[loss, accuracy]`` test score.
    Returns nothing.
    """
    training_images, training_labels, test_images, test_labels = load_dataset()

    # One shared permutation keeps images and labels aligned after shuffling.
    perm_train = np.random.permutation(training_labels.size)
    training_labels = training_labels[perm_train]

    # Dividing by 255 presumably maps 8-bit pixel values into [0, 1]
    # (TODO confirm against load_dataset). The trailing axis is the single
    # channel expected by the (HEIGHT, WIDTH, 1) input layer below.
    training_images = training_images[perm_train, :, :] / 255.0
    training_images = np.expand_dims(training_images, -1)
    print(training_images.shape)

    test_images = test_images / 255.0
    test_images = np.expand_dims(test_images, -1)

    # Labels are deliberately NOT one-hot encoded: the model is compiled with
    # the sparse categorical cross-entropy loss, which consumes class ids.

    BATCH_SIZE = 32 * 8
    WIDTH, HEIGHT = 28, 28

    # Defining the network
    input_layer = Input(shape=(HEIGHT, WIDTH, 1), name='input_layer')
    cnn1 = Conv2D(filters=32,
                  kernel_size=3,
                  strides=(1, 1),
                  padding='same',
                  activation='relu')(input_layer)
    maxpool = MaxPooling2D(pool_size=2)(cnn1)
    cnn2 = Conv2D(filters=32,
                  kernel_size=3,
                  strides=(1, 1),
                  padding='valid',
                  activation='relu')(maxpool)
    maxpool = MaxPooling2D(pool_size=2)(cnn2)
    flat = Flatten()(maxpool)
    dense1 = Dense(units=128, activation='relu')(flat)
    dropout = Dropout(.5)(dense1)
    output_layer = Dense(units=NUM_CLASSES, activation='softmax')(dropout)

    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=tf.train.AdamOptimizer(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that would emit an extra "None" line).
    model.summary()

    model.fit(x=training_images,
              y=training_labels,
              batch_size=BATCH_SIZE,
              epochs=30,
              verbose=1,
              validation_data=(test_images, test_labels))

    # With metrics=['accuracy'], evaluate() returns [loss, accuracy].
    test_score = model.evaluate(x=test_images,
                                y=test_labels,
                                batch_size=BATCH_SIZE)
    print('test score = {}'.format(test_score))
def main():
    """Train a CNN with raw TensorFlow ops, then replay its weights in Keras.

    Steps:
      1. Load the data, shuffle the training set, divide the images by 255,
         add a channel axis and one-hot encode the labels.
      2. Build a conv -> pool -> conv -> pool -> dense -> dropout -> dense
         graph from ``tf.nn`` primitives and train it with Adam for
         ``epochs`` epochs, printing the loss/accuracy of every epoch.
      3. Measure the test accuracy inside the same session and export all
         trainable variables as NumPy arrays.
      4. Build the equivalent Keras model, copy the exported arrays into its
         layers (in creation order) and evaluate it on the test set.

    Everything is reported through ``print``; the graph is additionally
    written to the ``graph`` directory for TensorBoard. Returns nothing.
    """
    training_images, training_labels, test_images, test_labels = load_dataset()

    # Sample counts are taken before the labels are one-hot encoded, while
    # ``.size`` still equals the number of samples.
    N = training_labels.size
    Nt = test_labels.size

    # One shared permutation keeps images and labels aligned after shuffling.
    perm_train = np.random.permutation(N)
    training_labels = training_labels[perm_train]

    # Dividing by 255 presumably maps 8-bit pixel values into [0, 1]
    # (TODO confirm against load_dataset). The trailing axis is the single
    # channel expected by the NHWC placeholder below.
    training_images = training_images[perm_train, :, :] / 255.0
    training_images = np.expand_dims(training_images, -1)
    print(training_images.shape)

    test_images = test_images / 255.0
    test_images = np.expand_dims(test_images, -1)

    # One-hot labels: required by softmax_cross_entropy_with_logits and by the
    # Keras 'categorical_crossentropy' loss used further down.
    training_labels = to_categorical(training_labels, NUM_CLASSES)
    test_labels = to_categorical(test_labels, NUM_CLASSES)

    BATCH_SIZE = 32 * 8
    WIDTH, HEIGHT = 28, 28
    epochs = 30

    # The graph-level seed only influences random ops created AFTER this call,
    # so it has to precede the variable initializers and the dropout op.
    # NOTE: the NumPy shuffle above is not seeded, so runs still differ in
    # sample order.
    tf.set_random_seed(1234)

    # Defining the placeholders
    input_data = tf.placeholder(dtype=tf.float32,
                                shape=[None, HEIGHT, WIDTH, 1],
                                name='data')
    input_labels = tf.placeholder(dtype=tf.float32,
                                  shape=[None, NUM_CLASSES],
                                  name='labels')
    # NOTE(review): despite the name, this value is passed as the second
    # positional argument of tf.nn.dropout, which in TF 1.x is ``keep_prob``.
    # That matches the feeds below (0.5 while training, 1 while testing).
    do_rate = tf.placeholder(dtype=tf.float32, name='dropout_rate')

    with tf.name_scope('conv1'):
        with tf.variable_scope('conv1'):
            W_conv1 = tf.get_variable('w', [3, 3, 1, 32])
            b_conv1 = tf.get_variable('b', [32])
        conv1 = tf.nn.conv2d(input=input_data,
                             filter=W_conv1,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
        relu1 = tf.nn.relu(conv1 + b_conv1)

    with tf.name_scope('pool1'):
        pool1 = tf.nn.max_pool(value=relu1,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

    with tf.name_scope('conv2'):
        with tf.variable_scope('conv2'):
            W_conv2 = tf.get_variable('w', [3, 3, 32, 32])
            b_conv2 = tf.get_variable('b', [32])
        conv2 = tf.nn.conv2d(input=pool1,
                             filter=W_conv2,
                             strides=[1, 1, 1, 1],
                             padding='VALID')
        relu2 = tf.nn.relu(conv2 + b_conv2)

    with tf.name_scope('pool2'):
        pool2 = tf.nn.max_pool(value=relu2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

    with tf.name_scope('dense1'):
        with tf.variable_scope('dense1'):
            W_dense1 = tf.get_variable('w', [6 * 6 * 32, 128])
            b_dense1 = tf.get_variable('b', [128])
        # Spatial size: 28 -> (conv SAME) 28 -> (pool) 14 -> (conv VALID) 12
        # -> (pool) 6, with 32 channels, hence 6 * 6 * 32 features.
        flat = tf.reshape(pool2, [-1, 6 * 6 * 32], name='reshape')
        dense1 = tf.matmul(flat, W_dense1)
        relu3 = tf.nn.relu(dense1 + b_dense1)

    with tf.name_scope('dropout'):
        dropout = tf.nn.dropout(relu3, do_rate)

    with tf.name_scope('output'):
        with tf.variable_scope('output'):
            W_out = tf.get_variable('w', [128, NUM_CLASSES])
            b_out = tf.get_variable('b', [NUM_CLASSES])
        # Raw logits; the softmax is applied inside the loss.
        output = tf.matmul(dropout, W_out) + b_out

    # Alternative (not used here): the same network can be built by applying
    # Keras layers directly to ``input_data``. In that case the feed_dict has
    # to provide the Keras backend 'learning_phase' tensor instead of
    # ``do_rate``.

    print('\n\n')
    print('-------------------------------------------------------')
    print('--------------- Trainable parameters ------------------')
    print('-------------------------------------------------------')
    total_parameters = 0
    for v in tf.trainable_variables():
        shape = v.get_shape()
        print(shape)
        params = 1
        for dim in shape:
            params *= dim.value
        total_parameters += params
    print('total_parameters = {}'.format(total_parameters))
    print('-------------------------------------------------------\n\n')

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=input_labels,
                                                logits=output,
                                                name='loss'))
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
    # Per-sample 0/1 correctness vector; summed in Python so that a smaller
    # final batch is weighted correctly.
    accuracy = tf.cast(
        tf.equal(tf.argmax(input_labels, 1), tf.argmax(output, 1)),
        tf.float32)

    print('')
    print('-------------------------------------------------------')
    print('---------- Starting a TF session ----------------------')
    print('-------------------------------------------------------')
    print('')

    tf_weights = []

    # Number of mini-batches per pass; both are loop invariants.
    train_steps = int(np.ceil(float(N) / float(BATCH_SIZE)))
    test_steps = int(np.ceil(float(Nt) / float(BATCH_SIZE)))

    # Training:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter('graph', sess.graph)

        print('-------------------------------------------------------')
        print('--------------- Training phase ------------------------')
        print('-------------------------------------------------------')
        for i in range(epochs):
            # total_l accumulates the per-batch mean losses (a sum, not an
            # average); total_acc accumulates the count of correct samples.
            total_l = 0
            total_acc = 0
            for step in range(train_steps):
                x_in, y_in = get_batch(step, BATCH_SIZE, training_images,
                                       training_labels)
                batch_loss, acc, _ = sess.run([loss, accuracy, train_op], {
                    input_data: x_in,
                    input_labels: y_in,
                    do_rate: 0.5
                })
                total_l += batch_loss
                total_acc += np.sum(acc)
            total_acc /= np.float32(N)
            print(
                "Epoch {}: Training loss = {}, Training accuracy = {}".format(
                    i, total_l, total_acc))

        # Test:
        total_acc = 0
        for step in range(test_steps):
            x_in, y_in = get_batch(step, BATCH_SIZE, test_images, test_labels)
            acc = sess.run([accuracy], {
                input_data: x_in,
                input_labels: y_in,
                do_rate: 1
            })
            total_acc += np.sum(acc)
        total_acc /= np.float32(Nt)
        print('\n-----------------------')
        print("Test accuracy = {}".format(total_acc))
        print('-------------------------------------------------------')

        # Exporting the trained weights into a list of numpy arrays, in
        # variable creation order (w, b of conv1, conv2, dense1, output).
        for v in tf.trainable_variables():
            tf_weights.append(sess.run(v))

        writer.close()

    print('')
    print('-------------------------------------------------------')
    print('---------- Starting a Keras session -------------------')
    print('-------------------------------------------------------')
    print('')

    # Building a Keras model with the same layer sequence as the TF graph.
    input_layer = Input(shape=(HEIGHT, WIDTH, 1), name='input_layer')
    Kkcnn1 = Conv2D(filters=32,
                    kernel_size=3,
                    strides=(1, 1),
                    padding='same',
                    activation='relu')(input_layer)
    Kkmaxpool1 = MaxPooling2D(pool_size=2)(Kkcnn1)
    Kkcnn2 = Conv2D(filters=32,
                    kernel_size=3,
                    strides=(1, 1),
                    padding='valid',
                    activation='relu')(Kkmaxpool1)
    Kkmaxpool2 = MaxPooling2D(pool_size=2)(Kkcnn2)
    Kkflat = Flatten()(Kkmaxpool2)
    Kkdense1 = Dense(units=128, activation='relu')(Kkflat)
    Kkdropout = Dropout(.5)(Kkdense1)
    output_layer = Dense(units=NUM_CLASSES, activation='softmax')(Kkdropout)

    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer=tf.train.AdamOptimizer(),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Loading the already trained weights onto the Keras layers. Layers
    # without trainable weights (input, pooling, flatten, dropout) are
    # skipped; every other layer consumes the next slice of tf_weights.
    c = 0  # cursor into tf_weights
    for layer in model.layers:
        trainable_weights = layer.trainable_weights
        if not trainable_weights:
            continue
        # e.g. for a normal conv layer, it is two: weight and bias.
        len_w = len(trainable_weights)
        layer.set_weights(tf_weights[c:c + len_w])
        c += len_w

    # With metrics=['accuracy'], evaluate() returns [loss, accuracy].
    keras_score = model.evaluate(x=test_images,
                                 y=test_labels,
                                 batch_size=BATCH_SIZE)
    print('\n')
    print('Keras test score = {}'.format(keras_score))
    print('\n')
Important Tips: Contemplating the following two tips deeply and patiently !!!!!!!!!! Compare model.summary() and pp model.trainable_weights, we can see that how Conv1D weights or filters are used to screen embedding_1 tensor In fact, for all layers, either weights of Conv1D, Dense, or that of Conv2D, consider them to be filters, to screen previous layer's tensor How embedding weights transform input_1 (?, 1000) to embedding_1 (?, 1000, 100)? deserve research later """ model_path = "/Users/Natsume/Downloads/data_for_all/word_embeddings/pretrainedWordEmbedding_2.h5" if os.path.isfile(model_path): model = load_model( "/Users/Natsume/Downloads/data_for_all/word_embeddings/pretrainedWordEmbedding_2.h5" ) model.fit(x_train, y_train, batch_size=128, epochs=1, validation_split=0.2) # validation_data=(x_val, y_val)) model.save( "/Users/Natsume/Downloads/data_for_all/word_embeddings/pretrainedWordEmbedding_3.h5" ) loss, accuracy = model.evaluate(x_test, y_test, batch_size=len(x_test), verbose=1) preds = model.predict(x_test) preds_integer = np.argmax(preds, axis=1)
conv2d_weights_1 = K.batch_get_value(model.layers[1].weights) (conv2d_weights[0] == conv2d_weights_1[0]).sum() flatten_out_1 = flatten_layer_output([x])[0] (flatten_out == flatten_out_1).sum() flatten_weights_1 = K.batch_get_value(model.layers[2].weights) (flatten_weights == flatten_weights_1) # 2 empty list dense_out_1 = dense_layer_output([x])[0] (dense_out == dense_out_1).sum() dense_weights_1 = K.batch_get_value(model.layers[3].weights) (dense_weights[0] == dense_weights_1[0]).sum() # weights are totally differ loss, accu = model.evaluate(x, y) """ def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):\n', ' Returns the loss value & metrics values for the model in test ' 'mode.\n', '\n', ' Computation is done in batches.\n', '\n', ' Arguments:\n', ' x: Numpy array of test data,\n', ' or list of Numpy arrays if the model has multiple inputs.\n', ' If all inputs in the model are named,\n', ' you can also pass a dictionary\n', ' mapping input names to Numpy arrays.\n', ' y: Numpy array of target data,\n',