def test_rebuild_model():
    """A Sequential model can keep growing after its shapes are queried.

    After each `add`, the last layer's `output_shape` must reflect the
    newly added layer (batch dimension is `None`).
    """
    model = Sequential()
    model.add(Dense(128, input_shape=(784,)))
    # Append two more layers, checking the tail shape after each append.
    for width in (64, 32):
        model.add(Dense(width))
        assert model.get_layer(index=-1).output_shape == (None, width)
def make_wider_student_model(teacher_model, train_data,
                             validation_data, init, epochs=3):
    """Build and train a student network whose conv1 and fc1 layers are
    wider than the teacher's.

    The widened weights are seeded according to `init`: either
    'random-pad' (baseline) or 'net2wider' (function-preserving transfer
    via `wider2net_conv2d` / `wider2net_fc`).

    Returns the trained `(model, history)` pair.
    """
    conv1_width = 128
    fc1_width = 128

    model = Sequential()
    # conv1 is wider than the teacher's conv1.
    model.add(Conv2D(conv1_width, 3, input_shape=input_shape,
                     padding='same', name='conv1'))
    model.add(MaxPooling2D(2, name='pool1'))
    model.add(Conv2D(64, 3, padding='same', name='conv2'))
    model.add(MaxPooling2D(2, name='pool2'))
    model.add(Flatten(name='flatten'))
    # fc1 is wider than the teacher's fc1.
    model.add(Dense(fc1_width, activation='relu', name='fc1'))
    model.add(Dense(num_class, activation='softmax', name='fc2'))

    # Untouched layers would normally be copied verbatim from the teacher;
    # in this architecture every layer is either widened or sits directly
    # downstream of a widened one, so all weights are set below instead.
    teacher_conv1_w, teacher_conv1_b = \
        teacher_model.get_layer('conv1').get_weights()
    teacher_conv2_w, teacher_conv2_b = \
        teacher_model.get_layer('conv2').get_weights()
    wide_conv1_w, wide_conv1_b, wide_conv2_w = wider2net_conv2d(
        teacher_conv1_w, teacher_conv1_b, teacher_conv2_w,
        conv1_width, init)
    model.get_layer('conv1').set_weights([wide_conv1_w, wide_conv1_b])
    model.get_layer('conv2').set_weights([wide_conv2_w, teacher_conv2_b])

    teacher_fc1_w, teacher_fc1_b = \
        teacher_model.get_layer('fc1').get_weights()
    teacher_fc2_w, teacher_fc2_b = \
        teacher_model.get_layer('fc2').get_weights()
    wide_fc1_w, wide_fc1_b, wide_fc2_w = wider2net_fc(
        teacher_fc1_w, teacher_fc1_b, teacher_fc2_w,
        fc1_width, init)
    model.get_layer('fc1').set_weights([wide_fc1_w, wide_fc1_b])
    model.get_layer('fc2').set_weights([wide_fc2_w, teacher_fc2_b])

    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001, momentum=0.9),
                  metrics=['accuracy'])
    train_x, train_y = train_data
    history = model.fit(train_x, train_y,
                        epochs=epochs,
                        validation_data=validation_data)
    return model, history
def test_get_layer():
    """`get_layer` raises ValueError for an out-of-range index, a `None`
    index, and an unknown layer name."""
    model = Sequential()
    model.add(Dense(1, input_dim=2))
    # Each invalid query must be rejected with ValueError.
    for bad_query in (dict(index=5), dict(index=None), dict(name='conv')):
        with pytest.raises(ValueError):
            model.get_layer(**bad_query)
def make_deeper_student_model(teacher_model, train_data,
                              validation_data, init, epochs=3):
    """Build and train a student network one conv layer and one fc layer
    deeper than the teacher.

    The inserted layers are initialized according to `init`: either
    'random-init' (baseline) or 'net2deeper' (identity-style,
    function-preserving initialization).

    Raises ValueError for any other `init` value.
    Returns the trained `(model, history)` pair.
    """
    model = Sequential()
    model.add(Conv2D(64, 3, input_shape=input_shape,
                     padding='same', name='conv1'))
    model.add(MaxPooling2D(2, name='pool1'))
    model.add(Conv2D(64, 3, padding='same', name='conv2'))

    # Insert an extra conv layer right after conv2 to deepen the net.
    if init == 'net2deeper':
        conv2_kernel, _ = model.get_layer('conv2').get_weights()
        identity_weights = deeper2net_conv2d(conv2_kernel)
        model.add(Conv2D(64, 3, padding='same', name='conv2-deeper',
                         weights=identity_weights))
    elif init == 'random-init':
        model.add(Conv2D(64, 3, padding='same', name='conv2-deeper'))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)

    model.add(MaxPooling2D(2, name='pool2'))
    model.add(Flatten(name='flatten'))
    model.add(Dense(64, activation='relu', name='fc1'))

    # Likewise deepen fc1 with one more fully-connected layer.
    if init == 'net2deeper':
        # net2deeper for an fc layer with relu is just an identity
        # initializer.
        model.add(Dense(64, kernel_initializer='identity',
                        activation='relu', name='fc1-deeper'))
    elif init == 'random-init':
        model.add(Dense(64, activation='relu', name='fc1-deeper'))
    else:
        raise ValueError('Unsupported weight initializer: %s' % init)

    model.add(Dense(num_class, activation='softmax', name='fc2'))

    # Every teacher layer survives unchanged, so its weights carry over.
    copy_weights(teacher_model, model,
                 layer_names=['conv1', 'conv2', 'fc1', 'fc2'])

    model.compile(loss='categorical_crossentropy',
                  optimizer=SGD(lr=0.001, momentum=0.9),
                  metrics=['accuracy'])
    train_x, train_y = train_data
    history = model.fit(train_x, train_y,
                        epochs=epochs,
                        validation_data=validation_data)
    return model, history