def mlp_net2net():
    """Net2Net weight-transfer demo on MNIST with a 3-layer MLP.

    Trains a teacher MLP for one epoch, copies its Dense-layer weights into
    an identically shaped student model, then trains the student for one
    epoch.  Uses the flexflow.keras API (Input/Dense/Activation/Model).
    """
    num_classes = 10

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Flatten 28x28 images to 784-vectors.  Use the actual sample count
    # rather than the hard-coded 60000 so a truncated dataset also works.
    x_train = x_train.reshape(x_train.shape[0], 784)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    # Labels as a (N, 1) column, as expected by flexflow's fit().
    y_train = np.reshape(y_train, (len(y_train), 1))
    print("shape: ", x_train.shape)

    # ---- teacher model ----
    # batch_shape=[0, ...]: 0 lets the flexflow runtime choose the batch
    # size — TODO confirm against the flexflow.keras documentation.
    input_tensor1 = Input(batch_shape=[0, 784], dtype="float32")
    d1 = Dense(512, input_shape=(784, ), activation="relu")
    d2 = Dense(512, activation="relu")
    d3 = Dense(num_classes)

    output = d1(input_tensor1)
    output = d2(output)
    output = d3(output)
    output = Activation("softmax")(output)

    teacher_model = Model(input_tensor1, output)
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    teacher_model.compile(optimizer=opt)
    teacher_model.fit(x_train, y_train, epochs=1)

    # Pull the trained weights out of the teacher.
    d1_kernel, d1_bias = d1.get_weights(teacher_model.ffmodel)
    d2_kernel, d2_bias = d2.get_weights(teacher_model.ffmodel)
    d3_kernel, d3_bias = d3.get_weights(teacher_model.ffmodel)

    # ---- student model: same architecture, seeded from the teacher ----
    input_tensor2 = Input(batch_shape=[0, 784], dtype="float32")
    sd1 = Dense(512, input_shape=(784, ), activation="relu")
    sd2 = Dense(512, activation="relu")
    sd3 = Dense(num_classes)

    output = sd1(input_tensor2)
    output = sd2(output)
    output = sd3(output)
    output = Activation("softmax")(output)

    student_model = Model(input_tensor2, output)
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    student_model.compile(optimizer=opt)

    # Net2Net transfer: initialize the student with the teacher's weights.
    sd1.set_weights(student_model.ffmodel, d1_kernel, d1_bias)
    sd2.set_weights(student_model.ffmodel, d2_kernel, d2_bias)
    sd3.set_weights(student_model.ffmodel, d3_kernel, d3_bias)
    student_model.fit(x_train, y_train, epochs=1)
def top_level_task():
    """CIFAR-10 Net2Net demo: train a teacher CNN, widen its first conv
    stage into two parallel branches in the student, transfer (and for the
    widened layer, duplicate) the teacher's weights, then train the student
    with accuracy-verification callbacks.

    NOTE(review): another ``top_level_task`` is defined later in this file
    and shadows this one at import time — confirm which entry point the
    example is meant to run.
    """
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # ---- teacher network ----
    input_tensor1 = Input(shape=(3, 32, 32), dtype="float32")
    c1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                strides=(1, 1), padding="same", activation="relu")
    c2 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    c3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    c4 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    d1 = Dense(512, activation="relu")
    d2 = Dense(num_classes)

    feat = c1(input_tensor1)
    feat = c2(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="same")(feat)
    feat = c3(feat)
    feat = c4(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="valid")(feat)
    feat = Flatten()(feat)
    feat = d1(feat)
    feat = d2(feat)
    feat = Activation("softmax")(feat)

    teacher_model = Model(input_tensor1, feat)
    teacher_model.compile(
        optimizer=flexflow.keras.optimizers.SGD(learning_rate=0.01),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])
    teacher_model.fit(x_train, y_train, epochs=10)

    # Extract the trained teacher weights.
    c1_kernel, c1_bias = c1.get_weights(teacher_model.ffmodel)
    c2_kernel, c2_bias = c2.get_weights(teacher_model.ffmodel)
    c3_kernel, c3_bias = c3.get_weights(teacher_model.ffmodel)
    c4_kernel, c4_bias = c4.get_weights(teacher_model.ffmodel)
    d1_kernel, d1_bias = d1.get_weights(teacher_model.ffmodel)
    d2_kernel, d2_bias = d2.get_weights(teacher_model.ffmodel)

    # The student's c2 consumes the concatenation of two 32-channel
    # branches, so duplicate the teacher's c2 kernel along axis 1 —
    # presumably the input-channel axis; verify against flexflow's
    # weight layout.
    c2_kernel_new = np.concatenate((c2_kernel, c2_kernel), axis=1)
    print(c2_kernel.shape, c2_kernel_new.shape, c2_bias.shape)

    # ---- student network: first conv stage widened into two branches ----
    input_tensor2 = Input(shape=(3, 32, 32), dtype="float32")
    sc1_1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding="same", activation="relu")
    sc1_2 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding="same", activation="relu")
    sc2 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sc3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sc4 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sd1 = Dense(512, activation="relu")
    sd2 = Dense(num_classes)

    branch_a = sc1_1(input_tensor2)
    branch_b = sc1_2(input_tensor2)
    feat = Concatenate(axis=1)([branch_a, branch_b])
    feat = sc2(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="same")(feat)
    feat = sc3(feat)
    feat = sc4(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="valid")(feat)
    feat = Flatten()(feat)
    feat = sd1(feat)
    feat = sd2(feat)
    feat = Activation("softmax")(feat)

    student_model = Model(input_tensor2, feat)
    student_model.compile(
        optimizer=flexflow.keras.optimizers.SGD(learning_rate=0.01),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])

    # Seed the student: both branches get copies of the teacher's c1.
    sc1_1.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc1_2.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc2.set_weights(student_model.ffmodel, c2_kernel_new, c2_bias)
    sc3.set_weights(student_model.ffmodel, c3_kernel, c3_bias)
    sc4.set_weights(student_model.ffmodel, c4_kernel, c4_bias)
    sd1.set_weights(student_model.ffmodel, d1_kernel, d1_bias)
    sd2.set_weights(student_model.ffmodel, d2_kernel, d2_bias)

    student_model.fit(x_train, y_train, epochs=160,
                      callbacks=[
                          VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
                          EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN)
                      ])
def top_level_task():
    """MNIST MLP transfer demo: train a teacher MLP, copy its Dense
    weights into an identically shaped student, then train the student
    with accuracy-verification callbacks.

    NOTE(review): an earlier ``top_level_task`` is also defined in this
    file; this later definition shadows it — confirm the intended entry
    point.
    """
    num_classes = 10

    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    # Flatten 28x28 images to 784-vectors.  Use the actual sample count
    # rather than the hard-coded 60000 so a truncated dataset also works.
    x_train = x_train.reshape(x_train.shape[0], 784)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    # Labels as a (N, 1) column, as expected by flexflow's fit().
    y_train = np.reshape(y_train, (len(y_train), 1))
    print("shape: ", x_train.shape)

    # ---- teacher model ----
    input_tensor1 = Input(shape=(784, ), dtype="float32")
    d1 = Dense(512, input_shape=(784, ), activation="relu")
    d2 = Dense(512, activation="relu")
    d3 = Dense(num_classes)

    output = d1(input_tensor1)
    output = d2(output)
    output = d3(output)
    output = Activation("softmax")(output)

    teacher_model = Model(input_tensor1, output)
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    teacher_model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])
    teacher_model.fit(x_train, y_train, epochs=10)

    # Pull the trained weights out of the teacher.
    d1_kernel, d1_bias = d1.get_weights(teacher_model.ffmodel)
    d2_kernel, d2_bias = d2.get_weights(teacher_model.ffmodel)
    d3_kernel, d3_bias = d3.get_weights(teacher_model.ffmodel)

    # ---- student model: same architecture, seeded from the teacher ----
    input_tensor2 = Input(shape=(784, ), dtype="float32")
    sd1_1 = Dense(512, input_shape=(784, ), activation="relu")
    sd2 = Dense(512, activation="relu")
    sd3 = Dense(num_classes)

    output = sd1_1(input_tensor2)
    output = sd2(output)
    output = sd3(output)
    output = Activation("softmax")(output)

    student_model = Model(input_tensor2, output)
    opt = flexflow.keras.optimizers.SGD(learning_rate=0.01)
    student_model.compile(
        optimizer=opt,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy', 'sparse_categorical_crossentropy'])

    # Transfer: initialize the student with the teacher's weights.
    sd1_1.set_weights(student_model.ffmodel, d1_kernel, d1_bias)
    sd2.set_weights(student_model.ffmodel, d2_kernel, d2_bias)
    sd3.set_weights(student_model.ffmodel, d3_kernel, d3_bias)

    student_model.fit(x_train, y_train, epochs=160,
                      callbacks=[
                          VerifyMetrics(ModelAccuracy.MNIST_MLP),
                          EpochVerifyMetrics(ModelAccuracy.MNIST_MLP)
                      ])
def cifar_cnn_net2net():
    """CIFAR-10 Net2Net demo (single-epoch variant): train a teacher CNN,
    widen its first conv stage into two parallel branches in the student,
    duplicate the affected kernel, transfer all weights, and train the
    student for one epoch.
    """
    num_classes = 10
    num_samples = 10000

    (x_train, y_train), (x_test, y_test) = cifar10.load_data(num_samples)
    x_train = x_train.astype('float32')
    x_train /= 255
    y_train = y_train.astype('int32')
    print("shape: ", x_train.shape)

    # ---- teacher network ----
    # batch_shape=[0, ...]: 0 lets the flexflow runtime choose the batch
    # size — TODO confirm against the flexflow.keras documentation.
    input_tensor1 = Input(batch_shape=[0, 3, 32, 32], dtype="float32")
    c1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                strides=(1, 1), padding=(1, 1), activation="relu")
    c2 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    c3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    c4 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                padding=(1, 1), activation="relu")
    d1 = Dense(512, activation="relu")
    d2 = Dense(num_classes)

    feat = c1(input_tensor1)
    feat = c2(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="valid")(feat)
    feat = c3(feat)
    feat = c4(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="valid")(feat)
    feat = Flatten()(feat)
    feat = d1(feat)
    feat = d2(feat)
    feat = Activation("softmax")(feat)

    teacher_model = Model(input_tensor1, feat)
    print(teacher_model.summary())
    teacher_model.compile(
        optimizer=flexflow.keras.optimizers.SGD(learning_rate=0.01))
    teacher_model.fit(x_train, y_train, epochs=1)

    # Extract the trained teacher weights.
    c1_kernel, c1_bias = c1.get_weights(teacher_model.ffmodel)
    c2_kernel, c2_bias = c2.get_weights(teacher_model.ffmodel)
    c3_kernel, c3_bias = c3.get_weights(teacher_model.ffmodel)
    c4_kernel, c4_bias = c4.get_weights(teacher_model.ffmodel)
    d1_kernel, d1_bias = d1.get_weights(teacher_model.ffmodel)
    d2_kernel, d2_bias = d2.get_weights(teacher_model.ffmodel)

    # The student's c2 consumes the concatenation of two 32-channel
    # branches, so duplicate the teacher's c2 kernel along axis 1 —
    # presumably the input-channel axis; verify against flexflow's
    # weight layout.
    c2_kernel_new = np.concatenate((c2_kernel, c2_kernel), axis=1)
    print(c2_kernel.shape, c2_kernel_new.shape, c2_bias.shape)

    # ---- student network: first conv stage widened into two branches ----
    input_tensor2 = Input(batch_shape=[0, 3, 32, 32], dtype="float32")
    sc1_1 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding=(1, 1), activation="relu")
    sc1_2 = Conv2D(filters=32, input_shape=(3, 32, 32), kernel_size=(3, 3),
                   strides=(1, 1), padding=(1, 1), activation="relu")
    sc2 = Conv2D(filters=32, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sc3 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sc4 = Conv2D(filters=64, kernel_size=(3, 3), strides=(1, 1),
                 padding=(1, 1), activation="relu")
    sd1 = Dense(512, activation="relu")
    sd2 = Dense(num_classes)

    branch_a = sc1_1(input_tensor2)
    branch_b = sc1_2(input_tensor2)
    feat = Concatenate(axis=1)([branch_a, branch_b])
    feat = sc2(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="valid")(feat)
    feat = sc3(feat)
    feat = sc4(feat)
    feat = MaxPooling2D(pool_size=(2, 2), strides=(2, 2),
                        padding="valid")(feat)
    feat = Flatten()(feat)
    feat = sd1(feat)
    feat = sd2(feat)
    feat = Activation("softmax")(feat)

    student_model = Model(input_tensor2, feat)
    print(student_model.summary())
    student_model.compile(
        optimizer=flexflow.keras.optimizers.SGD(learning_rate=0.01))

    # Seed the student: both branches get copies of the teacher's c1.
    sc1_1.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc1_2.set_weights(student_model.ffmodel, c1_kernel, c1_bias)
    sc2.set_weights(student_model.ffmodel, c2_kernel_new, c2_bias)
    sc3.set_weights(student_model.ffmodel, c3_kernel, c3_bias)
    sc4.set_weights(student_model.ffmodel, c4_kernel, c4_bias)
    sd1.set_weights(student_model.ffmodel, d1_kernel, d1_bias)
    sd2.set_weights(student_model.ffmodel, d2_kernel, d2_bias)
    student_model.fit(x_train, y_train, epochs=1)