# Example #1
# 0
def mlp_net2net():
    """Train a teacher MLP on MNIST, then keep training a student copy of it.

    A Dense 512 -> 512 -> 10 stack followed by a softmax activation is built
    and trained for one epoch with flexflow.keras SGD (learning rate 0.01).
    The kernel and bias of each Dense layer are then read back from the
    teacher and written into a second, identically shaped network, which is
    trained for one further epoch on the same data.

    Takes no arguments and returns None.
    """
    num_classes = 10
    num_features = 784
    hidden_units = 512

    def build_compiled_mlp():
        """Create one input/Dense stack/softmax model and compile it.

        Returns the model together with its three Dense layers so that the
        caller can read or overwrite their weights.
        """
        # NOTE(review): the batch dimension is given as 0 — presumably a
        # placeholder resolved by the framework; confirm.
        net_in = Input(batch_shape=[0, num_features], dtype="float32")
        dense_stack = [
            Dense(hidden_units, input_shape=(num_features, ), activation="relu"),
            Dense(hidden_units, activation="relu"),
            Dense(num_classes),
        ]
        hidden = net_in
        for dense in dense_stack:
            hidden = dense(hidden)
        probabilities = Activation("softmax")(hidden)
        model = Model(net_in, probabilities)

        sgd = flexflow.keras.optimizers.SGD(learning_rate=0.01)
        model.compile(optimizer=sgd)
        return model, dense_stack

    # The test split is loaded but never used in this example.
    (images, labels), (_test_images, _test_labels) = mnist.load_data()

    # One 784-wide float32 row per image, divided by 255.
    images = images.reshape(60000, num_features).astype('float32')
    images /= 255
    # Labels become an int32 column with one row per sample.
    labels = np.reshape(labels.astype('int32'), (len(labels), 1))
    # Debugging aid kept from the original: random labels can be swapped in
    # with np.random.randint(1, 9, size=(len(labels), 1), dtype='int32').
    print("shape: ", images.shape)

    # --- teacher ---
    teacher_model, teacher_layers = build_compiled_mlp()
    teacher_model.fit(images, labels, epochs=1)

    teacher_weights = []
    for dense in teacher_layers:
        kernel, bias = dense.get_weights(teacher_model.ffmodel)
        teacher_weights.append((kernel, bias))

    # --- student ---
    student_model, student_layers = build_compiled_mlp()

    # Weights are written only after compile(), matching the original order.
    for dense, (kernel, bias) in zip(student_layers, teacher_weights):
        dense.set_weights(student_model.ffmodel, kernel, bias)

    student_model.fit(images, labels, epochs=1)
def top_level_task():
    """Train a CIFAR-10 CNN teacher, then a student with a doubled first layer.

    The teacher is conv-conv-pool-conv-conv-pool-flatten-dense-dense with a
    softmax on top, trained for 10 epochs on 10000 samples.  The student has
    the same layout except that the first convolution is replaced by two
    parallel convolutions whose outputs are concatenated along axis 1.  Both
    parallel convolutions receive the teacher's first-layer weights, and the
    second convolution receives the teacher's kernel concatenated with itself
    along axis 1.  All other layers are copied unchanged before the student
    is trained for 160 epochs with two verification callbacks.

    NOTE(review): the duplicated second-layer kernel is not rescaled, so the
    student presumably does not reproduce the teacher's outputs exactly at
    the start of training — confirm whether that is intended.

    Takes no arguments and returns None.
    """
    num_classes = 10
    num_samples = 10000

    def conv3x3_relu(filters, padding, **extra):
        """Build a 3x3, stride-1 Conv2D layer with ReLU activation."""
        return Conv2D(filters=filters,
                      kernel_size=(3, 3),
                      strides=(1, 1),
                      padding=padding,
                      activation="relu",
                      **extra)

    def pool2x2(padding):
        """Build a 2x2, stride-2 MaxPooling2D layer."""
        return MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding=padding)

    def compile_for_training(model):
        """Compile *model* with a fresh SGD optimizer, loss and metrics."""
        sgd = flexflow.keras.optimizers.SGD(learning_rate=0.01)
        model.compile(
            optimizer=sgd,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy', 'sparse_categorical_crossentropy'])

    # The test split is loaded but never used in this example.
    (images, labels), (_test_images, _test_labels) = cifar10.load_data(
        num_samples)

    images = images.astype('float32')
    images /= 255
    labels = labels.astype('int32')
    print("shape: ", images.shape)

    # --- teacher ---
    teacher_in = Input(shape=(3, 32, 32), dtype="float32")

    t_conv1 = conv3x3_relu(32, "same", input_shape=(3, 32, 32))
    t_conv2 = conv3x3_relu(32, (1, 1))
    t_conv3 = conv3x3_relu(64, (1, 1))
    t_conv4 = conv3x3_relu(64, (1, 1))
    t_fc1 = Dense(512, activation="relu")
    t_fc2 = Dense(num_classes)

    feat = t_conv2(t_conv1(teacher_in))
    feat = pool2x2("same")(feat)
    feat = t_conv4(t_conv3(feat))
    feat = pool2x2("valid")(feat)
    feat = t_fc2(t_fc1(Flatten()(feat)))
    teacher_model = Model(teacher_in, Activation("softmax")(feat))

    compile_for_training(teacher_model)
    teacher_model.fit(images, labels, epochs=10)

    # Read every trainable layer's (kernel, bias) pair back from the teacher.
    trained = []
    for layer in (t_conv1, t_conv2, t_conv3, t_conv4, t_fc1, t_fc2):
        kernel, bias = layer.get_weights(teacher_model.ffmodel)
        trained.append((kernel, bias))
    conv1_kb, conv2_kb, conv3_kb, conv4_kb, fc1_kb, fc2_kb = trained

    # The student's second conv sees the two concatenated branches, so its
    # kernel is doubled along axis 1 (assumed to be the input-channel axis —
    # TODO confirm against the framework's kernel layout).
    conv2_kernel, conv2_bias = conv2_kb
    widened_kernel = np.concatenate((conv2_kernel, conv2_kernel), axis=1)
    print(conv2_kernel.shape, widened_kernel.shape, conv2_bias.shape)

    # --- student ---
    student_in = Input(shape=(3, 32, 32), dtype="float32")

    s_conv1_a = conv3x3_relu(32, "same", input_shape=(3, 32, 32))
    s_conv1_b = conv3x3_relu(32, "same", input_shape=(3, 32, 32))
    s_conv2 = conv3x3_relu(32, (1, 1))
    s_conv3 = conv3x3_relu(64, (1, 1))
    s_conv4 = conv3x3_relu(64, (1, 1))
    s_fc1 = Dense(512, activation="relu")
    s_fc2 = Dense(num_classes)

    branch_a = s_conv1_a(student_in)
    branch_b = s_conv1_b(student_in)
    feat = Concatenate(axis=1)([branch_a, branch_b])
    feat = s_conv2(feat)
    feat = pool2x2("same")(feat)
    feat = s_conv4(s_conv3(feat))
    feat = pool2x2("valid")(feat)
    feat = s_fc2(s_fc1(Flatten()(feat)))
    student_model = Model(student_in, Activation("softmax")(feat))

    compile_for_training(student_model)

    # Weights are written only after compile(), matching the original order.
    transfers = (
        (s_conv1_a, conv1_kb),
        (s_conv1_b, conv1_kb),
        (s_conv2, (widened_kernel, conv2_bias)),
        (s_conv3, conv3_kb),
        (s_conv4, conv4_kb),
        (s_fc1, fc1_kb),
        (s_fc2, fc2_kb),
    )
    for layer, (kernel, bias) in transfers:
        layer.set_weights(student_model.ffmodel, kernel, bias)

    # NOTE(review): these callbacks presumably compare the reached accuracy
    # with a reference value for this model — confirm in their definitions.
    verification = [
        VerifyMetrics(ModelAccuracy.CIFAR10_CNN),
        EpochVerifyMetrics(ModelAccuracy.CIFAR10_CNN),
    ]
    student_model.fit(images, labels, epochs=160, callbacks=verification)
def top_level_task():
    """Train a teacher MLP on MNIST and continue training a student copy.

    A Dense 512 -> 512 -> 10 stack with a softmax on top is compiled with
    flexflow.keras SGD (learning rate 0.01), sparse categorical cross-entropy
    loss and two metrics, then trained for 10 epochs.  The kernel and bias of
    each Dense layer are copied into a second network of identical shape,
    which is trained for 160 epochs with two verification callbacks.

    Takes no arguments and returns None.
    """
    num_classes = 10

    def build_compiled_mlp():
        """Create one input/Dense stack/softmax model and compile it.

        Returns the model together with its three Dense layers so that the
        caller can read or overwrite their weights.
        """
        net_in = Input(shape=(784, ), dtype="float32")
        dense_stack = [
            Dense(512, input_shape=(784, ), activation="relu"),
            Dense(512, activation="relu"),
            Dense(num_classes),
        ]
        hidden = net_in
        for dense in dense_stack:
            hidden = dense(hidden)
        probabilities = Activation("softmax")(hidden)
        model = Model(net_in, probabilities)

        sgd = flexflow.keras.optimizers.SGD(learning_rate=0.01)
        model.compile(
            optimizer=sgd,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy', 'sparse_categorical_crossentropy'])
        return model, dense_stack

    # The test split is loaded but never used in this example.
    (images, labels), (_test_images, _test_labels) = mnist.load_data()

    # One 784-wide float32 row per image, divided by 255.
    images = images.reshape(60000, 784).astype('float32')
    images /= 255
    # Labels become an int32 column with one row per sample.
    labels = np.reshape(labels.astype('int32'), (len(labels), 1))
    print("shape: ", images.shape)

    # --- teacher ---
    teacher_model, teacher_layers = build_compiled_mlp()
    teacher_model.fit(images, labels, epochs=10)

    teacher_weights = []
    for dense in teacher_layers:
        kernel, bias = dense.get_weights(teacher_model.ffmodel)
        teacher_weights.append((kernel, bias))

    # --- student ---
    student_model, student_layers = build_compiled_mlp()

    # Weights are written only after compile(), matching the original order.
    for dense, (kernel, bias) in zip(student_layers, teacher_weights):
        dense.set_weights(student_model.ffmodel, kernel, bias)

    # NOTE(review): these callbacks presumably compare the reached accuracy
    # with a reference value for this model — confirm in their definitions.
    verification = [
        VerifyMetrics(ModelAccuracy.MNIST_MLP),
        EpochVerifyMetrics(ModelAccuracy.MNIST_MLP),
    ]
    student_model.fit(images, labels, epochs=160, callbacks=verification)
# Example #4
# 0
def cifar_cnn_net2net():
    """Train a CIFAR-10 CNN teacher, then a student with a doubled first layer.

    The teacher is conv-conv-pool-conv-conv-pool-flatten-dense-dense with a
    softmax on top, trained for one epoch on 10000 samples.  The student has
    the same layout except that the first convolution is replaced by two
    parallel convolutions whose outputs are concatenated along axis 1.  Both
    parallel convolutions receive the teacher's first-layer weights, and the
    second convolution receives the teacher's kernel concatenated with itself
    along axis 1.  All other layers are copied unchanged before the student
    is trained for one epoch.  Each model's summary is printed once built.

    NOTE(review): the duplicated second-layer kernel is not rescaled, so the
    student presumably does not reproduce the teacher's outputs exactly at
    the start of training — confirm whether that is intended.

    Takes no arguments and returns None.
    """
    num_classes = 10
    num_samples = 10000

    def conv3x3_relu(filters, **extra):
        """Build a 3x3, stride-1, (1, 1)-padded Conv2D layer with ReLU."""
        return Conv2D(filters=filters,
                      kernel_size=(3, 3),
                      strides=(1, 1),
                      padding=(1, 1),
                      activation="relu",
                      **extra)

    def pool2x2():
        """Build a 2x2, stride-2 MaxPooling2D layer with "valid" padding."""
        return MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="valid")

    def compile_for_training(model):
        """Compile *model* with a fresh SGD optimizer and no explicit loss."""
        sgd = flexflow.keras.optimizers.SGD(learning_rate=0.01)
        model.compile(optimizer=sgd)

    # The test split is loaded but never used in this example.
    (images, labels), (_test_images, _test_labels) = cifar10.load_data(
        num_samples)

    images = images.astype('float32')
    images /= 255
    # Debugging aids kept from the original: the inputs can be zeroed with
    # `images *= 0`, and random labels can be swapped in with
    # np.random.randint(1, 9, size=(num_samples, 1), dtype='int32').
    labels = labels.astype('int32')
    print("shape: ", images.shape)

    # --- teacher ---
    # NOTE(review): the batch dimension is given as 0 — presumably a
    # placeholder resolved by the framework; confirm.
    teacher_in = Input(batch_shape=[0, 3, 32, 32], dtype="float32")

    t_conv1 = conv3x3_relu(32, input_shape=(3, 32, 32))
    t_conv2 = conv3x3_relu(32)
    t_conv3 = conv3x3_relu(64)
    t_conv4 = conv3x3_relu(64)
    t_fc1 = Dense(512, activation="relu")
    t_fc2 = Dense(num_classes)

    feat = t_conv2(t_conv1(teacher_in))
    feat = pool2x2()(feat)
    feat = t_conv4(t_conv3(feat))
    feat = pool2x2()(feat)
    feat = t_fc2(t_fc1(Flatten()(feat)))
    teacher_model = Model(teacher_in, Activation("softmax")(feat))

    print(teacher_model.summary())

    compile_for_training(teacher_model)
    teacher_model.fit(images, labels, epochs=1)

    # Read every trainable layer's (kernel, bias) pair back from the teacher.
    trained = []
    for layer in (t_conv1, t_conv2, t_conv3, t_conv4, t_fc1, t_fc2):
        kernel, bias = layer.get_weights(teacher_model.ffmodel)
        trained.append((kernel, bias))
    conv1_kb, conv2_kb, conv3_kb, conv4_kb, fc1_kb, fc2_kb = trained

    # The student's second conv sees the two concatenated branches, so its
    # kernel is doubled along axis 1 (assumed to be the input-channel axis —
    # TODO confirm against the framework's kernel layout).
    conv2_kernel, conv2_bias = conv2_kb
    widened_kernel = np.concatenate((conv2_kernel, conv2_kernel), axis=1)
    print(conv2_kernel.shape, widened_kernel.shape, conv2_bias.shape)

    # --- student ---
    student_in = Input(batch_shape=[0, 3, 32, 32], dtype="float32")

    s_conv1_a = conv3x3_relu(32, input_shape=(3, 32, 32))
    s_conv1_b = conv3x3_relu(32, input_shape=(3, 32, 32))
    s_conv2 = conv3x3_relu(32)
    s_conv3 = conv3x3_relu(64)
    s_conv4 = conv3x3_relu(64)
    s_fc1 = Dense(512, activation="relu")
    s_fc2 = Dense(num_classes)

    branch_a = s_conv1_a(student_in)
    branch_b = s_conv1_b(student_in)
    feat = Concatenate(axis=1)([branch_a, branch_b])
    feat = s_conv2(feat)
    feat = pool2x2()(feat)
    feat = s_conv4(s_conv3(feat))
    feat = pool2x2()(feat)
    feat = s_fc2(s_fc1(Flatten()(feat)))
    student_model = Model(student_in, Activation("softmax")(feat))

    print(student_model.summary())

    compile_for_training(student_model)

    # Weights are written only after compile(), matching the original order.
    transfers = (
        (s_conv1_a, conv1_kb),
        (s_conv1_b, conv1_kb),
        (s_conv2, (widened_kernel, conv2_bias)),
        (s_conv3, conv3_kb),
        (s_conv4, conv4_kb),
        (s_fc1, fc1_kb),
        (s_fc2, fc2_kb),
    )
    for layer, (kernel, bias) in transfers:
        layer.set_weights(student_model.ffmodel, kernel, bias)

    student_model.fit(images, labels, epochs=1)