Beispiel #1
0
    def __init__(self, input_shape, num_classes, name=None):
        """Record the configuration and create the linear read-out layer.

        Args:
            input_shape: Forwarded unchanged to ``FullyConnected`` as its
                ``input_shape``.
            num_classes: Output size of the read-out layer.
            name: Name forwarded to the read-out layer.
        """
        self.num_classes, self.input_shape, self.name = (
            num_classes, input_shape, name)

        # A disabled (string-quoted) snippet used to sit here.  It built a
        # feedback matrix ``self.B`` either from a saved weight dict, from a
        # normal distribution with a given std, or via ``FeedbackMatrix``.
        # It never executed; the layer below is all this constructor creates.

        # Author's note: the problem was never the bias -- it was the fact
        # that we weren't dividing by N.

        readout_kwargs = dict(
            input_shape=input_shape,
            size=self.num_classes,
            init='alexnet',
            activation=Linear(),
            bias=0.,
            name=self.name,
        )
        self.l0 = FullyConnected(**readout_kwargs)
Beispiel #2
0
    def __init__(self, input_shape, pool_shape, nactions, name=None):
        """Build the pooling stage, the action/value heads and the placeholders.

        Args:
            input_shape: Four-element sequence unpacked as
                ``(batch_size, h, w, fin)``.
            pool_shape: Used as both ``ksize`` and ``strides`` of the average
                pool.
            nactions: Output size of the action head.
            name: Prefix for the derived names.  Despite the ``None`` default
                it must be a string, because suffixes are concatenated to it
                below.
        """
        self.input_shape = input_shape
        self.batch_size, self.h, self.w, self.fin = self.input_shape
        self.pool_shape = pool_shape
        self.nactions = nactions
        self.name = name
        self.action_name = self.name + '_action'
        self.value_name = self.name + '_value'
        self.nlp_name = self.name + '_nlp'

        self.pool = AvgPool(size=self.input_shape,
                            ksize=self.pool_shape,
                            strides=self.pool_shape,
                            padding='SAME')

        # Each stage is sized from the stage stored just above it.  The layers
        # live on ``self``; there are no ``l1``/``l2`` locals in this method.
        flatten_input_shape = self.pool.output_shape()
        self.conv2fc = ConvToFullyConnected(input_shape=flatten_input_shape)

        head_input_shape = self.conv2fc.output_shape()
        self.actions = FullyConnected(input_shape=head_input_shape,
                                      size=self.nactions,
                                      init='alexnet',
                                      name=self.name + '_actions')
        self.values = FullyConnected(input_shape=head_input_shape,
                                     size=1,
                                     init='alexnet',
                                     name=self.name + '_values')

        ####################################################

        # Zero-initialised bias variables, one row per batch element.  Uses
        # the attributes assigned above (``batch_size`` / ``nactions``); this
        # method never sets ``nbatch`` or ``nclass``.
        # NOTE(review): assumes input_shape[0] is a concrete Python int, since
        # np.zeros needs one -- confirm against callers.
        self.logits_bias = tf.Variable(np.zeros(shape=(self.batch_size,
                                                       self.nactions)),
                                       dtype=tf.float32)
        self.values_bias = tf.Variable(np.zeros(shape=(self.batch_size, 1)),
                                       dtype=tf.float32)

        # self.actions_model = Model(layers=[l1, l2, actions])
        # self.values_model = Model(layers=[l1, l2, values])

        ####################################################

        # One-dimensional placeholders fed at run time.
        self.advantages = tf.placeholder("float", [None])
        self.rewards = tf.placeholder("float", [None])

        self.old_actions = tf.placeholder("int32", [None])
        self.old_values = tf.placeholder("float", [None])
        self.old_nlps = tf.placeholder("float", [None])
Beispiel #3
0
    def __init__(self, input_shape, pool_shape, num_classes, name=None):
        """Assemble ``self.B``: average pool -> flatten -> linear read-out.

        Args:
            input_shape: Four-element sequence unpacked as
                ``(batch_size, h, w, fin)``.
            pool_shape: Used as both ``ksize`` and ``strides`` of the average
                pool.
            num_classes: Output size of the final fully connected layer.
            name: Name forwarded to the final fully connected layer.
        """
        self.input_shape = input_shape
        (self.batch_size, self.h, self.w, self.fin) = self.input_shape
        self.pool_shape = pool_shape
        self.num_classes = num_classes
        self.name = name

        # Every stage is sized from the output shape of the one before it.
        pool = AvgPool(size=self.input_shape,
                       ksize=self.pool_shape,
                       strides=self.pool_shape,
                       padding='SAME')
        flatten = ConvToFullyConnected(input_shape=pool.output_shape())
        readout = FullyConnected(input_shape=flatten.output_shape(),
                                 size=self.num_classes,
                                 init='alexnet',
                                 activation=Linear(),
                                 bias=0.,
                                 name=self.name)

        self.B = Model(layers=[pool, flatten, readout])
Beispiel #4
0
# Layers l4..l20 of the network; l0..l3 are created before this excerpt.
# Construction order is kept exactly as written.
l4 = MaxPool(size=[batch_size, 27, 27, 256], ksize=[1, 3, 3, 1],
             strides=[1, 2, 2, 1], padding="VALID")
l5 = FeedbackConv(size=[batch_size, 13, 13, 256], num_classes=1000,
                  sparse=args.sparse, rank=args.rank, name='conv2_fb')

l6 = Convolution(input_shape=[batch_size, 13, 13, 256],
                 filter_sizes=[3, 3, 256, 384], init=args.init,
                 activation=act, bias=args.bias, load=weights_conv,
                 name='conv3', train=train_conv)
l7 = FeedbackConv(size=[batch_size, 13, 13, 384], num_classes=1000,
                  sparse=args.sparse, rank=args.rank, name='conv3_fb')

l8 = Convolution(input_shape=[batch_size, 13, 13, 384],
                 filter_sizes=[3, 3, 384, 384], init=args.init,
                 activation=act, bias=args.bias, load=weights_conv,
                 name='conv4', train=train_conv)
l9 = FeedbackConv(size=[batch_size, 13, 13, 384], num_classes=1000,
                  sparse=args.sparse, rank=args.rank, name='conv4_fb')

l10 = Convolution(input_shape=[batch_size, 13, 13, 384],
                  filter_sizes=[3, 3, 384, 256], init=args.init,
                  activation=act, bias=args.bias, load=weights_conv,
                  name='conv5', train=train_conv)
l11 = MaxPool(size=[batch_size, 13, 13, 256], ksize=[1, 3, 3, 1],
              strides=[1, 2, 2, 1], padding="VALID")
l12 = FeedbackConv(size=[batch_size, 6, 6, 256], num_classes=1000,
                   sparse=args.sparse, rank=args.rank, name='conv5_fb')

l13 = ConvToFullyConnected(input_shape=[6, 6, 256])

l14 = FullyConnected(input_shape=6 * 6 * 256, size=4096, init=args.init,
                     activation=act, bias=args.bias, load=weights_fc,
                     name='fc1', train=train_fc)
l15 = Dropout(rate=dropout_rate)
l16 = FeedbackFC(size=[6 * 6 * 256, 4096], num_classes=1000,
                 sparse=args.sparse, rank=args.rank, name='fc1_fb')

l17 = FullyConnected(input_shape=4096, size=4096, init=args.init,
                     activation=act, bias=args.bias, load=weights_fc,
                     name='fc2', train=train_fc)
l18 = Dropout(rate=dropout_rate)
l19 = FeedbackFC(size=[4096, 4096], num_classes=1000,
                 sparse=args.sparse, rank=args.rank, name='fc2_fb')

# The last layer is built without an explicit ``activation`` argument.
l20 = FullyConnected(input_shape=4096, size=1000, init=args.init,
                     bias=args.bias, load=weights_fc, name='fc3',
                     train=train_fc)

###############################################################

model = Model(layers=[
    l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10,
    l11, l12, l13, l14, l15, l16, l17, l18, l19, l20,
])
# Softmax is applied here, on top of the model's raw prediction.
predict = tf.nn.softmax(model.predict(X=features))
weights = model.get_weights()
def main():
    """Random-search ``sigma``/``alpha``/``beta`` for the CIFAR-10 conv net.

    Parses the command line, then runs ``--N`` trials.  Each trial draws
    hyperparameters with ``set_random_hyperparameters``, rebuilds the TF graph
    (the plain model plus a copy with ``NodePert`` layers inserted, sharing
    the same layer objects), trains for ``--epochs`` epochs while also running
    the feedback-matrix update ops, and writes per-epoch accuracies to
    ``<name>.results`` (recreated at the start of every trial).  After each
    trial the accumulated search results are pickled to a fixed file name.

    Raises:
        ValueError: If ``--act`` is neither ``'tanh'`` nor ``'relu'``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epochs', type=int, default=200)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--alpha', type=float, default=1e-4)
    # Learning rate of the feedback weights B.
    parser.add_argument('--beta', type=float, default=1e-4)
    # Node-perturbation standard deviation.
    parser.add_argument('--sigma', type=float, default=0.1)
    parser.add_argument('--l2', type=float, default=0.)
    parser.add_argument('--decay', type=float, default=1.)
    parser.add_argument('--eps', type=float, default=1e-5)
    parser.add_argument('--dropout', type=float, default=0.5)
    parser.add_argument('--act', type=str, default='tanh')
    parser.add_argument('--bias', type=float, default=0.1)
    parser.add_argument('--gpu', type=int, default=1)
    parser.add_argument('--dfa', type=int, default=1)
    # Whether or not to learn the feedback weights.
    parser.add_argument('--feedbacklearning', type=int, default=1)
    parser.add_argument('--sparse', type=int, default=0)
    parser.add_argument('--rank', type=int, default=0)
    parser.add_argument('--init', type=str, default="sqrt_fan_in")
    parser.add_argument('--opt', type=str, default="adam")
    parser.add_argument('--N', type=int, default=50)
    parser.add_argument('--save', type=int, default=0)
    parser.add_argument('--name', type=str, default="cifar10_conv_np")
    parser.add_argument('--load', type=str, default=None)
    args = parser.parse_args()

    if args.gpu >= 0:
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    cifar10 = tf.keras.datasets.cifar10.load_data()

    ##############################################

    EPOCHS = args.epochs
    TRAIN_EXAMPLES = 50000
    TEST_EXAMPLES = 10000
    BATCH_SIZE = args.batch_size

    # The data does not depend on the trial, so prepare it once up front.
    (x_train, y_train), (x_test, y_test) = cifar10

    x_train = x_train.reshape(TRAIN_EXAMPLES, 32, 32, 3)
    y_train = keras.utils.to_categorical(y_train, 10)

    x_test = x_test.reshape(TEST_EXAMPLES, 32, 32, 3)
    y_test = keras.utils.to_categorical(y_test, 10)

    # Validate with a real exception: ``assert`` is stripped under ``-O``,
    # which would leave ``act`` undefined.
    if args.act == 'tanh':
        act = Tanh()
    elif args.act == 'relu':
        act = Relu()
    else:
        raise ValueError(
            "unsupported --act %r (expected 'tanh' or 'relu')" % (args.act,))

    # When conv weights are loaded from --load, the conv layers are frozen.
    train_fc = True
    train_conv = not args.load

    weights_fc = None
    weights_conv = args.load

    # Setup the parameters of the search.
    attrs = ['sigma', 'alpha', 'beta']
    log_scale = [True, True, True]
    ranges = [[-4, -1], [-6, -3], [-6, -3]]
    params = []
    isnan = []
    train_accs = []
    test_accs = []

    # Here we run a bunch of times for different parameters...
    for _trial in range(args.N):

        # Choose some random parameters...
        param = set_random_hyperparameters(args, attrs, ranges, log_scale)
        params.append(param)

        if args.feedbacklearning == 0:
            args.beta = 0

        # Tell me the params....
        print('Alpha, beta, sigma are: ', args.alpha, args.beta, args.sigma)

        # Reset first, then seed: the seed is stored on the current default
        # graph, so seeding before the reset would be thrown away.
        tf.reset_default_graph()
        tf.set_random_seed(0)

        batch_size = tf.placeholder(tf.int32, shape=())
        dropout_rate = tf.placeholder(tf.float32, shape=())
        learning_rate = tf.placeholder(tf.float32, shape=())
        sigma = tf.placeholder(tf.float32, shape=(), name="Sigma")
        # Keep a handle on the raw-image placeholder and feed *that*.  Feeding
        # the standardized tensor ``X`` would override its value and skip the
        # standardization altogether.
        X_in = tf.placeholder(tf.float32, [None, 32, 32, 3])
        X = tf.map_fn(tf.image.per_image_standardization, X_in)
        Y = tf.placeholder(tf.float32, [None, 10])

        # Small factories for the repeated layer configurations.  Only the
        # arguments that vary between layers are parameters.
        def conv(in_shape, filters, name):
            return Convolution(input_sizes=in_shape,
                               filter_sizes=filters,
                               num_classes=10,
                               init_filters=args.init,
                               strides=[1, 1, 1, 1],
                               padding="SAME",
                               alpha=learning_rate,
                               activation=act,
                               bias=args.bias,
                               last_layer=False,
                               name=name,
                               load=weights_conv,
                               train=train_conv)

        def pool(in_shape):
            return MaxPool(size=in_shape,
                           ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1],
                           padding="SAME")

        def pert(shape):
            return NodePert(size=shape, sigma=sigma)

        def fb_conv(shape, name):
            return FeedbackConv(size=shape,
                                num_classes=10,
                                sparse=args.sparse,
                                rank=args.rank,
                                name=name)

        def dense(shape, name, activation, last_layer):
            return FullyConnected(size=shape,
                                  num_classes=10,
                                  init_weights=args.init,
                                  alpha=learning_rate,
                                  activation=activation,
                                  bias=args.bias,
                                  last_layer=last_layer,
                                  name=name,
                                  load=weights_fc,
                                  train=train_fc)

        def fb_fc(shape, name):
            return FeedbackFC(size=shape,
                              num_classes=10,
                              sparse=args.sparse,
                              rank=args.rank,
                              name=name)

        # Layers are created in the same order as before; the ``*p`` layers
        # add the perturbation used to train the feedback weights.
        l0 = conv([batch_size, 32, 32, 3], [5, 5, 3, 96], 'conv1')
        l1 = pool([batch_size, 32, 32, 96])
        l2p = pert([batch_size, 16, 16, 96])
        l2 = fb_conv([batch_size, 16, 16, 96], 'conv1_fb')

        l3 = conv([batch_size, 16, 16, 96], [5, 5, 96, 128], 'conv2')
        l4 = pool([batch_size, 16, 16, 128])
        l5p = pert([batch_size, 8, 8, 128])
        l5 = fb_conv([batch_size, 8, 8, 128], 'conv2_fb')

        l6 = conv([batch_size, 8, 8, 128], [5, 5, 128, 256], 'conv3')
        l7 = pool([batch_size, 8, 8, 256])
        l8p = pert([batch_size, 4, 4, 256])
        l8 = fb_conv([batch_size, 4, 4, 256], 'conv3_fb')

        l9 = ConvToFullyConnected(shape=[4, 4, 256])

        l10p = pert([batch_size, 4 * 4 * 256])
        l10 = dense([4 * 4 * 256, 2048], 'fc1', act, False)
        l11 = Dropout(rate=dropout_rate)
        l12 = fb_fc([4 * 4 * 256, 2048], 'fc1_fb')

        l13p = pert([batch_size, 2048])
        l13 = dense([2048, 2048], 'fc2', act, False)
        l14 = Dropout(rate=dropout_rate)
        l15 = fb_fc([2048, 2048], 'fc2_fb')

        l16 = dense([2048, 10], 'fc3', Linear(), True)

        ##############################################

        model = Model(layers=[
            l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14,
            l15, l16
        ])
        model_perturbed = Model(layers=[
            l0, l1, l2p, l2, l3, l4, l5p, l5, l6, l7, l8p, l8, l9, l10p, l10,
            l11, l12, l13p, l13, l14, l15, l16
        ])

        predict = model.predict(X=X)
        predict_perturbed = model_perturbed.predict(X=X)

        #######
        # Pairs of perturbations and feedback weights
        # feedbackpairs = [[l2p, l2], [l5p, l5], [l8p, l8], [l10p, l12], [l13p, l15]]

        # Test one at a time... this works, so it must be l10p, 12 pair that fails
        feedbackpairs = [[l2p, l2], [l5p, l5], [l8p, l8], [l13p, l15]]

        # Get noise, feedback matrices, and the perturbed and unperturbed loss
        # functions, to make the update rule for the feedback weights.
        loss = tf.reduce_sum(tf.pow(tf.nn.softmax(predict) - Y, 2), 1) / 2
        loss_perturbed = tf.reduce_sum(
            tf.pow(tf.nn.softmax(predict_perturbed) - Y, 2), 1) / 2

        train_B = []
        E = tf.nn.softmax(predict) - Y
        for idx, (noise, feedback) in enumerate(feedbackpairs):
            print(idx, batch_size, feedback.output_size)
            xi = tf.reshape(noise.get_noise(),
                            (batch_size, feedback.output_size))
            B = feedback.B
            # The Python value args.sigma is baked into the graph here; it is
            # the same value fed to the ``sigma`` placeholder in the B step.
            lambd = tf.matmul(
                tf.diag(loss_perturbed - loss) / args.sigma / args.sigma, xi)
            np_error = tf.matmul(E, B) - lambd
            grad_B = tf.matmul(tf.transpose(E), np_error)
            new_B = B.assign(B - args.beta * grad_B)
            train_B.append(new_B)
        #######

        # Result is unused here; the call is kept in case it adds graph ops.
        weights = model.get_weights()

        if args.opt == "adam" or args.opt == "rms" or args.opt == "decay":
            if args.dfa:
                grads_and_vars = model.dfa_gvs(X=X, Y=Y)
            else:
                grads_and_vars = model.gvs(X=X, Y=Y)

            if args.opt == "adam":
                train = tf.train.AdamOptimizer(
                    learning_rate=learning_rate,
                    beta1=0.9,
                    beta2=0.999,
                    epsilon=args.eps).apply_gradients(
                        grads_and_vars=grads_and_vars)
            elif args.opt == "rms":
                train = tf.train.RMSPropOptimizer(
                    learning_rate=learning_rate, decay=0.99,
                    epsilon=args.eps).apply_gradients(
                        grads_and_vars=grads_and_vars)
            else:
                # Only "decay" can reach this branch.
                train = tf.train.GradientDescentOptimizer(
                    learning_rate=learning_rate).apply_gradients(
                        grads_and_vars=grads_and_vars)

        else:
            if args.dfa:
                train = model.dfa(X=X, Y=Y)
            else:
                train = model.train(X=X, Y=Y)

        correct = tf.equal(tf.argmax(predict, 1), tf.argmax(Y, 1))
        total_correct = tf.reduce_sum(tf.cast(correct, tf.float32))

        ##############################################

        # Close the session at the end of every trial: the next trial resets
        # the default graph, which must not happen while a session is active.
        sess = tf.InteractiveSession()
        try:
            tf.global_variables_initializer().run()
            tf.local_variables_initializer().run()

            ##############################################

            filename = args.name + '.results'
            with open(filename, "w") as f:
                f.write(filename + "\n")
                f.write("total params: " + str(model.num_params()) + "\n")

            ##############################################

            for ii in range(EPOCHS):
                if args.opt == 'decay' or args.opt == 'gd':
                    decay = np.power(args.decay, ii)
                    lr = args.alpha * decay
                else:
                    lr = args.alpha

                print(ii)

                #############################

                _count = 0
                _total_correct = 0

                # The training loop... each batch gets a weight update and
                # then a node-perturbation update of the feedback weights.
                for jj in range(TRAIN_EXAMPLES // BATCH_SIZE):
                    xs = x_train[jj * BATCH_SIZE:(jj + 1) * BATCH_SIZE]
                    ys = y_train[jj * BATCH_SIZE:(jj + 1) * BATCH_SIZE]
                    _correct, _ = sess.run(
                        [total_correct, train],
                        feed_dict={
                            sigma: 0.0,
                            batch_size: BATCH_SIZE,
                            dropout_rate: args.dropout,
                            learning_rate: lr,
                            X_in: xs,
                            Y: ys
                        })

                    # Step that updates B, with the perturbation switched on.
                    sess.run(
                        [train_B],
                        feed_dict={
                            sigma: args.sigma,
                            batch_size: BATCH_SIZE,
                            dropout_rate: args.dropout,
                            learning_rate: lr,
                            X_in: xs,
                            Y: ys
                        })

                    _total_correct += _correct
                    _count += BATCH_SIZE

                train_acc = 1.0 * _total_correct / _count
                train_accs.append(train_acc)

                #############################

                _count = 0
                _total_correct = 0

                for jj in range(TEST_EXAMPLES // BATCH_SIZE):
                    xs = x_test[jj * BATCH_SIZE:(jj + 1) * BATCH_SIZE]
                    ys = y_test[jj * BATCH_SIZE:(jj + 1) * BATCH_SIZE]
                    _correct = sess.run(total_correct,
                                        feed_dict={
                                            sigma: 0.0,
                                            batch_size: BATCH_SIZE,
                                            dropout_rate: 0.0,
                                            learning_rate: 0.0,
                                            X_in: xs,
                                            Y: ys
                                        })

                    _total_correct += _correct
                    _count += BATCH_SIZE

                test_acc = 1.0 * _total_correct / _count
                test_accs.append(test_acc)

                # Placeholder entry; no NaN detection is performed here.
                isnan.append(None)

                # Save results...
                #############################

                print("train acc: %f test acc: %f" % (train_acc, test_acc))

                with open(filename, "a") as f:
                    f.write("train acc: %f test acc: %f\n" %
                            (train_acc, test_acc))
        finally:
            sess.close()

        # Save params after each run.  The payload is a pickle even though the
        # file name ends in ".npz".
        fn = "./cifar10_conv_np_hyperparam_search_varalpha_septsearch_2_dfa_%d_fblearning_%d.npz" % (
            args.dfa, args.feedbacklearning)
        to_save = {
            'attr': attrs,
            'params': params,
            'train_accs': train_accs,
            'test_accs': test_accs,
            'isnan': isnan
        }
        with open(fn, "wb") as out:
            pickle.dump(to_save, out)
Beispiel #6
0
# Layers l2..l16; l0, l1 and l2p are created before this excerpt.
# Construction order is kept exactly as written.
l2 = FeedbackConv(size=[batch_size, 16, 16, 96], num_classes=10,
                  sparse=args.sparse, rank=args.rank, name='conv1_fb')

l3 = Convolution(input_sizes=[batch_size, 16, 16, 96],
                 filter_sizes=[5, 5, 96, 128], num_classes=10,
                 init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME",
                 alpha=learning_rate, activation=act, bias=args.bias,
                 last_layer=False, name='conv2', load=weights_conv,
                 train=train_conv)
l4 = MaxPool(size=[batch_size, 16, 16, 128], ksize=[1, 3, 3, 1],
             strides=[1, 2, 2, 1], padding="SAME")
l5p = NodePert(size=[batch_size, 8, 8, 128], sigma=sigma)
l5 = FeedbackConv(size=[batch_size, 8, 8, 128], num_classes=10,
                  sparse=args.sparse, rank=args.rank, name='conv2_fb')

l6 = Convolution(input_sizes=[batch_size, 8, 8, 128],
                 filter_sizes=[5, 5, 128, 256], num_classes=10,
                 init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME",
                 alpha=learning_rate, activation=act, bias=args.bias,
                 last_layer=False, name='conv3', load=weights_conv,
                 train=train_conv)
l7 = MaxPool(size=[batch_size, 8, 8, 256], ksize=[1, 3, 3, 1],
             strides=[1, 2, 2, 1], padding="SAME")
l8p = NodePert(size=[batch_size, 4, 4, 256], sigma=sigma)
l8 = FeedbackConv(size=[batch_size, 4, 4, 256], num_classes=10,
                  sparse=args.sparse, rank=args.rank, name='conv3_fb')

l9 = ConvToFullyConnected(shape=[4, 4, 256])

l10p = NodePert(size=[batch_size, 4 * 4 * 256], sigma=sigma)
l10 = FullyConnected(size=[4 * 4 * 256, 2048], num_classes=10,
                     init_weights=args.init, alpha=learning_rate,
                     activation=act, bias=args.bias, last_layer=False,
                     name='fc1', load=weights_fc, train=train_fc)
l11 = Dropout(rate=dropout_rate)
l12 = FeedbackFC(size=[4 * 4 * 256, 2048], num_classes=10,
                 sparse=args.sparse, rank=args.rank, name='fc1_fb')

l13p = NodePert(size=[batch_size, 2048], sigma=sigma)
l13 = FullyConnected(size=[2048, 2048], num_classes=10,
                     init_weights=args.init, alpha=learning_rate,
                     activation=act, bias=args.bias, last_layer=False,
                     name='fc2', load=weights_fc, train=train_fc)
l14 = Dropout(rate=dropout_rate)
l15 = FeedbackFC(size=[2048, 2048], num_classes=10,
                 sparse=args.sparse, rank=args.rank, name='fc2_fb')

# Output layer: linear activation, flagged as the last layer.
l16 = FullyConnected(size=[2048, 10], num_classes=10,
                     init_weights=args.init, alpha=learning_rate,
                     activation=Linear(), bias=args.bias, last_layer=True,
                     name='fc3', load=weights_fc, train=train_fc)

##############################################

# Same layer objects in both models; the second one additionally inserts the
# NodePert layers (the ``*p`` names).
model = Model(layers=[
    l0, l1, l2, l3, l4, l5, l6, l7, l8, l9,
    l10, l11, l12, l13, l14, l15, l16,
])
model_perturbed = Model(layers=[
    l0, l1, l2p, l2, l3, l4, l5p, l5, l6, l7, l8p, l8, l9,
    l10p, l10, l11, l12, l13p, l13, l14, l15, l16,
])
Beispiel #7
0
# Start from a fresh default graph, then seed it.  The graph-level seed is
# stored on the current default graph, so it must be set after the reset;
# seeding first would be discarded together with the old graph.
tf.reset_default_graph()
tf.set_random_seed(0)

# Scalar placeholders supplied at run time.
batch_size = tf.placeholder(tf.int32, shape=())
dropout_rate = tf.placeholder(tf.float32, shape=())
lr = tf.placeholder(tf.float32, shape=())

# Image batch and target placeholders.
X = tf.placeholder(tf.float32, [None, 32, 32, 3])
Y = tf.placeholder(tf.float32, [None, 10])

l0 = ConvToFullyConnected(input_shape=[32, 32, 3])
# Fixed rate here, unlike l3 below which uses the dropout_rate placeholder.
l1 = Dropout(rate=0.1)

# 3072 == 32 * 32 * 3, the flattened size of one input image.
l2 = FullyConnected(input_shape=3072,
                    size=1000,
                    init=args.init,
                    activation=act,
                    bias=args.bias,
                    name='fc1')
l3 = Dropout(rate=dropout_rate)
l4 = FeedbackFC(size=[3072, 1000],
                num_classes=10,
                sparse=args.sparse,
                rank=args.rank,
                name='fc1_fb')

l5 = FullyConnected(input_shape=1000,
                    size=1000,
                    init=args.init,
                    activation=act,
                    bias=args.bias,
                    name='fc2')
Beispiel #8
0
# Start from a fresh default graph, then seed it.  The graph-level seed is
# stored on the current default graph, so it must be set after the reset;
# seeding first would be discarded together with the old graph.
tf.reset_default_graph()
tf.set_random_seed(0)

# Scalar placeholders supplied at run time.
batch_size = tf.placeholder(tf.int32, shape=())
dropout_rate = tf.placeholder(tf.float32, shape=())
learning_rate = tf.placeholder(tf.float32, shape=())

Y = tf.placeholder(tf.float32, [None, 10])
X = tf.placeholder(tf.float32, [None, 3072])

# Input dropout runs at one fifth of the rate used after fc1.
l0 = Dropout(rate=dropout_rate / 5.)

l1 = FullyConnected(size=[3072, 1000],
                    num_classes=10,
                    init_weights=args.init,
                    alpha=learning_rate,
                    activation=act,
                    bias=args.bias,
                    last_layer=False,
                    name="fc1")
l2 = Dropout(rate=dropout_rate)
l3 = FeedbackFC(size=[3072, 1000],
                num_classes=10,
                sparse=args.sparse,
                rank=args.rank,
                name="fc1_fb")

# NOTE(review): this call looks like two different excerpts joined together
# (a fully connected layer that ends with a conv-style name, followed by
# conv/pool layers that reuse the names l3 and l4) -- confirm against the
# original scripts before relying on it.
l4 = FullyConnected(size=[1000, 1000],
                    num_classes=10,
                    init_weights=args.init,
                    alpha=learning_rate,
                    activation=act,
                    name='conv2')
# l3 and l4 are rebound from here on; the objects created above are dropped.
l3 = MaxPool(size=[batch_size, 28, 28, 64],
             ksize=[1, 3, 3, 1],
             strides=[1, 2, 2, 1],
             padding="SAME")
l4 = FeedbackConv(size=[batch_size, 14, 14, 64],
                  num_classes=10,
                  sparse=args.sparse,
                  rank=args.rank,
                  name='conv2_fb')

l5 = ConvToFullyConnected(input_shape=[14, 14, 64])

l6 = FullyConnected(input_shape=14 * 14 * 64,
                    size=128,
                    init=args.init,
                    activation=act,
                    bias=args.bias,
                    name='fc1')
l7 = Dropout(rate=dropout_rate)
l8 = FeedbackFC(size=[14 * 14 * 64, 128],
                num_classes=10,
                sparse=args.sparse,
                rank=args.rank,
                name='fc1_fb')

# The last layer is built without an explicit ``activation`` argument.
l9 = FullyConnected(input_shape=128,
                    size=10,
                    init=args.init,
                    bias=args.bias,
                    name='fc2')
Beispiel #10
0
class LELPPO(Layer):
    """Pass-through layer carrying a locally trained PPO head.

    In the surrounding network the layer is an identity: ``forward`` returns
    its input and ``output_shape`` returns ``input_shape``.  Internally it
    owns an average pool, a flattening layer and two fully connected heads
    (action logits and a scalar value).  ``lel_backward`` / ``lel_gv`` build a
    clipped PPO loss on those heads and push its gradient back through them.
    """

    def __init__(self, input_shape, pool_shape, nactions, name=None):
        """Create the internal pool/flatten/heads and the PPO placeholders.

        Args:
            input_shape: four-element sequence unpacked as
                (batch_size, h, w, fin).
            pool_shape: passed to ``AvgPool`` as both ``ksize`` and
                ``strides``.
            nactions: number of units of the action-logits head.
            name: prefix for cache keys, placeholder keys and head names.
                It is concatenated with string suffixes, so the default
                ``None`` raises ``TypeError``.
        """
        self.input_shape = input_shape
        self.batch_size, self.h, self.w, self.fin = self.input_shape
        self.pool_shape = pool_shape
        self.nactions = nactions
        self.name = name
        self.action_name = self.name + '_action'
        self.value_name = self.name + '_value'
        self.nlp_name = self.name + '_nlp'

        self.pool = AvgPool(size=self.input_shape,
                            ksize=self.pool_shape,
                            strides=self.pool_shape,
                            padding='SAME')

        # Each internal layer is sized from the shape reported by the one
        # before it (the original read undefined names l1 / l2 here).
        conv2fc_input_shape = self.pool.output_shape()
        self.conv2fc = ConvToFullyConnected(input_shape=conv2fc_input_shape)

        head_input_shape = self.conv2fc.output_shape()
        self.actions = FullyConnected(input_shape=head_input_shape,
                                      size=self.nactions,
                                      init='alexnet',
                                      name=self.name + '_actions')
        self.values = FullyConnected(input_shape=head_input_shape,
                                     size=1,
                                     init='alexnet',
                                     name=self.name + '_values')

        ####################################################

        # Zero-valued variables added to the head outputs so that the loss
        # can be differentiated with respect to those outputs.
        # np.zeros needs concrete integers, so batch_size must be a Python
        # int here, not a tensor.
        self.logits_bias = tf.Variable(np.zeros(shape=(self.batch_size,
                                                       self.nactions)),
                                       dtype=tf.float32)
        self.values_bias = tf.Variable(np.zeros(shape=(self.batch_size, 1)),
                                       dtype=tf.float32)

        ####################################################

        self.advantages = tf.placeholder("float", [None])
        self.rewards = tf.placeholder("float", [None])

        self.old_actions = tf.placeholder("int32", [None])
        self.old_values = tf.placeholder("float", [None])
        self.old_nlps = tf.placeholder("float", [None])

    def get_weights(self):
        """Return an empty list; the head weights are not exported."""
        return []

    def output_shape(self):
        """Return ``input_shape`` unchanged, since the layer is an identity."""
        return self.input_shape

    def num_params(self):
        """Report zero parameters for this layer."""
        return 0

    def place_holders(self):
        """Return the PPO placeholders keyed by ``name`` plus a suffix."""
        return {
            self.name + '_advantages': self.advantages,
            self.name + '_rewards': self.rewards,
            self.name + '_old_actions': self.old_actions,
            self.name + '_old_values': self.old_values,
            self.name + '_old_nlps': self.old_nlps,
        }

    ###################################################################

    def forward(self, X):
        """Identity in the main forward pass."""
        return X

    def predict(self, X):
        """Return ``X`` together with sampled actions, values and neg-log-probs.

        The second return value is a dict keyed by ``action_name``,
        ``value_name`` and ``nlp_name``.
        """
        # The original pooled an undefined name ``AI``; the input is ``X``.
        pool = self.pool.forward(X)
        conv2fc = self.conv2fc.forward(pool)
        logits = self.actions.forward(conv2fc)
        values = self.values.forward(conv2fc)

        values = tf.reshape(values, (-1, ))
        actions = sample(logits)
        nlps = neg_log_prob(logits, actions)

        cache = {
            self.action_name: actions,
            self.value_name: values,
            self.nlp_name: nlps
        }
        return X, cache

    ###################################################################

    def backward(self, AI, AO, DO):
        """Pass the incoming gradient through unchanged."""
        return DO

    def gv(self, AI, AO, DO):
        """No gradient/variable pairs for plain backprop."""
        return []

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        """Pass the incoming gradient through unchanged."""
        return DO

    def dfa_gv(self, AI, AO, E, DO):
        """No gradient/variable pairs for DFA."""
        return []

    ###################################################################

    def _local_gradients(self, AI):
        """Run the heads on ``AI``, build the PPO loss and differentiate it.

        Returns:
            (pool, conv2fc, logits, values, do_logits, do_values), where
            ``logits`` / ``values`` already include the zero bias variables,
            ``values`` is flattened, and ``do_*`` are the gradients of the
            loss with respect to ``logits_bias`` / ``values_bias``.
        """
        pool = self.pool.forward(AI)
        conv2fc = self.conv2fc.forward(pool)
        logits = self.actions.forward(conv2fc)
        values = self.values.forward(conv2fc)

        logits = logits + self.logits_bias
        values = values + self.values_bias
        values = tf.reshape(values, (-1, ))
        nlps = neg_log_prob(logits, self.old_actions)

        # NOTE(review): epsilon_decay and train are not defined in this class
        # and must come from module scope.  policy_entropy presumably expects
        # the logits rather than ``train`` -- confirm before relying on it.
        # NOTE(review): if neg_log_prob returns -log(p), this ratio is
        # p_old / p_new rather than the usual p_new / p_old -- confirm.
        ratio = tf.exp(nlps - self.old_nlps)
        ratio = tf.clip_by_value(ratio, 0, 10)
        surr1 = self.advantages * ratio
        surr2 = self.advantages * tf.clip_by_value(ratio, 1 - epsilon_decay,
                                                   1 + epsilon_decay)
        policy_loss = -tf.reduce_mean(tf.minimum(surr1, surr2))

        entropy_loss = -policy_entropy(train)

        clipped_value_estimate = self.old_values + tf.clip_by_value(
            values - self.old_values, -epsilon_decay, epsilon_decay)
        value_loss_1 = tf.squared_difference(clipped_value_estimate,
                                             self.rewards)
        value_loss_2 = tf.squared_difference(values, self.rewards)
        value_loss = 0.5 * tf.reduce_mean(
            tf.maximum(value_loss_1, value_loss_2))

        loss = policy_loss + 0.01 * entropy_loss + 1. * value_loss
        # Differentiate the local ``loss``; the original referenced
        # ``self.loss``, which is never assigned.
        do_logits, do_values = tf.gradients(
            loss, [self.logits_bias, self.values_bias])

        return pool, conv2fc, logits, values, do_logits, do_values

    def lel_backward(self, AI, AO, DO, cache):
        """Return the PPO-loss gradient with respect to ``AI``.

        ``forward`` is an identity, so the heads are evaluated here; ``AO``,
        ``DO`` and ``cache`` are not used.
        """
        (pool, conv2fc, logits, values,
         do_logits, do_values) = self._local_gradients(AI)

        dlogits = self.actions.backward(conv2fc, logits, do_logits)
        dvalues = self.values.backward(conv2fc, values, do_values)
        dconv2fc = self.conv2fc.backward(pool, conv2fc, dlogits + dvalues)
        dpool = self.pool.backward(AI, pool, dconv2fc)

        return dpool

    def lel_gv(self, AI, AO, DO, cache):
        """Return the gradient/variable entries of both heads as one list.

        ``AO``, ``DO`` and ``cache`` are not used.
        """
        (_, conv2fc, logits, values,
         do_logits, do_values) = self._local_gradients(AI)

        gvs = []
        # list.extend takes exactly one iterable, so extend once per head.
        gvs.extend(self.actions.gv(conv2fc, logits, do_logits))
        gvs.extend(self.values.gv(conv2fc, values, do_values))

        return gvs
Beispiel #11
0
# Reset the default graph first and seed it afterwards: tf.set_random_seed
# stores the seed on the current default graph, so a seed set before the reset
# is thrown away together with the old graph.
tf.reset_default_graph()
tf.set_random_seed(0)

# Scalar placeholders supplied at run time.
batch_size = tf.placeholder(tf.int32, shape=())
dropout_rate = tf.placeholder(tf.float32, shape=())
lr = tf.placeholder(tf.float32, shape=())

X = tf.placeholder(tf.float32, [None, 28, 28, 1])
# Standardize every image independently.  X is rebound to the standardized
# tensor, so the raw placeholder is no longer reachable by name.
# NOTE(review): feeding X through feed_dict would therefore replace the
# standardized tensor itself -- confirm that is the intended feed target.
X = tf.map_fn(tf.image.per_image_standardization, X)
Y = tf.placeholder(tf.float32, [None, 10])

# Flatten the 28x28x1 images for the fully connected layers.
l0 = ConvToFullyConnected(input_shape=[28, 28, 1])

# Hidden layer (784 -> 400) followed by dropout and its feedback layer.
l1 = FullyConnected(input_shape=784,
                    size=400,
                    init=args.init,
                    activation=act,
                    bias=args.bias,
                    name='fc1')
l2 = Dropout(rate=dropout_rate)
l3 = FeedbackFC(size=[784, 400],
                num_classes=10,
                sparse=args.sparse,
                rank=args.rank,
                name='fc1_fb')

# Output layer with 10 units; no activation argument is passed.
l4 = FullyConnected(input_shape=400,
                    size=10,
                    init=args.init,
                    bias=args.bias,
                    name='fc2')
Beispiel #12
0
class LELFC(Layer):
    """Pass-through layer with a local softmax classifier for error signals.

    The layer is an identity in the forward pass.  ``lel_backward`` and
    ``lel_gv`` run an internal linear ``FullyConnected`` layer with
    ``num_classes`` outputs on the layer input, take ``softmax(S) - Y`` as the
    error, and delegate to that internal layer's ``backward`` / ``gv``.
    """

    def __init__(self, input_shape, num_classes, name=None):
        """Store the configuration and build the internal classifier.

        Args:
            input_shape: forwarded to the internal ``FullyConnected`` layer
                and returned by ``output_shape``.
            num_classes: number of outputs of the internal classifier.
            name: name given to the internal ``FullyConnected`` layer.
        """
        self.num_classes = num_classes
        self.input_shape = input_shape
        self.name = name

        # Author's note: the earlier problem was never the bias; it was that
        # we were not dividing by N.
        self.l0 = FullyConnected(input_shape=input_shape,
                                 size=self.num_classes,
                                 init='alexnet',
                                 activation=Linear(),
                                 bias=0.,
                                 name=self.name)

    def get_weights(self):
        """Return an empty list; the internal classifier is not exported."""
        return []

    def get_feedback(self):
        """Return ``self.B``.

        NOTE(review): nothing in this class assigns ``self.B`` (the code that
        created it is no longer active), so unless the base class provides it
        this raises ``AttributeError`` -- confirm with callers.
        """
        return self.B

    def output_shape(self):
        """Return ``input_shape`` unchanged, since the layer is an identity."""
        return self.input_shape

    def num_params(self):
        """Report zero parameters for this layer."""
        return 0

    def forward(self, X):
        """Identity in the forward pass."""
        return X

    ###################################################################

    def backward(self, AI, AO, DO):
        """Pass the incoming gradient through unchanged."""
        return DO

    def gv(self, AI, AO, DO):
        """No gradient/variable pairs for plain backprop."""
        return []

    def train(self, AI, AO, DO):
        """No training ops for plain backprop."""
        return []

    ###################################################################

    def dfa_backward(self, AI, AO, E, DO):
        """Pass the incoming gradient through unchanged."""
        return DO

    def dfa_gv(self, AI, AO, E, DO):
        """No gradient/variable pairs for DFA."""
        return []

    def dfa(self, AI, AO, E, DO):
        """No training ops for DFA."""
        return []

    ###################################################################

    # > https://ml-cheatsheet.readthedocs.io/en/latest/loss_functions.html
    # > https://www.ics.uci.edu/~pjsadows/notes.pdf
    # > https://deepnotes.io/softmax-crossentropy
    def lel_backward(self, AI, AO, E, DO, Y):
        """Return the local-classifier gradient with respect to ``AI``.

        The error fed to the internal layer is ``softmax(S) - Y`` where ``S``
        is the internal layer's output for ``AI``.  ``AO``, ``E`` and ``DO``
        are not used.
        """
        scores = self.l0.forward(AI)
        error = tf.subtract(tf.nn.softmax(scores), Y)
        return self.l0.backward(AI, scores, error)

    def lel_gv(self, AI, AO, E, DO, Y):
        """Return the internal classifier's ``gv`` result for the same error.

        ``AO``, ``E`` and ``DO`` are not used.
        """
        scores = self.l0.forward(AI)
        error = tf.subtract(tf.nn.softmax(scores), Y)
        return self.l0.gv(AI, scores, error)

    def lel(self, AI, AO, E, DO, Y):
        """Unsupported entry point; always raises ``AssertionError``."""
        # An explicit raise keeps the original exception type but, unlike a
        # bare assert statement, is not stripped when Python runs with -O.
        raise AssertionError('LELFC.lel is not implemented')
Beispiel #13
0
TEST_EXAMPLES = 10000
BATCH_SIZE = args.batch_size

##############################################

# Reset the default graph first and seed it afterwards: tf.set_random_seed
# stores the seed on the current default graph, so a seed set before the reset
# is thrown away together with the old graph.
tf.reset_default_graph()
tf.set_random_seed(0)

# Scalar placeholders supplied at run time.
batch_size = tf.placeholder(tf.int32, shape=())
dropout_rate = tf.placeholder(tf.float32, shape=())
learning_rate = tf.placeholder(tf.float32, shape=())

# Flattened inputs with 784 columns and targets with 10 columns.
X = tf.placeholder(tf.float32, [None, 784])
Y = tf.placeholder(tf.float32, [None, 10])

# Hidden layer (784 -> 400, tanh) followed by dropout and its feedback layer.
l0 = FullyConnected(size=[784, 400],
                    num_classes=10,
                    init_weights=args.init,
                    alpha=learning_rate,
                    activation=Tanh(),
                    bias=args.bias,
                    l2=args.l2,
                    last_layer=False,
                    name="fc1")
l1 = Dropout(rate=dropout_rate)
l2 = FeedbackFC(size=[784, 400],
                num_classes=10,
                sparse=args.sparse,
                rank=args.rank,
                name="fc1_fb")

# Linear output layer (400 -> 10), flagged as the last layer.
l3 = FullyConnected(size=[400, 10],
                    num_classes=10,
                    init_weights=args.init,
                    alpha=learning_rate,
                    activation=Linear(),
                    bias=args.bias,
                    l2=args.l2,
                    last_layer=True,
                    name="fc2")

model = Model(layers=[l0, l1, l2, l3])

##############################################

predict = model.predict(X=X)

weights = model.get_weights()

if args.opt == "adam" or args.opt == "rms" or args.opt == "decay":
    if args.dfa:
                 name='conv3')
# NOTE(review): the start of this listing (including the layer named 'conv3'
# that precedes this pool) is not part of the visible text.
# 3x3 max pool with stride 2 and SAME padding, declared on an 8x8x256 map.
l7 = MaxPool(size=[batch_size, 8, 8, 256],
             ksize=[1, 3, 3, 1],
             strides=[1, 2, 2, 1],
             padding="SAME")
# Feedback layer declared on the 4x4x256 map.
l8 = FeedbackConv(size=[batch_size, 4, 4, 256],
                  num_classes=10,
                  sparse=args.sparse,
                  rank=args.rank,
                  name='conv3_fb')

# Flatten the 4x4x256 feature map for the fully connected layers.
l9 = ConvToFullyConnected(input_shape=[4, 4, 256])

# First fully connected layer (4*4*256 -> 2048), followed by dropout and a
# feedback layer declared with the matching size.
l10 = FullyConnected(input_shape=4 * 4 * 256,
                     size=2048,
                     init=args.init,
                     activation=act,
                     bias=args.bias,
                     name='fc1')
l11 = Dropout(rate=dropout_rate)
l12 = FeedbackFC(size=[4 * 4 * 256, 2048],
                 num_classes=10,
                 sparse=args.sparse,
                 rank=args.rank,
                 name='fc1_fb')

# Second fully connected layer (2048 -> 2048).
l13 = FullyConnected(input_shape=2048,
                     size=2048,
                     init=args.init,
                     activation=act,
                     bias=args.bias,
                     name='fc2')