Example #1
import numpy as np

class Dense(Layer):  # Layer, init_matrix, and Linear come from the surrounding project
    def __init__(self,
                 input_shape,
                 size,
                 activation=None,
                 init=None,
                 lr=0.001):

        self.input_shape = input_shape
        self.size = size
        self.init = init
        self.activation = Linear() if activation is None else activation
        self.lr = lr

        self.bias = np.zeros(shape=size)
        self.weights = init_matrix(size=(self.input_shape, self.size),
                                   init=self.init)
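
The constructor relies on an init_matrix helper that the snippet does not show. Below is a minimal sketch of what such a helper might look like; the "xavier" option and the small-random default are assumptions for illustration, not the project's actual code.

import numpy as np

# Hypothetical stand-in for the init_matrix helper called above.
# The "xavier" branch and the scaled-normal default are assumed behaviors.
def init_matrix(size, init=None):
    fan_in, fan_out = size
    if init == "xavier":
        limit = np.sqrt(6.0 / (fan_in + fan_out))   # Glorot uniform bound
        return np.random.uniform(-limit, limit, size=size)
    return 0.01 * np.random.randn(fan_in, fan_out)  # small random default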
Example #2
import numpy as np
import tensorflow as tf

class Dense(Layer):
    def __init__(self,
                 input_shape,
                 size,
                 activation=None,
                 init=None,
                 lr=0.001):

        self.input_shape = input_shape
        self.size = size
        self.init = init
        self.activation = Linear() if activation is None else activation
        self.lr = lr

        bias = np.zeros(shape=size)
        weights = init_matrix(size=(self.input_shape, self.size),
                              init=self.init)

        self.bias = tf.Variable(bias, dtype=tf.float32)
        self.weights = tf.Variable(weights, dtype=tf.float32)

    ###################################################################

    def get_weights(self):
        raise NotImplementedError

    def num_params(self):
        raise NotImplementedError

    ###################################################################

    def forward(self, X):
        Z = tf.matmul(X, self.weights) + self.bias
        A = self.activation.forward(Z)
        return A, None

    def backward(self, AI, AO, DO, cache):
        DO = DO * self.activation.gradient(AO)
        DI = tf.matmul(DO, tf.transpose(self.weights))

        DW = tf.matmul(tf.transpose(AI), DO)
        DB = tf.reduce_sum(DO, axis=0)

        return DI, [(DW, self.weights), (DB, self.bias)]
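
Note that this TensorFlow variant does not apply its own update: backward returns (gradient, variable) pairs, the shape expected by an optimizer's apply_gradients. A minimal usage sketch in eager mode, assuming Linear and init_matrix are available from the project; the batch shape and the all-ones upstream gradient are illustrative only.

import numpy as np
import tensorflow as tf

layer = Dense(input_shape=784, size=128)              # assumes Linear/init_matrix exist
X = tf.constant(np.random.randn(32, 784), dtype=tf.float32)

A, cache = layer.forward(X)                           # A has shape [32, 128]
DO = tf.ones_like(A)                                  # stand-in upstream gradient
DI, grads_and_vars = layer.backward(X, A, DO, cache)  # pairs for apply_gradients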
Example #3
import numpy as np

class Dense(Layer):
    def __init__(self,
                 input_shape,
                 size,
                 activation=None,
                 init=None,
                 lr=0.001):

        self.input_shape = input_shape
        self.size = size
        self.init = init
        self.activation = Linear() if activation is None else activation
        self.lr = lr

        self.bias = np.zeros(shape=size)
        self.weights = init_matrix(size=(self.input_shape, self.size),
                                   init=self.init)

    ###################################################################

    def get_weights(self):
        raise NotImplementedError

    def num_params(self):
        raise NotImplementedError

    ###################################################################

    def forward(self, X):
        Z = X @ self.weights + self.bias
        A = self.activation.forward(Z)
        return A, None

    def backward(self, AI, AO, DO, cache):
        DO = DO * self.activation.gradient(AO)
        DI = DO @ self.weights.T

        DW = AI.T @ DO
        DB = np.sum(DO, axis=0)

        self.weights -= self.lr * DW
        self.bias -= self.lr * DB

        return DI
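
Unlike the TensorFlow variant, this NumPy version folds the SGD step into backward itself and returns only the input gradient. A small self-contained sketch of one training step under the same assumptions (Linear and init_matrix provided elsewhere); the MSE-style upstream gradient is illustrative.

import numpy as np

layer = Dense(input_shape=4, size=3, lr=0.1)  # assumes Linear/init_matrix exist
X = np.random.randn(8, 4)
target = np.zeros((8, 3))

A, cache = layer.forward(X)
DO = (A - target) / len(X)                    # dL/dA for a mean-squared-error loss
DI = layer.backward(X, A, DO, cache)          # updates weights and bias in place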
Example #4
import numpy as np
import tensorflow as tf

class Dense(Layer):  # same constructor as Example #2; Layer/init_matrix/Linear assumed from the project
    def __init__(self,
                 input_shape,
                 size,
                 activation=None,
                 init=None,
                 lr=0.001):

        self.input_shape = input_shape
        self.size = size
        self.init = init
        self.activation = Linear() if activation is None else activation
        self.lr = lr

        bias = np.zeros(shape=size)
        weights = init_matrix(size=(self.input_shape, self.size),
                              init=self.init)

        self.bias = tf.Variable(bias, dtype=tf.float32)
        self.weights = tf.Variable(weights, dtype=tf.float32)
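
Every constructor above falls back to Linear() when no activation is passed, and forward/backward call activation.forward and activation.gradient. A plausible minimal sketch of that interface (assumed for illustration, not taken from the project):

import numpy as np

# Hypothetical identity activation matching the forward()/gradient() calls above.
class Linear:
    def forward(self, Z):
        return Z                   # identity
    def gradient(self, A):
        return np.ones_like(A)     # derivative of the identity is 1 everywhere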
Example #5
                     name="fc2",
                     load=weights_fc,
                     train=train_fc)
l23 = Dropout(rate=dropout_rate)
l24 = FeedbackFC(size=[4096, 4096],
                 num_classes=num_classes,
                 sparse=args.sparse,
                 rank=args.rank,
                 name="fc2_fb",
                 load=weights_fc)

l25 = FullyConnected(size=[4096, num_classes],
                     num_classes=num_classes,
                     init_weights=args.init,
                     alpha=learning_rate,
                     activation=Linear(),
                     bias=args.bias,
                     last_layer=True,
                     name="fc3",
                     load=weights_fc,
                     train=train_fc)

###############################################################

model = Model(layers=[
    l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16,
    l17, l18, l19, l20, l21, l22, l23, l24, l25
])

predict = tf.nn.softmax(model.predict(X=features))
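
predict here is only a graph tensor; in this TF1-style code it would be evaluated inside a session. A hedged sketch of how such a graph is typically driven, assuming features is a placeholder and x_batch a matching NumPy batch (both names are illustrative):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(predict, feed_dict={features: x_batch})  # class probabilities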
Example #6
l4 = MaxPool(size=[batch_size, 16, 16, 128], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l5 = FeedbackConv(size=[batch_size, 8, 8, 128], num_classes=100, sparse=sparse, rank=rank)

l6 = Convolution(input_sizes=[batch_size, 8, 8, 128], filter_sizes=[5, 5, 128, 256], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l7 = MaxPool(size=[batch_size, 8, 8, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l8 = FeedbackConv(size=[batch_size, 4, 4, 256], num_classes=100, sparse=sparse, rank=rank)

l9 = ConvToFullyConnected(shape=[4, 4, 256])
l10 = FullyConnected(size=[4*4*256, 2048], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Tanh(), last_layer=False)
l11 = FeedbackFC(size=[4*4*256, 2048], num_classes=100, sparse=sparse, rank=rank)

l12 = FullyConnected(size=[2048, 2048], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Tanh(), last_layer=False)
l13 = FeedbackFC(size=[2048, 2048], num_classes=100, sparse=sparse, rank=rank)

# have to adjust lr if using sigmoid
l14 = FullyConnected(size=[2048, 100], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Linear(), last_layer=True)

model = Model(layers=[l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14])

predict = model.predict(X=XTEST)

if args.dfa:
    grads_and_vars = model.dfa(X=XTRAIN, Y=YTRAIN)
else:
    grads_and_vars = model.train(X=XTRAIN, Y=YTRAIN)
    
if args.opt == "adam":
    optimizer = tf.train.AdamOptimizer(learning_rate=ALPHA, beta1=0.9, beta2=0.999, epsilon=1.0).apply_gradients(grads_and_vars=grads_and_vars)
elif args.opt == "rms":
    optimizer = tf.train.RMSPropOptimizer(learning_rate=ALPHA, decay=1.0, momentum=0.0).apply_gradients(grads_and_vars=grads_and_vars)
else:
    # assumed fallback (the snippet is truncated here): plain gradient descent
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=ALPHA).apply_gradients(grads_and_vars=grads_and_vars)
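
Once optimizer has been built with apply_gradients, a run alternates sess.run(optimizer) with evaluation of predict. A minimal TF1 driver sketch, assuming XTRAIN/YTRAIN/XTEST are tensors baked into the graph (with placeholders a feed_dict would be needed); the epoch count is illustrative.

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):        # illustrative epoch count
        sess.run(optimizer)        # one update over the XTRAIN/YTRAIN graph
    test_out = sess.run(predict)   # forward pass over the XTEST graph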
Example #7
    l2 = Convolution(input_sizes=[batch_size, 27, 27, 96], filter_sizes=[5, 5, 96, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l3 = MaxPool(size=[batch_size, 27, 27, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

    l4 = Convolution(input_sizes=[batch_size, 13, 13, 256], filter_sizes=[3, 3, 256, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)

    l5 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)

    l6 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
    l7 = MaxPool(size=[batch_size, 13, 13, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

    l8 = ConvToFullyConnected(shape=[6, 6, 256])
    l9 = FullyConnected(size=[6*6*256, 4096], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)

    l10 = FullyConnected(size=[4096, 4096], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)

    l11 = FullyConnected(size=[4096, num_classes], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Linear(), bias=0.0, last_layer=True)

    model = Model(layers=[l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11])

    predict = tf.nn.softmax(model.predict(X=features))

    if args.dfa:
        train = model.dfa(X=features, Y=labels)
    else:
        train = model.train(X=features, Y=labels)

    correct = tf.equal(tf.argmax(predict, 1), tf.argmax(labels, 1))
    total_correct = tf.reduce_sum(tf.cast(correct, tf.float32))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    print(model.num_params())
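
The correct/total_correct/accuracy ops only define graph nodes; evaluating them takes a session loop. A hedged sketch, assuming features and labels are placeholders fed with NumPy arrays (the batch iterable and test-set names are illustrative):

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for x_batch, y_batch in batches:   # illustrative batch iterable
        sess.run(train, feed_dict={features: x_batch, labels: y_batch})
    acc = sess.run(accuracy, feed_dict={features: x_test, labels: y_test})
    print("test accuracy:", acc)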