def __init__(self, input_shape, size, activation=None, init=None, lr=0.001):
    self.input_shape = input_shape
    self.size = size
    self.init = init
    self.activation = Linear() if activation is None else activation
    self.lr = lr
    # In this variant the parameters live as plain numpy arrays.
    self.bias = np.zeros(shape=size)
    self.weights = init_matrix(size=(self.input_shape, self.size), init=self.init)
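The constructor relies on an `init_matrix` helper defined elsewhere in the repo. A minimal sketch of what such an initializer could look like, assuming `init` selects a fan-in-scaled uniform distribution; the option name "sqrt_fan_in" and the fallback bound here are illustrative assumptions, not the repo's actual choices:

import numpy as np

def init_matrix(size, init=None):
    # Hypothetical sketch, not the repo's implementation: "sqrt_fan_in"
    # scales a uniform fill by 1/sqrt(fan_in); any other value falls back
    # to a small fixed-range uniform fill.
    fan_in, _ = size
    if init == "sqrt_fan_in":
        bound = 1.0 / np.sqrt(fan_in)
    else:
        bound = 0.05
    return np.random.uniform(low=-bound, high=bound, size=size)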
def __init__(self, input_shape, size, activation=None, init=None, lr=0.001):
    self.input_shape = input_shape
    self.size = size
    self.init = init
    self.activation = Linear() if activation is None else activation
    self.lr = lr
    # Build the parameters in numpy first, then wrap them as trainable
    # TF variables so the optimizer can update them.
    bias = np.zeros(shape=size)
    weights = init_matrix(size=(self.input_shape, self.size), init=self.init)
    self.bias = tf.Variable(bias, dtype=tf.float32)
    self.weights = tf.Variable(weights, dtype=tf.float32)
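For comparison, the same parameter setup can be written without the class wrapper. A standalone sketch, assuming TensorFlow 1.x as used throughout the repo; the shapes and the plain uniform initializer stand in for `init_matrix`:

import numpy as np
import tensorflow as tf  # assumes TF 1.x, matching the rest of the repo

input_shape, size = 784, 100
bias = np.zeros(shape=size)
weights = np.random.uniform(low=-0.05, high=0.05, size=(input_shape, size))

# Wrapping the numpy arrays as tf.Variable makes them trainable, so the
# gradients produced later by model.train()/model.dfa() can be applied.
b = tf.Variable(bias, dtype=tf.float32)
W = tf.Variable(weights, dtype=tf.float32)

x = tf.placeholder(tf.float32, shape=[None, input_shape])
y = tf.matmul(x, W) + b  # forward pass of a fully connected layer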
name="fc2", load=weights_fc, train=train_fc) l23 = Dropout(rate=dropout_rate) l24 = FeedbackFC(size=[4096, 4096], num_classes=num_classes, sparse=args.sparse, rank=args.rank, name="fc2_fb", load=weights_fc) l25 = FullyConnected(size=[4096, num_classes], num_classes=num_classes, init_weights=args.init, alpha=learning_rate, activation=Linear(), bias=args.bias, last_layer=True, name="fc3", load=weights_fc, train=train_fc) ############################################################### model = Model(layers=[ l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, l16, l17, l18, l19, l20, l21, l22, l23, l24, l25 ]) predict = tf.nn.softmax(model.predict(X=features))
l4 = MaxPool(size=[batch_size, 16, 16, 128], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l5 = FeedbackConv(size=[batch_size, 8, 8, 128], num_classes=100, sparse=sparse, rank=rank)

l6 = Convolution(input_sizes=[batch_size, 8, 8, 128], filter_sizes=[5, 5, 128, 256], num_classes=100, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), last_layer=False)
l7 = MaxPool(size=[batch_size, 8, 8, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="SAME")
l8 = FeedbackConv(size=[batch_size, 4, 4, 256], num_classes=100, sparse=sparse, rank=rank)

l9 = ConvToFullyConnected(shape=[4, 4, 256])

l10 = FullyConnected(size=[4*4*256, 2048], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Tanh(), last_layer=False)
l11 = FeedbackFC(size=[4*4*256, 2048], num_classes=100, sparse=sparse, rank=rank)

l12 = FullyConnected(size=[2048, 2048], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Tanh(), last_layer=False)
l13 = FeedbackFC(size=[2048, 2048], num_classes=100, sparse=sparse, rank=rank)

# have to adjust lr if using sigmoid
l14 = FullyConnected(size=[2048, 100], num_classes=100, init_weights=args.init, alpha=ALPHA, activation=Linear(), last_layer=True)

model = Model(layers=[l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14])

predict = model.predict(X=XTEST)

if args.dfa:
    grads_and_vars = model.dfa(X=XTRAIN, Y=YTRAIN)
else:
    grads_and_vars = model.train(X=XTRAIN, Y=YTRAIN)

if args.opt == "adam":
    optimizer = tf.train.AdamOptimizer(learning_rate=ALPHA, beta1=0.9, beta2=0.999, epsilon=1.0).apply_gradients(grads_and_vars=grads_and_vars)
elif args.opt == "rms":
    optimizer = tf.train.RMSPropOptimizer(learning_rate=ALPHA, decay=1.0, momentum=0.0).apply_gradients(grads_and_vars=grads_and_vars)
else:
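The excerpt cuts off at the final optimizer branch (presumably a plain gradient-descent fallback). A minimal sketch of the driver loop such scripts typically end with, assuming XTRAIN/YTRAIN above are placeholders; the names x_train, y_train, EPOCHS, and TRAIN_EXAMPLES are assumptions for illustration:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(EPOCHS):
        for i in range(0, TRAIN_EXAMPLES, batch_size):
            xs = x_train[i:i + batch_size]  # next minibatch of images
            ys = y_train[i:i + batch_size]  # matching one-hot labels
            sess.run(optimizer, feed_dict={XTRAIN: xs, YTRAIN: ys})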
l2 = Convolution(input_sizes=[batch_size, 27, 27, 96], filter_sizes=[5, 5, 96, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
l3 = MaxPool(size=[batch_size, 27, 27, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

l4 = Convolution(input_sizes=[batch_size, 13, 13, 256], filter_sizes=[3, 3, 256, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
l5 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 384], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
l6 = Convolution(input_sizes=[batch_size, 13, 13, 384], filter_sizes=[3, 3, 384, 256], num_classes=num_classes, init_filters=args.init, strides=[1, 1, 1, 1], padding="SAME", alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
l7 = MaxPool(size=[batch_size, 13, 13, 256], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding="VALID")

l8 = ConvToFullyConnected(shape=[6, 6, 256])

l9 = FullyConnected(size=[6*6*256, 4096], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
l10 = FullyConnected(size=[4096, 4096], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Tanh(), bias=0.0, last_layer=False)
l11 = FullyConnected(size=[4096, num_classes], num_classes=num_classes, init_weights=args.init, alpha=ALPHA, activation=Linear(), bias=0.0, last_layer=True)

model = Model(layers=[l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11])

predict = tf.nn.softmax(model.predict(X=features))

if args.dfa:
    train = model.dfa(X=features, Y=labels)
else:
    train = model.train(X=features, Y=labels)

correct = tf.equal(tf.argmax(predict, 1), tf.argmax(labels, 1))
total_correct = tf.reduce_sum(tf.cast(correct, tf.float32))
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

print(model.num_params())
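With `total_correct` defined, evaluation reduces to summing correct predictions over test batches. An illustrative loop, assuming `features`/`labels` are placeholders fed from numpy arrays; the names x_test, y_test, and TEST_EXAMPLES are assumptions:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    count = 0.0
    for i in range(0, TEST_EXAMPLES, batch_size):
        xs = x_test[i:i + batch_size]
        ys = y_test[i:i + batch_size]
        count += sess.run(total_correct, feed_dict={features: xs, labels: ys})
    print("test accuracy: %f" % (count / TEST_EXAMPLES))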