def _build_graph(self, input_vars, _):
    x, label = input_vars
    x = x / 256.0

    def quantize(x, name=None):
        # quantize to 2 bit
        return ((x * 3.0 + 0.5) // 1) / 3.0

    bn = lambda x, name: BatchNorm('bn', x, False, epsilon=1e-4)
    bnc = lambda x, name: tf.clip_by_value(bn(x, None), 0.0, 1.0, name=name)

    def conv_split(name, x, channel, shape):
        inputs = tf.split(3, 2, x)
        x0 = Conv2D(name + 'a', inputs[0], channel / 2, shape)
        x1 = Conv2D(name + 'b', inputs[1], channel / 2, shape)
        return tf.concat(3, [x0, x1])

    with argscope([Conv2D, FullyConnected], nl=bnc):
        x = Conv2D('conv1_1', x, 96, 12, stride=4, padding='VALID')
        x = quantize(x)

        x = conv_split('conv2_1', x, 256, 5)
        x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])
        x = MaxPooling('pool1', x, 3, 2)
        x = quantize(x)

        x = Conv2D('conv3_1', x, 384, 3)
        x = tf.pad(x, [[0, 0], [1, 1], [1, 1], [0, 0]])
        x = MaxPooling('pool2', x, 3, 2)
        x = quantize(x)

        x = conv_split('conv4_1', x, 384, 3)
        x = quantize(x)

        x = conv_split('conv5_1', x, 256, 3)
        x = MaxPooling('pool3', x, 3, 2)
        x = quantize(x)

        x = tf.transpose(x, perm=[0, 3, 1, 2])
        x = tf.nn.dropout(x, keep_prob=1.)
        x = FullyConnected('fc0', x, out_dim=4096)
        x = quantize(x)
        x = tf.nn.dropout(x, keep_prob=1.)
        x = FullyConnected('fc1', x, out_dim=4096)
        logits = FullyConnected('fct', x, out_dim=1000, nl=bn)
    prob = tf.nn.softmax(logits, name='prob')

    nr_wrong = tf.reduce_sum(prediction_incorrect(logits, label), name='wrong-top1')
    nr_wrong = tf.reduce_sum(prediction_incorrect(logits, label, 5), name='wrong-top5')
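# Illustrative aside (not part of the snippet above): the quantize() helper,
# ((x * 3.0 + 0.5) // 1) / 3.0, rounds an input in [0, 1] to the nearest of the
# four 2-bit levels {0, 1/3, 2/3, 1}. A minimal numpy sketch, assuming inputs have
# already been scaled into [0, 1] as done by x = x / 256.0:
import numpy as np

def quantize_2bit(x):
    # floor(3x + 0.5) / 3 is round-to-nearest on a 4-level grid
    return np.floor(x * 3.0 + 0.5) / 3.0

print(quantize_2bit(np.array([0.0, 0.1, 0.4, 0.7, 1.0])))
# -> [0.         0.         0.33333333 0.66666667 1.        ]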
def _build_graph(self, inputs):
    """This function should build the model which takes the input variables
    and define self.cost at the end"""

    # inputs contains a list of input variables defined above
    image, label = inputs

    # In tensorflow, inputs to convolution function are assumed to be
    # NHWC. Add a single channel here.
    image = tf.expand_dims(image, 3)

    image = image * 2 - 1   # center the pixels values at zero

    # The context manager `argscope` sets the default option for all the layers under
    # this context. Here we use 32 channel convolution with shape 3x3
    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
        logits = (LinearWrap(image)
                  .Conv2D('conv0')
                  .MaxPooling('pool0', 2)
                  .Conv2D('conv1')
                  .Conv2D('conv2')
                  .MaxPooling('pool1', 2)
                  .Conv2D('conv3')
                  .FullyConnected('fc0', 512, nl=tf.nn.relu)
                  .Dropout('dropout', 0.5)
                  .FullyConnected('fc1', out_dim=10, nl=tf.identity)())

    prob = tf.nn.softmax(logits, name='prob')   # a Bx10 tensor with probabilities

    # a vector of length B with loss of each sample
    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')  # the average cross-entropy loss

    # compute the "incorrect vector", for the callback ClassificationError to use at validation time
    wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
    accuracy = symbf.accuracy(logits, label, name='accuracy')

    # This will monitor training error (in a moving_average fashion):
    # 1. write the value to tensorboard
    # 2. write the value to stat.json
    # 3. print the value after each epoch
    train_error = tf.reduce_mean(wrong, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    # Use a regex to find parameters to apply weight decay.
    # Here we apply a weight decay on all W (weight matrix) of all fc layers
    wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    self.cost = tf.add_n([wd_cost, cost], name='total_cost')
    summary.add_moving_summary(cost, wd_cost, self.cost)

    # monitor histogram of all weight (of conv and fc layers) in tensorboard
    summary.add_param_summary(('.*/W', ['histogram', 'rms']))
def _build_graph(self, input_vars):
    image, label = input_vars
    is_training = get_current_tower_context().is_training
    keep_prob = tf.constant(0.5 if is_training else 1.0)  # dropout keep probability

    if is_training:
        tf.image_summary("train_image", image, 10)

    image = image / 4.0  # just to make range smaller
    with argscope(Conv2D, nl=BNReLU(), use_bias=False, kernel_shape=3):
        logits = LinearWrap(image) \
            .Conv2D('conv1', out_channel=96, stride=4, kernel_shape=7) \
            .tf.nn.relu(name='relu1') \
            .MaxPooling('pool1', 3, stride=2) \
            .tf.nn.local_response_normalization(depth_radius=5, alpha=0.0001, beta=0.75, name='norm1') \
            .Conv2D('conv2', out_channel=256, kernel_shape=5) \
            .tf.nn.relu(name='relu2') \
            .MaxPooling('pool2', 3, stride=2) \
            .tf.nn.local_response_normalization(alpha=0.0001, beta=0.75, name='norm2') \
            .Conv2D('conv3', out_channel=384, kernel_shape=3) \
            .tf.nn.relu(name='relu3') \
            .MaxPooling('pool5', 3, stride=2) \
            .FullyConnected('fc6', 512) \
            .tf.nn.relu(name='relu6') \
            .tf.nn.dropout(keep_prob) \
            .FullyConnected('fc7', 512) \
            .tf.nn.relu(name='relu7') \
            .tf.nn.dropout(keep_prob) \
            .FullyConnected('fc8', out_dim=8, nl=tf.identity)()
    prob = tf.nn.softmax(logits, name='prob')

    # cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
    cost = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    # compute the number of failed samples, for ClassificationError to use at test time
    wrong = symbf.prediction_incorrect(logits, label)
    nr_wrong = tf.reduce_sum(wrong, name='wrong')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    # weight decay on all W of fc layers
    wd_cost = tf.mul(0.004,
                     regularize_cost('fc.*/W', tf.nn.l2_loss),
                     name='regularize_loss')
    add_moving_summary(cost, wd_cost)

    add_param_summary([('.*/W', ['histogram'])])  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, inputs):
    image, label = inputs
    is_training = get_current_tower_context().is_training
    keep_prob = tf.constant(0.5 if is_training else 1.0)

    if is_training:
        tf.summary.image("train_image", image, 10)
    if tf.test.is_gpu_available():
        image = tf.transpose(image, [0, 3, 1, 2])
        data_format = 'NCHW'
    else:
        data_format = 'NHWC'

    image = image / 4.0  # just to make range smaller
    with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3), \
            argscope([Conv2D, MaxPooling, BatchNorm], data_format=data_format):
        logits = LinearWrap(image) \
            .Conv2D('conv1.1', out_channel=64) \
            .Conv2D('conv1.2', out_channel=64) \
            .MaxPooling('pool1', 3, stride=2, padding='SAME') \
            .Conv2D('conv2.1', out_channel=128) \
            .Conv2D('conv2.2', out_channel=128) \
            .MaxPooling('pool2', 3, stride=2, padding='SAME') \
            .Conv2D('conv3.1', out_channel=128, padding='VALID') \
            .Conv2D('conv3.2', out_channel=128, padding='VALID') \
            .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
            .tf.nn.dropout(keep_prob) \
            .FullyConnected('fc1', 512, nl=tf.nn.relu) \
            .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label)
    accuracy = symbf.accuracy(logits, label, name='accuracy')
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'), accuracy)

    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(4e-4), name='regularize_loss')
    add_moving_summary(cost, wd_cost)

    add_param_summary(('.*/W', ['histogram']))  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, input_vars, is_training):
    image, label = input_vars
    keep_prob = tf.constant(0.5 if is_training else 1.0)

    if is_training:
        tf.image_summary("train_image", image, 10)

    image = image / 4.0  # just to make range smaller
    with argscope(Conv2D, nl=BNReLU(is_training), use_bias=False, kernel_shape=3):
        logits = LinearWrap(image) \
            .Conv2D('conv1.1', out_channel=64) \
            .Conv2D('conv1.2', out_channel=64) \
            .MaxPooling('pool1', 3, stride=2, padding='SAME') \
            .Conv2D('conv2.1', out_channel=128) \
            .Conv2D('conv2.2', out_channel=128) \
            .MaxPooling('pool2', 3, stride=2, padding='SAME') \
            .Conv2D('conv3.1', out_channel=128, padding='VALID') \
            .Conv2D('conv3.2', out_channel=128, padding='VALID') \
            .FullyConnected('fc0', 1024 + 512, b_init=tf.constant_initializer(0.1)) \
            .tf.nn.dropout(keep_prob) \
            .FullyConnected('fc1', 512, b_init=tf.constant_initializer(0.1)) \
            .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)

    # compute the number of failed samples, for ClassificationError to use at test time
    wrong = symbf.prediction_incorrect(logits, label)
    nr_wrong = tf.reduce_sum(wrong, name='wrong')
    # monitor training error
    tf.add_to_collection(
        MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))

    # weight decay on all W of fc layers
    wd_cost = tf.mul(0.004,
                     regularize_cost('fc.*/W', tf.nn.l2_loss),
                     name='regularize_loss')
    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)

    add_param_summary([('.*/W', ['histogram'])])  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, input_vars, is_training):
    image, label = input_vars
    keep_prob = tf.constant(0.5 if is_training else 1.0)

    if is_training:
        tf.image_summary("train_image", image, 10)

    image = image / 4.0  # just to make range smaller
    with argscope(Conv2D, nl=BNReLU(is_training), use_bias=False, kernel_shape=3):
        l = Conv2D('conv1.1', image, out_channel=64)
        l = Conv2D('conv1.2', l, out_channel=64)
        l = MaxPooling('pool1', l, 3, stride=2, padding='SAME')

        l = Conv2D('conv2.1', l, out_channel=128)
        l = Conv2D('conv2.2', l, out_channel=128)
        l = MaxPooling('pool2', l, 3, stride=2, padding='SAME')

        l = Conv2D('conv3.1', l, out_channel=128, padding='VALID')
        l = Conv2D('conv3.2', l, out_channel=128, padding='VALID')
        l = FullyConnected('fc0', l, 1024 + 512, b_init=tf.constant_initializer(0.1))
        l = tf.nn.dropout(l, keep_prob)
        l = FullyConnected('fc1', l, 512, b_init=tf.constant_initializer(0.1))
        # fc will have activation summary by default. disable for the output layer
        logits = FullyConnected('linear', l, out_dim=self.cifar_classnum, nl=tf.identity)

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, cost)

    # compute the number of failed samples, for ClassificationError to use at test time
    wrong = symbf.prediction_incorrect(logits, label)
    nr_wrong = tf.reduce_sum(wrong, name='wrong')
    # monitor training error
    tf.add_to_collection(
        MOVING_SUMMARY_VARS_KEY, tf.reduce_mean(wrong, name='train_error'))

    # weight decay on all W of fc layers
    wd_cost = tf.mul(0.004,
                     regularize_cost('fc.*/W', tf.nn.l2_loss),
                     name='regularize_loss')
    tf.add_to_collection(MOVING_SUMMARY_VARS_KEY, wd_cost)

    add_param_summary([('.*/W', ['histogram'])])  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, inputs):
    image, label = inputs
    image = tf.expand_dims(image * 2 - 1, 3)

    with argscope(Conv2D, kernel_shape=3, nl=tf.nn.relu, out_channel=32):
        c0 = Conv2D('conv0', image)
        p0 = MaxPooling('pool0', c0, 2)
        c1 = Conv2D('conv1', p0)
        c2 = Conv2D('conv2', c1)
        p1 = MaxPooling('pool1', c2, 2)
        c3 = Conv2D('conv3', p1)
        fc1 = FullyConnected('fc0', c3, 512, nl=tf.nn.relu)
        fc1 = Dropout('dropout', fc1, 0.5)
        logits = FullyConnected('fc1', fc1, out_dim=10, nl=tf.identity)

    with tf.name_scope('visualizations'):
        visualize_conv_weights(c0.variables.W, 'conv0')
        visualize_conv_activations(c0, 'conv0')
        visualize_conv_weights(c1.variables.W, 'conv1')
        visualize_conv_activations(c1, 'conv1')
        visualize_conv_weights(c2.variables.W, 'conv2')
        visualize_conv_activations(c2, 'conv2')
        visualize_conv_weights(c3.variables.W, 'conv3')
        visualize_conv_activations(c3, 'conv3')

        tf.summary.image('input', (image + 1.0) * 128., 3)

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
    accuracy = symbf.accuracy(logits, label)

    wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    self.cost = tf.add_n([wd_cost, cost], name='total_cost')
    summary.add_moving_summary(cost, wd_cost, self.cost, accuracy)
    summary.add_param_summary(('.*/W', ['histogram', 'rms']))
def _build_graph(self, input_vars):
    image, label = input_vars
    is_training = get_current_tower_context().is_training
    keep_prob = tf.constant(0.5 if is_training else 1.0)

    if is_training:
        tf.image_summary("train_image", image, 10)

    image = image / 4.0  # just to make range smaller
    with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3):
        logits = LinearWrap(image) \
            .Conv2D('conv1.1', out_channel=64) \
            .Conv2D('conv1.2', out_channel=64) \
            .MaxPooling('pool1', 3, stride=2, padding='SAME') \
            .Conv2D('conv2.1', out_channel=128) \
            .Conv2D('conv2.2', out_channel=128) \
            .MaxPooling('pool2', 3, stride=2, padding='SAME') \
            .Conv2D('conv3.1', out_channel=128, padding='VALID') \
            .Conv2D('conv3.2', out_channel=128, padding='VALID') \
            .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
            .tf.nn.dropout(keep_prob) \
            .FullyConnected('fc1', 512, nl=tf.nn.relu) \
            .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label)
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    # weight decay on all W of fc layers
    wd_cost = tf.mul(0.0004,
                     regularize_cost('fc.*/W', tf.nn.l2_loss),
                     name='regularize_loss')
    add_moving_summary(cost, wd_cost)

    add_param_summary([('.*/W', ['histogram'])])  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, inputs):
    image, label = inputs
    is_training = get_current_tower_context().is_training
    keep_prob = tf.constant(0.5 if is_training else 1.0)

    if is_training:
        tf.summary.image("train_image", image, 10)

    image = tf.transpose(image, [0, 3, 1, 2])
    image = image / 4.0  # just to make range smaller
    with argscope(Conv2D, nl=BNReLU, use_bias=False, kernel_shape=3), \
            argscope([Conv2D, MaxPooling, BatchNorm], data_format='NCHW'):
        logits = LinearWrap(image) \
            .Conv2D('conv1.1', out_channel=64) \
            .Conv2D('conv1.2', out_channel=64) \
            .MaxPooling('pool1', 3, stride=2, padding='SAME') \
            .Conv2D('conv2.1', out_channel=128) \
            .Conv2D('conv2.2', out_channel=128) \
            .MaxPooling('pool2', 3, stride=2, padding='SAME') \
            .Conv2D('conv3.1', out_channel=128, padding='VALID') \
            .Conv2D('conv3.2', out_channel=128, padding='VALID') \
            .FullyConnected('fc0', 1024 + 512, nl=tf.nn.relu) \
            .tf.nn.dropout(keep_prob) \
            .FullyConnected('fc1', 512, nl=tf.nn.relu) \
            .FullyConnected('linear', out_dim=self.cifar_classnum, nl=tf.identity)()

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label)
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(4e-4), name='regularize_loss')
    add_moving_summary(cost, wd_cost)

    add_param_summary(('.*/W', ['histogram']))  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, inputs):
    image, label = inputs
    image = image / 128.0 - 1

    with argscope(Conv2D, nl=BNReLU, use_bias=False):
        logits = (LinearWrap(image)
                  .Conv2D('conv1', 24, 5, padding='VALID')
                  .MaxPooling('pool1', 2, padding='SAME')
                  .Conv2D('conv2', 32, 3, padding='VALID')
                  .Conv2D('conv3', 32, 3, padding='VALID')
                  .MaxPooling('pool2', 2, padding='SAME')
                  .Conv2D('conv4', 64, 3, padding='VALID')
                  .Dropout('drop', 0.5)
                  .FullyConnected('fc0', 512,
                                  b_init=tf.constant_initializer(0.1), nl=tf.nn.relu)
                  .FullyConnected('linear', out_dim=10, nl=tf.identity)())
    prob = tf.nn.softmax(logits, name='output')

    # compute the number of failed samples, for ClassificationError to use at test time
    wrong = prediction_incorrect(logits, label)
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wd_cost = regularize_cost('fc.*/W', l2_regularizer(0.00001))
    add_moving_summary(cost, wd_cost)

    add_param_summary(('.*/W', ['histogram', 'rms']))  # monitor W
    self.cost = tf.add_n([cost, wd_cost], name='cost')
def _build_graph(self, inputs):
    image, label = inputs
    is_training = get_current_tower_context().is_training

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)

    # monkey-patch tf.get_variable to apply fw
    def binarize_weight(v):
        name = v.op.name
        # don't binarize first and last layer
        if not name.endswith('W') or 'conv0' in name or 'fc' in name:
            return v
        else:
            logger.info("Binarizing weight {}".format(v.op.name))
            return fw(v)

    def cabs(x):
        return tf.minimum(1.0, tf.abs(x), name='cabs')

    def activate(x):
        return fa(cabs(x))

    image = image / 256.0

    with remap_variables(binarize_weight), \
            argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
            argscope(Conv2D, use_bias=False, nl=tf.identity):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 48, 5, padding='VALID', use_bias=True)
                  .MaxPooling('pool0', 2, padding='SAME')
                  .apply(activate)
                  # 18
                  .Conv2D('conv1', 64, 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn1').apply(activate)

                  .Conv2D('conv2', 64, 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn2')
                  .MaxPooling('pool1', 2, padding='SAME')
                  .apply(activate)
                  # 9
                  .Conv2D('conv3', 128, 3, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn3').apply(activate)
                  # 7
                  .Conv2D('conv4', 128, 3, padding='SAME')
                  .apply(fg)
                  .BatchNorm('bn4').apply(activate)

                  .Conv2D('conv5', 128, 3, padding='VALID')
                  .apply(fg)
                  .BatchNorm('bn5').apply(activate)
                  # 5
                  .tf.nn.dropout(0.5 if is_training else 1.0)
                  .Conv2D('conv6', 512, 5, padding='VALID')
                  .apply(fg).BatchNorm('bn6')
                  .apply(cabs)
                  .FullyConnected('fc1', 10, nl=tf.identity)())
    tf.nn.softmax(logits, name='output')

    # compute the number of failed samples
    wrong = prediction_incorrect(logits, label)
    # monitor training error
    add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(1e-7))

    add_param_summary(('.*/W', ['histogram', 'rms']))
    self.cost = tf.add_n([cost, wd_cost], name='cost')
    add_moving_summary(cost, wd_cost, self.cost)
def _build_graph(self, inputs):
    image, label = inputs
    image = image / 255.0   # ?

    def proj_kk(l, k, ch_r, ch, stride=1):
        l = Conv2D('conv{0}{0}r'.format(k), l, ch_r, 1)
        return Conv2D('conv{0}{0}'.format(k), l, ch, k, stride=stride,
                      padding='VALID' if stride > 1 else 'SAME')

    def proj_233(l, ch_r, ch, stride=1):
        l = Conv2D('conv233r', l, ch_r, 1)
        l = Conv2D('conv233a', l, ch, 3)
        return Conv2D('conv233b', l, ch, 3, stride=stride,
                      padding='VALID' if stride > 1 else 'SAME')

    def pool_proj(l, ch, pool_type):
        if pool_type == 'max':
            l = MaxPooling('maxpool', l, 3, 1)
        else:
            l = AvgPooling('maxpool', l, 3, 1, padding='SAME')
        return Conv2D('poolproj', l, ch, 1)

    def proj_77(l, ch_r, ch):
        return (LinearWrap(l)
                .Conv2D('conv77r', ch_r, 1)
                .Conv2D('conv77a', ch_r, [1, 7])
                .Conv2D('conv77b', ch, [7, 1])())

    def proj_277(l, ch_r, ch):
        return (LinearWrap(l)
                .Conv2D('conv277r', ch_r, 1)
                .Conv2D('conv277aa', ch_r, [7, 1])
                .Conv2D('conv277ab', ch_r, [1, 7])
                .Conv2D('conv277ba', ch_r, [7, 1])
                .Conv2D('conv277bb', ch, [1, 7])())

    with argscope(Conv2D, nl=BNReLU, use_bias=False), \
            argscope(BatchNorm, decay=0.9997, epsilon=1e-3):
        l = (LinearWrap(image)
             .Conv2D('conv0', 32, 3, stride=2, padding='VALID')  # 299
             .Conv2D('conv1', 32, 3, padding='VALID')            # 149
             .Conv2D('conv2', 64, 3, padding='SAME')             # 147
             .MaxPooling('pool2', 3, 2)
             .Conv2D('conv3', 80, 1, padding='SAME')             # 73
             .Conv2D('conv4', 192, 3, padding='VALID')           # 71
             .MaxPooling('pool4', 3, 2)())                       # 35

        with tf.variable_scope('incep-35-256a'):
            l = tf.concat([Conv2D('conv11', l, 64, 1),
                           proj_kk(l, 5, 48, 64),
                           proj_233(l, 64, 96),
                           pool_proj(l, 32, 'avg')], 3, name='concat')
        with tf.variable_scope('incep-35-288a'):
            l = tf.concat([Conv2D('conv11', l, 64, 1),
                           proj_kk(l, 5, 48, 64),
                           proj_233(l, 64, 96),
                           pool_proj(l, 64, 'avg')], 3, name='concat')
        with tf.variable_scope('incep-35-288b'):
            l = tf.concat([Conv2D('conv11', l, 64, 1),
                           proj_kk(l, 5, 48, 64),
                           proj_233(l, 64, 96),
                           pool_proj(l, 64, 'avg')], 3, name='concat')
        # 35x35x288
        with tf.variable_scope('incep-17-768a'):
            l = tf.concat([Conv2D('conv3x3', l, 384, 3, stride=2, padding='VALID'),
                           proj_233(l, 64, 96, stride=2),
                           MaxPooling('maxpool', l, 3, 2)], 3, name='concat')
        with tf.variable_scope('incep-17-768b'):
            l = tf.concat([Conv2D('conv11', l, 192, 1),
                           proj_77(l, 128, 192),
                           proj_277(l, 128, 192),
                           pool_proj(l, 192, 'avg')], 3, name='concat')
        for x in ['c', 'd']:
            with tf.variable_scope('incep-17-768{}'.format(x)):
                l = tf.concat([Conv2D('conv11', l, 192, 1),
                               proj_77(l, 160, 192),
                               proj_277(l, 160, 192),
                               pool_proj(l, 192, 'avg')], 3, name='concat')
        with tf.variable_scope('incep-17-768e'):
            l = tf.concat([Conv2D('conv11', l, 192, 1),
                           proj_77(l, 192, 192),
                           proj_277(l, 192, 192),
                           pool_proj(l, 192, 'avg')], 3, name='concat')
        # 17x17x768
        with tf.variable_scope('br1'):
            br1 = AvgPooling('avgpool', l, 5, 3, padding='VALID')
            br1 = Conv2D('conv11', br1, 128, 1)
            shape = br1.get_shape().as_list()
            br1 = Conv2D('convout', br1, 768, shape[1:3], padding='VALID')
            br1 = FullyConnected('fc', br1, 1000, nl=tf.identity)

        with tf.variable_scope('incep-17-1280a'):
            l = tf.concat([proj_kk(l, 3, 192, 320, stride=2),
                           Conv2D('conv73', proj_77(l, 192, 192), 192, 3,
                                  stride=2, padding='VALID'),
                           MaxPooling('maxpool', l, 3, 2)], 3, name='concat')
        for x in ['a', 'b']:
            with tf.variable_scope('incep-8-2048{}'.format(x)):
                br11 = Conv2D('conv11', l, 320, 1)
                br33 = Conv2D('conv133r', l, 384, 1)
                br33 = tf.concat([Conv2D('conv133a', br33, 384, [1, 3]),
                                  Conv2D('conv133b', br33, 384, [3, 1])], 3, name='conv133')

                br233 = proj_kk(l, 3, 448, 384)
                br233 = tf.concat([Conv2D('conv233a', br233, 384, [1, 3]),
                                   Conv2D('conv233b', br233, 384, [3, 1])], 3, name='conv233')

                l = tf.concat([br11, br33, br233,
                               pool_proj(l, 192, 'avg')], 3, name='concat')

        l = GlobalAvgPooling('gap', l)
        # 1x1x2048
        l = Dropout('drop', l, 0.8)
        logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)

    loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br1, labels=label)
    loss1 = tf.reduce_mean(loss1, name='loss1')
    loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    loss2 = tf.reduce_mean(loss2, name='loss2')

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
    wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

    # weight decay on all W of fc layers
    wd_w = tf.train.exponential_decay(0.00004, get_global_step_var(),
                                      80000, 0.7, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss),
                          name='l2_regularize_loss')

    self.cost = tf.add_n([0.4 * loss1, loss2, wd_cost], name='cost')
    add_moving_summary(loss1, loss2, wd_cost, self.cost)
def _build_graph(self, inputs):
    image, label = inputs

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)
    old_get_variable = tf.get_variable

    def monitor(x, name):
        if MONITOR == 1:
            return tf.Print(x, [x], message='\n\n' + name + ': ',
                            summarize=1000, name=name)
        else:
            return x

    def new_get_variable(v):
        name = v.op.name
        if not name.endswith('W') or 'conv1_1' in name or 'fc8' in name:
            return v
        else:
            logger.info("Quantizing weight {}".format(v.op.name))
            if MONITOR == 1:
                return tf.Print(fw(v), [fw(v)],
                                message='\n\n' + v.name + ', Quantized weights are:',
                                summarize=100)
            else:
                return fw(v)

    def bn_activate(name, x):
        x = BatchNorm(name, x)
        x = monitor(x, name + '_noact_out')
        return activate(x)

    def activate(x):
        if BITA == 32:
            return tf.nn.relu(x)
        else:
            return fa(tf.nn.relu(x))

    # VGG 16
    with remap_variables(new_get_variable), \
            argscope(Conv2D, kernel_shape=3, use_bias=False, nl=tf.identity):
        logits = (LinearWrap(image)
                  .apply(monitor, 'image_out')
                  .Conv2D('conv1_1', 64)
                  .apply(fg).BatchNorm('bn1_1').apply(activate)
                  .apply(monitor, 'conv1_1_out')
                  .Conv2D('conv1_2', 64)
                  .apply(fg).BatchNorm('bn1_2').apply(activate)
                  .apply(monitor, 'conv1_2_out')
                  .MaxPooling('pool1', 2)
                  .apply(monitor, 'pool1_out')
                  # 112
                  .Conv2D('conv2_1', 128)
                  .apply(fg).BatchNorm('bn2_1').apply(activate)
                  .apply(monitor, 'conv2_1_out')
                  .Conv2D('conv2_2', 128)
                  .apply(fg).BatchNorm('bn2_2').apply(activate)
                  .apply(monitor, 'conv2_2_out')
                  .MaxPooling('pool2', 2)
                  .apply(monitor, 'pool2_out')
                  # 56
                  .Conv2D('conv3_1', 256)
                  .apply(fg).BatchNorm('bn3_1').apply(activate)
                  .apply(monitor, 'conv3_1_out')
                  .Conv2D('conv3_2', 256)
                  .apply(fg).BatchNorm('bn3_2').apply(activate)
                  .apply(monitor, 'conv3_2_out')
                  .Conv2D('conv3_3', 256)
                  .apply(fg).BatchNorm('bn3_3').apply(activate)
                  .apply(monitor, 'conv3_3_out')
                  .MaxPooling('pool3', 2)
                  .apply(monitor, 'pool3_out')
                  # 28
                  .Conv2D('conv4_1', 512)
                  .apply(fg).BatchNorm('bn4_1').apply(activate)
                  .apply(monitor, 'conv4_1_out')
                  .Conv2D('conv4_2', 512)
                  .apply(fg).BatchNorm('bn4_2').apply(activate)
                  .apply(monitor, 'conv4_2_out')
                  .Conv2D('conv4_3', 512)
                  .apply(fg).BatchNorm('bn4_3').apply(activate)
                  .apply(monitor, 'conv4_3_out')
                  .MaxPooling('pool4', 2)
                  .apply(monitor, 'pool4_out')
                  # 14
                  .Conv2D('conv5_1', 512)
                  .apply(fg).BatchNorm('bn5_1').apply(activate)
                  .apply(monitor, 'conv5_1_out')
                  .Conv2D('conv5_2', 512)
                  .apply(fg).BatchNorm('bn5_2').apply(activate)
                  .apply(monitor, 'conv5_2_out')
                  .Conv2D('conv5_3', 512)
                  .apply(fg).BatchNorm('bn5_3').apply(activate)
                  .apply(monitor, 'conv5_3_out')
                  .MaxPooling('pool5', 2)
                  .apply(monitor, 'pool5_out')
                  .FullyConnected('fc6', use_bias=False, out_dim=512)
                  .apply(activate)
                  .apply(monitor, 'fc6_out')
                  .FullyConnected('fc7', use_bias=False, out_dim=512)
                  .apply(activate)
                  .apply(monitor, 'fc7_out')
                  .FullyConnected('fc8', use_bias=False, out_dim=self.cifar_classnum, nl=tf.identity)
                  .apply(monitor, 'fc8_out')())
    prob = tf.nn.softmax(logits, name='prob')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
    accuracy = symbf.accuracy(logits, label, name='accuracy')

    train_error = tf.reduce_mean(wrong, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    self.cost = tf.add_n([wd_cost, cost], name='total_cost')
    summary.add_moving_summary(cost, wd_cost, self.cost)
def build_graph(self, image, label):
    image = image / 255.0

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)

    # monkey-patch tf.get_variable to apply fw
    def new_get_variable(v):
        name = v.op.name
        # don't binarize first and last layer
        if not name.endswith('W') or 'conv0' in name or 'fct' in name:
            return v
        else:
            logger.info("Binarizing weight {}".format(v.op.name))
            return fw(v)

    def nonlin(x):
        if BITA == 32:
            return tf.nn.relu(x)    # still use relu for 32bit cases
        return tf.clip_by_value(x, 0.0, 1.0)

    def activate(x):
        return fa(nonlin(x))

    with remap_variables(new_get_variable), \
            argscope(BatchNorm, momentum=0.9, epsilon=1e-4), \
            argscope(Conv2D, use_bias=False):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 96, 12, strides=4, padding='VALID')
                  .apply(activate)
                  .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                  .apply(fg)
                  .BatchNorm('bn1')
                  .MaxPooling('pool1', 3, 2, padding='SAME')
                  .apply(activate)
                  .Conv2D('conv2', 384, 3)
                  .apply(fg)
                  .BatchNorm('bn2')
                  .MaxPooling('pool2', 3, 2, padding='SAME')
                  .apply(activate)
                  .Conv2D('conv3', 384, 3, split=2)
                  .apply(fg)
                  .BatchNorm('bn3')
                  .apply(activate)
                  .Conv2D('conv4', 256, 3, split=2)
                  .apply(fg)
                  .BatchNorm('bn4')
                  .MaxPooling('pool4', 3, 2, padding='VALID')
                  .apply(activate)
                  .FullyConnected('fc0', 4096)
                  .apply(fg)
                  .BatchNorm('bnfc0')
                  .apply(activate)
                  .FullyConnected('fc1', 4096, use_bias=False)
                  .apply(fg)
                  .BatchNorm('bnfc1')
                  .apply(nonlin)
                  .FullyConnected('fct', 1000, use_bias=True)())
    tf.nn.softmax(logits, name='output')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
    wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost')
    add_param_summary(('.*/W', ['histogram', 'rms']))
    total_cost = tf.add_n([cost, wd_cost], name='cost')
    add_moving_summary(cost, wd_cost, total_cost)
    return total_cost
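# For context, a minimal numpy sketch of what the fw/fa functions returned by
# get_dorefa presumably compute, following the DoReFa-Net paper. This is an
# illustrative assumption only: the real get_dorefa also handles the 32-bit
# (BITW/BITA == 32) passthrough cases and wires up the straight-through gradient
# estimator, both of which are omitted here.
import numpy as np

def quantize_k(x, k):
    # uniform quantization of x in [0, 1] onto 2^k levels
    n = float(2 ** k - 1)
    return np.round(x * n) / n

def fw_sketch(w, k):
    # weights: squash with tanh into [0, 1], quantize, rescale back to [-1, 1]
    w = np.tanh(w)
    w = w / (2 * np.max(np.abs(w))) + 0.5
    return 2 * quantize_k(w, k) - 1

def fa_sketch(a, k):
    # activations: assumed already clipped to [0, 1] by nonlin() above
    return quantize_k(a, k)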
def _build_graph(self, inputs):
    inp, label = inputs
    is_training = get_current_tower_context().is_training

    fw, fa = get_dorefa(self.bitw, self.bita)

    def binarize_weight(v):
        name = v.op.name
        if not (name.endswith('W') or name.endswith('b')):
            logger.info("Not quantizing {}".format(name))
            return v
        elif not self.quant_ends and 'conv0' in name:
            logger.info("Not quantizing {}".format(name))
            return v
        elif not self.quant_ends and 'last_linear' in name:
            logger.info("Not quantizing {}".format(name))
            return v
        elif (not self.quant_ends and
              (self.net_fn == fcn1_net or self.net_fn == fcn2_net) and
              'linear0' in name):
            logger.info("Not quantizing {}".format(name))
            return v
        else:
            logger.info("Quantizing weight {}".format(name))
            return fw(v)

    def nonlin(x, name="activate"):
        if self.bita == 32:
            return fa(tf.nn.relu(BNWithTrackedMults(x)))
        else:
            return fa(tf.clip_by_value(BNWithTrackedMults(x), 0.0, 1.0))

    with remap_variables(binarize_weight), \
            argscope([FullyConnectedWithTrackedMults], network_complexity=self.network_complexity), \
            argscope([Conv2DWithTrackedMults], network_complexity=self.network_complexity), \
            argscope([BNReLUWithTrackedMults], network_complexity=self.network_complexity), \
            argscope([BNWithTrackedMults], network_complexity=self.network_complexity), \
            argscope(BatchNorm, decay=0.9, epsilon=1e-4):
        l = self.net_fn(inp, nonlin, self.n_context)
        logits = FullyConnectedWithTrackedMults('last_linear', l,
                                                out_dim=self.n_spks, nl=tf.identity)

    prob = tf.nn.softmax(logits, name='output')

    # used for validation accuracy of utterance
    identity_guesses = flatten(tf.argmax(prob, axis=1))
    uniq_identities, _, count = tf.unique_with_counts(identity_guesses)
    idx_to_identity_with_most_votes = tf.argmax(count)
    chosen_identity = tf.gather(uniq_identities, idx_to_identity_with_most_votes)
    wrong = tf.expand_dims(tf.not_equal(chosen_identity, tf.cast(label[0], tf.int64)),
                           axis=0, name='utt-wrong')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')
    add_moving_summary(cost)

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))

    with tf.name_scope('original-weight-summaries'):
        add_param_summary(('.*/W', ['rms', 'histogram']))
        add_param_summary(('.*/b', ['rms', 'histogram']))

    with tf.name_scope('activation-summaries'):
        def fn(name):
            return ((name.endswith('output') or name.endswith('output:0'))
                    and "Inference" not in name and 'quantized' not in name)
        tensors = get_tensors_from_graph(tf.get_default_graph(), fn)
        logger.info("Adding activation tensors to summary: {}".format(tensors))
        for tensor in tensors:
            add_tensor_summary(tensor, ['rms', 'histogram'])

    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(), 480000, 0.2, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss), name='wd_cost')
    add_moving_summary(wd_cost)
    self.cost = tf.add_n([cost, wd_cost], name='cost')

    tf.constant([self.network_complexity['mults']], name='TotalMults')
    tf.constant([self.network_complexity['weights']], name='TotalWeights')
    logger.info("Parameter count: {}".format(self.network_complexity))
def _build_graph(self, inputs):
    image, label = inputs
    image = image / 128.0

    def inception(name, x, nr1x1, nr3x3r, nr3x3, nr233r, nr233, nrpool, pooltype):
        stride = 2 if nr1x1 == 0 else 1
        with tf.variable_scope(name):
            outs = []
            if nr1x1 != 0:
                outs.append(Conv2D('conv1x1', x, nr1x1, 1))
            x2 = Conv2D('conv3x3r', x, nr3x3r, 1)
            outs.append(Conv2D('conv3x3', x2, nr3x3, 3, stride=stride))

            x3 = Conv2D('conv233r', x, nr233r, 1)
            x3 = Conv2D('conv233a', x3, nr233, 3)
            outs.append(Conv2D('conv233b', x3, nr233, 3, stride=stride))

            if pooltype == 'max':
                x4 = MaxPooling('mpool', x, 3, stride, padding='SAME')
            else:
                assert pooltype == 'avg'
                x4 = AvgPooling('apool', x, 3, stride, padding='SAME')
            if nrpool != 0:  # pool + passthrough if nrpool == 0
                x4 = Conv2D('poolproj', x4, nrpool, 1)
            outs.append(x4)
            return tf.concat(outs, 3, name='concat')

    with argscope(Conv2D, nl=BNReLU, use_bias=False):
        l = (LinearWrap(image)
             .Conv2D('conv0', 64, 7, stride=2)
             .MaxPooling('pool0', 3, 2, padding='SAME')
             .Conv2D('conv1', 64, 1)
             .Conv2D('conv2', 192, 3)
             .MaxPooling('pool2', 3, 2, padding='SAME')())
        # 28
        l = inception('incep3a', l, 64, 64, 64, 64, 96, 32, 'avg')
        l = inception('incep3b', l, 64, 64, 96, 64, 96, 64, 'avg')
        l = inception('incep3c', l, 0, 128, 160, 64, 96, 0, 'max')

        br1 = (LinearWrap(l)
               .Conv2D('loss1conv', 128, 1)
               .FullyConnected('loss1fc', 1024, nl=tf.nn.relu)
               .FullyConnected('loss1logit', 1000, nl=tf.identity)())
        loss1 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br1, labels=label)
        loss1 = tf.reduce_mean(loss1, name='loss1')

        # 14
        l = inception('incep4a', l, 224, 64, 96, 96, 128, 128, 'avg')
        l = inception('incep4b', l, 192, 96, 128, 96, 128, 128, 'avg')
        l = inception('incep4c', l, 160, 128, 160, 128, 160, 128, 'avg')
        l = inception('incep4d', l, 96, 128, 192, 160, 192, 128, 'avg')
        l = inception('incep4e', l, 0, 128, 192, 192, 256, 0, 'max')

        br2 = Conv2D('loss2conv', l, 128, 1)
        br2 = FullyConnected('loss2fc', br2, 1024, nl=tf.nn.relu)
        br2 = FullyConnected('loss2logit', br2, 1000, nl=tf.identity)
        loss2 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=br2, labels=label)
        loss2 = tf.reduce_mean(loss2, name='loss2')

        # 7
        l = inception('incep5a', l, 352, 192, 320, 160, 224, 128, 'avg')
        l = inception('incep5b', l, 352, 192, 320, 192, 224, 128, 'max')
        l = GlobalAvgPooling('gap', l)

        logits = FullyConnected('linear', l, out_dim=1000, nl=tf.identity)
    tf.nn.softmax(logits, name='output')
    loss3 = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    loss3 = tf.reduce_mean(loss3, name='loss3')

    cost = tf.add_n([loss3, 0.3 * loss2, 0.3 * loss1], name='weighted_cost')
    add_moving_summary([cost, loss1, loss2, loss3])

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train_error_top1'))
    wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(wrong, name='train_error_top5'))

    # weight decay on all W of fc layers
    wd_w = tf.train.exponential_decay(0.0002, get_global_step_var(),
                                      80000, 0.7, True)
    wd_cost = tf.multiply(wd_w, regularize_cost('.*/W', tf.nn.l2_loss),
                          name='l2_regularize_loss')
    self.cost = tf.add_n([cost, wd_cost], name='cost')
    add_moving_summary(wd_cost, self.cost)
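# Channel bookkeeping for the inception() helper above (a worked check derived
# from the calls themselves, not from the original comments): each block
# concatenates the 1x1, 3x3, double-3x3 and pool branches along axis 3, so
#   incep3a -> 64 + 64 + 96 + 32 = 256 channels
#   incep3b -> 64 + 96 + 96 + 64 = 320 channels
#   incep3c -> 160 + 96 + 320   = 576 channels (nrpool == 0, so the stride-2
#              max-pool branch passes its 320 input channels through unprojected).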
def _build_graph(self, inputs):
    image, label = inputs

    # Add a single channel here
    image = tf.expand_dims(image, 3)

    image = image * 256
    image = tf.round(image)

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)
    old_get_variable = tf.get_variable

    def monitor(x, name):
        if MONITOR == 1:
            return tf.Print(x, [x], message='\n\n' + name + ': ',
                            summarize=1000, name=name)
        else:
            return x

    def new_get_variable(v):
        name = v.op.name
        if not name.endswith('W') or 'conv0' in name or 'fc1' in name:
            return v
        else:
            logger.info("Quantizing weight {}".format(v.op.name))
            if MONITOR == 1:
                return tf.Print(fw(v), [fw(v)],
                                message='\n\n' + v.name + ', Quantized weights are:',
                                summarize=100)
            else:
                return fw(v)

    def activate(x):
        if BITA == 32:
            return tf.nn.relu(x)
        else:
            return fa(tf.nn.relu(x))

    with remap_variables(new_get_variable), \
            argscope(Conv2D, kernel_shape=3, use_bias=False, nl=tf.identity, out_channel=32):
        logits = (LinearWrap(image)
                  .apply(monitor, 'image_out')
                  .Conv2D('conv0')
                  .apply(fg).BatchNorm('bn0').apply(activate)
                  .apply(monitor, 'conv0_out')
                  .MaxPooling('pool0', 2)
                  .apply(monitor, 'pool0_out')
                  .Conv2D('conv1')
                  .apply(fg).BatchNorm('bn1').apply(activate)
                  .apply(monitor, 'conv1_out')
                  .Conv2D('conv2')
                  .apply(fg).BatchNorm('bn2').apply(activate)
                  .apply(monitor, 'conv2_out')
                  .MaxPooling('pool1', 2)
                  .apply(monitor, 'pool1_out')
                  .Conv2D('conv3')
                  .apply(fg).BatchNorm('bn3').apply(activate)
                  .apply(monitor, 'conv3_out')
                  .FullyConnected('fc0', use_bias=False, out_dim=20, nl=tf.identity)
                  .apply(activate)
                  .apply(monitor, 'fc0_out')
                  .FullyConnected('fc1', use_bias=False, out_dim=10, nl=tf.identity)
                  .apply(monitor, 'fc1_out')())
    prob = tf.nn.softmax(logits, name='prob')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label, name='incorrect')
    accuracy = symbf.accuracy(logits, label, name='accuracy')

    train_error = tf.reduce_mean(wrong, name='train_error')
    summary.add_moving_summary(train_error, accuracy)

    wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    self.cost = tf.add_n([wd_cost, cost], name='total_cost')
    summary.add_moving_summary(cost, wd_cost, self.cost)
def _build_graph(self, inputs):
    image, label = inputs
    image = image / 255.0

    fw, fa, fg = get_dorefa(BITW, BITA, BITG)

    # monkey-patch tf.get_variable to apply fw
    def new_get_variable(v):
        name = v.op.name
        # don't binarize first and last layer
        if not name.endswith('W') or 'conv0' in name or 'fct' in name:
            return v
        else:
            logger.info("Binarizing weight {}".format(v.op.name))
            return fw(v)

    def nonlin(x):
        if BITA == 32:
            return tf.nn.relu(x)    # still use relu for 32bit cases
        return tf.clip_by_value(x, 0.0, 1.0)

    def activate(x):
        return fa(nonlin(x))

    with remap_variables(new_get_variable), \
            argscope(BatchNorm, decay=0.9, epsilon=1e-4), \
            argscope([Conv2D, FullyConnected], use_bias=False, nl=tf.identity):
        logits = (LinearWrap(image)
                  .Conv2D('conv0', 96, 12, stride=4, padding='VALID')
                  .apply(activate)
                  .Conv2D('conv1', 256, 5, padding='SAME', split=2)
                  .apply(fg)
                  .BatchNorm('bn1')
                  .MaxPooling('pool1', 3, 2, padding='SAME')
                  .apply(activate)
                  .Conv2D('conv2', 384, 3)
                  .apply(fg)
                  .BatchNorm('bn2')
                  .MaxPooling('pool2', 3, 2, padding='SAME')
                  .apply(activate)
                  .Conv2D('conv3', 384, 3, split=2)
                  .apply(fg)
                  .BatchNorm('bn3')
                  .apply(activate)
                  .Conv2D('conv4', 256, 3, split=2)
                  .apply(fg)
                  .BatchNorm('bn4')
                  .MaxPooling('pool4', 3, 2, padding='VALID')
                  .apply(activate)
                  .FullyConnected('fc0', 4096)
                  .apply(fg)
                  .BatchNorm('bnfc0')
                  .apply(activate)
                  .FullyConnected('fc1', 4096)
                  .apply(fg)
                  .BatchNorm('bnfc1')
                  .apply(nonlin)
                  .FullyConnected('fct', 1000, use_bias=True)())
    tf.nn.softmax(logits, name='output')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = prediction_incorrect(logits, label, 1, name='wrong-top1')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top1'))
    wrong = prediction_incorrect(logits, label, 5, name='wrong-top5')
    add_moving_summary(tf.reduce_mean(wrong, name='train-error-top5'))

    # weight decay on all W of fc layers
    wd_cost = regularize_cost('fc.*/W', l2_regularizer(5e-6), name='regularize_cost')
    add_param_summary(('.*/W', ['histogram', 'rms']))
    self.cost = tf.add_n([cost, wd_cost], name='cost')
    add_moving_summary(cost, wd_cost, self.cost)
def _build_graph(self, inputs):
    xys = np.array([(y, x, 1) for y in range(WARP_TARGET_SIZE)
                    for x in range(WARP_TARGET_SIZE)], dtype='float32')
    xys = tf.constant(xys, dtype=tf.float32, name='xys')  # p x 3

    image, label = inputs

    image = image / 255.0 - 0.5  # bhw2

    def get_stn(image):
        stn = (LinearWrap(image)
               .AvgPooling('downsample', 2)
               .Conv2D('conv0', 20, 5, padding='VALID')
               .MaxPooling('pool0', 2)
               .Conv2D('conv1', 20, 5, padding='VALID')
               .FullyConnected('fc1', out_dim=32)
               .FullyConnected('fct', out_dim=6, nl=tf.identity,
                               W_init=tf.constant_initializer(),
                               b_init=tf.constant_initializer([1, 0, HALF_DIFF, 0, 1, HALF_DIFF]))())
        # output 6 parameters for affine transformation
        stn = tf.reshape(stn, [-1, 2, 3], name='affine')  # bx2x3
        stn = tf.reshape(tf.transpose(stn, [2, 0, 1]), [3, -1])  # 3 x (bx2)
        coor = tf.reshape(tf.matmul(xys, stn),
                          [WARP_TARGET_SIZE, WARP_TARGET_SIZE, -1, 2])
        coor = tf.transpose(coor, [2, 0, 1, 3], 'sampled_coords')  # b h w 2
        sampled = ImageSample('warp', [image, coor], borderMode='constant')
        return sampled

    with argscope([Conv2D, FullyConnected], nl=tf.nn.relu):
        with tf.variable_scope('STN1'):
            sampled1 = get_stn(image)
        with tf.variable_scope('STN2'):
            sampled2 = get_stn(image)

    # For visualization in tensorboard
    with tf.name_scope('visualization'):
        padded1 = tf.pad(sampled1, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
        padded2 = tf.pad(sampled2, [[0, 0], [HALF_DIFF, HALF_DIFF], [HALF_DIFF, HALF_DIFF], [0, 0]])
        img_orig = tf.concat([image[:, :, :, 0], image[:, :, :, 1]], 1)  # b x 2h x w
        transform1 = tf.concat([padded1[:, :, :, 0], padded1[:, :, :, 1]], 1)
        transform2 = tf.concat([padded2[:, :, :, 0], padded2[:, :, :, 1]], 1)
        stacked = tf.concat([img_orig, transform1, transform2], 2, 'viz')
        tf.summary.image('visualize', tf.expand_dims(stacked, -1), max_outputs=30)

    sampled = tf.concat([sampled1, sampled2], 3, 'sampled_concat')

    logits = (LinearWrap(sampled)
              .FullyConnected('fc1', out_dim=256, nl=tf.nn.relu)
              .FullyConnected('fc2', out_dim=128, nl=tf.nn.relu)
              .FullyConnected('fct', out_dim=19, nl=tf.identity)())
    prob = tf.nn.softmax(logits, name='prob')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    wrong = symbf.prediction_incorrect(logits, label)
    summary.add_moving_summary(tf.reduce_mean(wrong, name='train_error'))

    wd_cost = tf.multiply(1e-5, regularize_cost('fc.*/W', tf.nn.l2_loss),
                          name='regularize_loss')
    summary.add_moving_summary(cost, wd_cost)
    self.cost = tf.add_n([wd_cost, cost], name='cost')
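# A small numpy illustration (separate from the graph above) of the coordinate
# arithmetic inside get_stn: every target pixel (y, x, 1) is multiplied by the
# predicted 2x3 affine matrix to obtain a source sampling coordinate. With the
# initialization b_init = [1, 0, HALF_DIFF, 0, 1, HALF_DIFF] the warp therefore
# starts out as a pure translation by HALF_DIFF. HALF_DIFF_DEMO below is a
# hypothetical value used only for this sketch.
import numpy as np

HALF_DIFF_DEMO = 3
theta = np.array([[1, 0, HALF_DIFF_DEMO],
                  [0, 1, HALF_DIFF_DEMO]], dtype='float32')  # 2x3 affine parameters
xys_demo = np.array([(y, x, 1) for y in range(2) for x in range(2)], dtype='float32')  # p x 3
coords = xys_demo.dot(theta.T)  # p x 2 source coordinates
print(coords)
# -> [[3. 3.] [3. 4.] [4. 3.] [4. 4.]]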