def _classifier(self, net, is_train):
    axes = [1]  # [2, 3] if self['format'] == 'nchw' else [1, 2]
    net = log_1d(tf.reduce_mean(net, axes))
    for unit in self['classifier.units']:
        net = log_1d(tf.layers.dense(net, unit, activation=tf.nn.relu))
    net = log_1d(tf.layers.dense(net, 1, activation=None))
    return net
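# Hedged standalone sketch (not from the original source; `log_1d` and the
# config lookups are dropped so the shape flow is explicit, and the helper
# name and `hidden_units` default are assumptions):
def _example_classifier_head(net, hidden_units=(64,)):
    """Mean-pool the time axis, then map to a single classifier logit per sample."""
    net = tf.reduce_mean(net, axis=[1])               # [batch, time, feat] -> [batch, feat]
    for units in hidden_units:
        net = tf.layers.dense(net, units, activation=tf.nn.relu)
    return tf.layers.dense(net, 1, activation=None)   # [batch, 1] sigmoid logits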
def __call__(self, x, is_train):
    x = log_1d(
        tf.layers.dropout(x, self['dropout'], training=is_train, name='dropout'))
    x = log_1d(
        dense(x,
              self.vocab_length,
              activation=tf.nn.relu if self['use_activation'] else None,
              name='dense'))
    return x
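# Hedged note (assumptions: `dense` above is an alias for tf.layers.dense and
# the input is the [batch, time, features] recurrent output). A dense layer on
# a 3-D tensor acts on the last axis only, so every timestep is projected to
# vocab_length independently -- the per-frame logits that ctc_loss expects:
#
#   t = tf.zeros([2, 7, 16])        # [batch, time, features]
#   tf.layers.dense(t, 30)          # -> shape [2, 7, 30]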
def __call__(self, x, is_train):
    with tf.name_scope('resnet'):
        ResNet = resnet.Model(resnet_size=self['resnet_size'],
                              bottleneck=self['bottleneck'],
                              num_filters=self['num_filters'],
                              kernel_size=self['kernel_size'],
                              conv_stride=self['conv_stride'],
                              first_pool_size=self['first_pool_size'],
                              first_pool_stride=self['first_pool_stride'],
                              block_sizes=self['block_sizes'],
                              block_strides=self['block_strides'],
                              data_format=self._parse_format())
        return log_1d(ResNet(x, is_train))
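# Hedged sketch (assumption, not the original _parse_format): the constructor
# arguments above resemble the TensorFlow models ResNet builder, whose
# data_format argument is the string 'channels_first' or 'channels_last', so a
# minimal mapping from this project's 'nchw'/'nhwc' config could be:
def _example_parse_format(self):
    return 'channels_first' if self['format'] == 'nchw' else 'channels_last'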
def _rec_block(self, net, index, is_train, scope):
    with tf.variable_scope(scope):
        net = log_1d(
            tf.layers.dropout(net, self['dropout'], training=is_train,
                              name='dropout'))
        with tf.name_scope('fw'):
            cell_fw, sum_fw = self._cell()
        with tf.name_scope('bw'):
            cell_bw, sum_bw = self._cell()
        output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, net,
                                                    dtype=tf.float32)
        net = log_1d(tf.concat(output, 2))
        sum_fw()
        sum_bw()
        return net
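# Hedged sketch (assumption, not the original _cell): _rec_block above expects
# _cell() to return a (cell, summary_fn) pair, where summary_fn is invoked
# after bidirectional_dynamic_rnn so the cell's variables already exist. A
# minimal version with a hypothetical 'rec.units' config key could look like:
def _example_cell(self):
    cell = tf.nn.rnn_cell.LSTMCell(self['rec.units'])

    def summarize():
        # attach histograms once the RNN has built the cell's variables
        for var in cell.trainable_variables:
            tf.summary.histogram(var.name.replace(':', '_'), var)

    return cell, summarize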
def build_graph(self):
    # CPU kernels do not support NCHW, so force the NHWC default there
    if self._cpu:
        self._config['format'] = DEFAULTS['format']

    ###################
    # PLACEHOLDER
    ###################
    with tf.name_scope('placeholder'):
        x = log_1d(
            tf.placeholder(tf.float32, [
                None, self.image_height,
                None if self['dynamic_width'] else self.image_width,
                self.channels
            ], name="x"))
        if self["scale"]:
            x = tf.truediv(x, tf.constant(255.0), name='scale')
        y = tf.sparse_placeholder(tf.int32, shape=[None, None], name="y")
        class_y = tf.placeholder(tf.float32, shape=[None, 1], name="class_y")
        l = tf.placeholder(tf.int32, shape=[None], name="l")
        is_train = tf.placeholder_with_default(False, (), name='is_train')

    if self['format'] == 'nchw':
        net = log_1d(tf.transpose(x, [0, 3, 1, 2], name='nhwc2nchw'))
    else:
        net = x

    ################
    # PHASE I: Encoding
    ################
    with tf.name_scope('encoder'):
        net = self._encoder(net, is_train)

    if self['format'] == 'nchw':
        net = log_1d(tf.transpose(net, [0, 2, 3, 1], name='nchw2nhwc'))
    net = log_1d(tf.transpose(net, [0, 2, 1, 3]))

    ################
    # PHASE II: Recurrent Block
    ################
    with tf.name_scope('recurrent'):
        if self['dynamic_width']:
            # maybe there's a better way to do column-wise stacking
            net = log_1d(
                tf.reshape(
                    net,
                    [-1, tf.shape(net)[1], net.shape[2] * net.shape[3]]))
        else:
            net = log_1d(
                tf.reshape(
                    net, [-1, net.shape[1], net.shape[2] * net.shape[3]]))
        encoder_net = net
        net = self._recurrent(net, is_train)

    ################
    # PHASE III: Fully Connected
    ################
    with tf.name_scope('fc'):
        fc = FullyConnected.FullyConnected(self['fc'], self.vocab_length)
        net = fc(net, is_train)

    ################
    # PHASE IV: CTC
    ################
    logits = log_1d(tf.transpose(net, [1, 0, 2]))
    with tf.name_scope('loss'):
        total_loss = tf.nn.ctc_loss(y, logits, l)
        tf.summary.scalar('loss', tf.reduce_mean(total_loss))
    with tf.name_scope('train'):
        train_step = self._train_step(total_loss, self.learning_rate)
    with tf.name_scope('logits'):
        logits = tf.nn.softmax(logits)

    ################
    # PHASE V: Classifier
    ################
    with tf.name_scope('classifier'):
        class_logits = self._classifier(encoder_net, is_train)
        class_pred = tf.nn.sigmoid(class_logits)
        class_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=class_logits, labels=class_y)
        # tf.summary.scalar('class_loss', tf.reduce_mean(class_loss))
        class_train = self._train_step(class_loss, self.class_learning_rate)

    """
    new_dict = {
        "classifier": {
            "x": x,
            "y": class_y,
            "train": class_train,
            "loss": class_loss,
            "logits": class_logits
        },
        "recognizer": {
            "x": x,
            "y": y,
            "l": l,
            "train": train_step,
            "loss": total_loss,
            "logits": logits
        },
        "is_train": is_train
    }
    """

    return dict(x=x,
                y=y,
                class_y=class_y,
                class_pred=class_pred,
                class_loss=class_loss,
                class_train=class_train,
                l=l,
                is_train=is_train,
                logits=logits,
                total_loss=total_loss,
                train_step=train_step,
                viz=self.viz)
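# Hedged usage sketch (assumptions: an open TF1 session `sess`, the dict
# returned by build_graph(), numpy batches `batch_images`, `logit_lengths`,
# a tf.SparseTensorValue `sparse_labels`, and binary `class_labels`; none of
# these names come from the original source). It only illustrates how the two
# branches of the graph can be driven with the returned placeholders:
def _example_train_once(sess, graph, batch_images, sparse_labels,
                        logit_lengths, class_labels):
    """Run one CTC recognizer step and one classifier step on the same batch."""
    _, ctc_loss_value = sess.run(
        [graph['train_step'], graph['total_loss']],
        feed_dict={graph['x']: batch_images,
                   graph['y']: sparse_labels,
                   graph['l']: logit_lengths,
                   graph['is_train']: True})
    _, class_loss_value = sess.run(
        [graph['class_train'], graph['class_loss']],
        feed_dict={graph['x']: batch_images,
                   graph['class_y']: class_labels,
                   graph['is_train']: True})
    return ctc_loss_value, class_loss_value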