예제 #1
0
    def add_tensorboard(self, session, tensorboard_dir, timeline_enabled=False):
        """
        Register the TensorBoard summary operations of the acoustic RNN.

        After the call the following attributes are set:
          self.train_summaries_op : merged summaries registered for a training step
          self.test_summaries_op : merged summaries registered for a test step
          self.summary_writer_op : file writer bound to ``tensorboard_dir``
                                   (only created when ``self.is_ditributed`` is falsy)

        Parameters
        ----------
        :param session: the tensorflow session, its graph is handed to the file writer
        :param tensorboard_dir: path to tensorboard directory
        :param timeline_enabled: flag stored as ``self.timeline_enabled`` (enables the output
                                 of a trace file for timeline visualization)
        """
        self.timeline_enabled = timeline_enabled
        self.tensorboard_dir = tensorboard_dir

        # One private collection key per phase, so each phase can be merged separately
        train_key = tf.GraphKeys()
        test_key = tf.GraphKeys()
        phases = (('Training', train_key), ('Test', test_key))

        # The learning rate is reported in both phases
        tf.summary.scalar('Learning_rate', self.learning_rate_var, collections=[train_key, test_key])

        # Loss averaged over the accumulated mini-batches
        with tf.name_scope('Mean_loss'):
            loss_per_batch = tf.divide(self.accumulated_mean_loss, self.mini_batch)
            for phase_name, phase_key in phases:
                tf.summary.scalar(phase_name, loss_per_batch, collections=[phase_key])

        # Error rate averaged over the accumulated mini-batches
        with tf.name_scope('Accuracy_-_Error_Rate'):
            error_per_batch = tf.divide(self.accumulated_error_rate, self.mini_batch)
            for phase_name, phase_key in phases:
                tf.summary.scalar(phase_name, error_per_batch, collections=[phase_key])

        # Histograms of both entries of every RNN state tuple, for both phases
        state_parts = ((0, 'cell_state'), (1, 'hidden_state'))
        with tf.name_scope('RNN_internal_state'):
            for layer_idx, layer_state in enumerate(self.rnn_tuple_state):
                for part_idx, part_name in state_parts:
                    for phase_name, phase_key in phases:
                        tf.summary.histogram('{0}_layer-{1}_{2}'.format(phase_name, layer_idx, part_name),
                                             layer_state[part_idx],
                                             collections=[phase_key])

        self.train_summaries_op = tf.summary.merge_all(key=train_key)
        self.test_summaries_op = tf.summary.merge_all(key=test_key)
        if self.is_ditributed:
            return
        self.summary_writer_op = tf.summary.FileWriter(tensorboard_dir, graph=session.graph)
예제 #2
0
파일: sessinit.py 프로젝트: stasysp/SYQ
    def _init(self, sess):
        """
        Load into the session every graph variable that also has an entry in ``self.prms``.

        Variable names are mapped to their save-name with ``get_savename_from_varname``
        before being compared with the keys of ``self.prms``. Names present on only one
        side are reported through ``logger.warn``; graph-only names for which
        ``is_training_name`` is true are not reported.

        :param sess: session handed to ``SessionUpdate``
        """
        graph_vars = tf.get_collection(tf.GraphKeys().VARIABLES)  # TODO

        graph_names = {get_savename_from_varname(var.name) for var in graph_vars}
        dict_names = set(six.iterkeys(self.prms))
        shared_names = graph_names & dict_names

        logger.info("Params to restore: {}".format(
            ', '.join(str(name) for name in shared_names)))

        # Names that exist on one side only
        for name in graph_names - dict_names:
            if is_training_name(name):
                continue
            logger.warn(
                "Variable {} in the graph not found in the dict!".format(name))
        for name in dict_names - graph_names:
            logger.warn(
                "Variable {} in the dict not found in the graph!".format(name))

        restorable_vars = [
            var for var in graph_vars
            if get_savename_from_varname(var.name) in shared_names
        ]
        updater = SessionUpdate(sess, restorable_vars)
        logger.info("Restoring from dict ...")
        updater.update({
            key: val
            for key, val in six.iteritems(self.prms) if key in shared_names
        })
예제 #3
0
 def __init__(self, filename):
     """
     Import the meta graph stored in ``filename`` into the default graph and
     assert that it defines every collection this class relies on.
     """
     tf.train.import_meta_graph(filename)
     available_keys = tf.get_default_graph().get_all_collection_keys()
     required_keys = (
         INPUT_VARS_KEY,
         tf.GraphKeys.TRAINABLE_VARIABLES,
         tf.GraphKeys().VARIABLES,
     )
     for key in required_keys:
         assert key in available_keys, \
                 "Collection {} not found in metagraph!".format(key)
예제 #4
0
    def main(self, args=None):
        """
        Build a two-class linear classifier, restore its checkpoint and return.

        The graph is a single affine layer over 8192-dimensional float inputs,
        trained with Adam on the sparse softmax cross-entropy of the raw scores.
        A saver, a session, a TensorBoard writer and an initializer are stored on
        the instance as ``self.saver``, ``self.sess``, ``self.writer`` and
        ``self.initializer``.

        :param args: forwarded to ``self.parsearg``; the parsed result is stored
                     as ``self.args``
        """
        self.args = self.parsearg(args)
        self.loadModelParams()

        x = tf.placeholder(tf.float32, [None, 4096 * 2])
        y = tf.placeholder(tf.int32, [None])

        w = tf.Variable(tf.truncated_normal([4096 * 2, 2],
                                            stddev=np.sqrt(0.5)))
        b = tf.Variable(tf.zeros([2]))

        # sparse_softmax_cross_entropy_with_logits applies the softmax itself, so it
        # must be fed the raw scores; feeding softmax output applies softmax twice.
        logits = tf.matmul(x, w) + b
        cost = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                           logits=logits))
        optimizer = tf.train.AdamOptimizer(
            self.args.learning_rate).minimize(cost)

        # Saving the variables (created after the optimizer, so its variables are included)
        self.saver = tf.train.Saver(tf.global_variables())

        self.sess = tf.Session()

        # Tensorboard
        with tf.name_scope("Loss"):
            tf.summary.scalar('Training', cost)
        train_sum_op = tf.summary.merge_all()
        run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
        self.writer = tf.summary.FileWriter(self.tensorboard_dir + run_name +
                                            '/',
                                            graph=self.sess.graph)

        self.initializer = tf.global_variables_initializer()
        self.sess.run(self.initializer)

        # Restore the previously saved weights into the session owned by this object
        model = self.Model_dir + self.Model_name + '-' + '.ckpt'
        self.saver.restore(self.sess, model)
        print('model restored')
        # NOTE(review): this early return makes the training loop below unreachable.
        # It is kept because removing it would change what callers observe; delete
        # it if training after the restore is the intended behaviour.
        return

        # training
        for _ in range(self.args.num_epoch):
            data_gen = self.data_iter()
            for features, labels in tqdm(data_gen, desc='Training'):
                _, c, summary = self.sess.run([optimizer, cost, train_sum_op],
                                              feed_dict={
                                                  x: features,
                                                  y: labels
                                              })
                self.writer.add_summary(summary, self.global_step)

                self.global_step += 1

                if self.global_step % self.args.save_every == 0:
                    self.saver.save(self.sess, model)
                    tqdm.write("----- Step %d -- Loss %.2f " %
                               (self.global_step, c))
예제 #5
0
 def __init__(self, lr, n_actions, name, fcl_dims=256, input_dims=(210, 160, 4), chkpt_dir='tmp/dqn'):
     """
     Store the hyper-parameters, build the network in a new session, initialize
     its variables and prepare checkpointing.

     :param lr: stored as ``self.lr``
     :param n_actions: stored as ``self.n_actions``
     :param name: stored as ``self.name``; also the scope used to collect ``self.params``
     :param fcl_dims: stored as ``self.fcl_dims``
     :param input_dims: stored as ``self.input_dims``
     :param chkpt_dir: directory in which 'deepqnet.ckpt' is located
     """
     # Hyper-parameters are set before build_network() is invoked
     self.lr, self.name, self.n_actions = lr, name, n_actions
     self.fcl_dims, self.input_dims = fcl_dims, input_dims

     self.sess = tf.Session()
     self.build_network()
     init_op = tf.global_variables_initializer()
     self.sess.run(init_op)

     # Checkpointing
     self.saver = tf.train.Saver()
     self.checkpoint_file = os.path.join(chkpt_dir, 'deepqnet.ckpt')

     # Trainable variables living under this network's scope
     trainable_key = tf.GraphKeys().TRAINABLE_VARIABLES
     self.params = tf.get_collection(trainable_key, scope=self.name)
예제 #6
0
    def __init__(self, session, num_labels, num_layers, hidden_size, dropout,
                 batch_size, learning_rate, lr_decay_factor, grad_clip,
                 max_input_seq_length, max_target_seq_length, input_dim,
                 forward_only=False, tensorboard_dir=None, tb_run_name=None):
        """
        Acoustic rnn model, using ctc loss with lstm cells.

        Builds the whole graph (input projection, LSTM stack, output projection,
        beam-search prediction and, unless forward_only, the CTC loss and the
        Adam update), the TensorBoard summaries and the saver.

        Inputs:
        session - tensorflow session (its graph is handed to the summary writer)
        num_labels - dimension of character input/one hot encoding
        num_layers - number of lstm layers
        hidden_size - size of hidden layers
        dropout - value passed as input/output keep probability of the DropoutWrapper
        batch_size - number of training examples fed at once
        learning_rate - learning rate parameter fed to optimizer
        lr_decay_factor - decay factor of the learning rate
        grad_clip - max gradient size (prevent exploding gradients)
        max_input_seq_length - maximum length of input vector sequence
        max_target_seq_length - maximum length of ouput vector sequence
        input_dim - dimension of input vector
        forward_only - whether to build back prop nodes or not
        tensorboard_dir - path to tensorboard file (None if not activated)
        tb_run_name - sub-directory of tensorboard_dir used for this run; when None
                      a name derived from the current date and time is used
        """
        # Define GraphKeys for TensorBoard
        # (two distinct objects used as private collection keys, one per phase)
        graphkey_training = tf.GraphKeys()
        graphkey_test = tf.GraphKeys()

        self.dropout = dropout
        self.batch_size = batch_size
        # The learning rate is a non-trainable variable so that it can be decayed in place
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, name='learning_rate')
        tf.scalar_summary('Learning rate', self.learning_rate, collections=[graphkey_training, graphkey_test])
        # Running this op multiplies the learning rate by lr_decay_factor
        self.learning_rate_decay_op = self.learning_rate.assign(self.learning_rate * lr_decay_factor)
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        # NOTE(review): the original docstring described `dropout` as a drop probability,
        # but it is passed below as a *keep* probability - confirm the intended meaning
        self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
        self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
        self.max_input_seq_length = max_input_seq_length
        self.max_target_seq_length = max_target_seq_length
        self.tensorboard_dir = tensorboard_dir

        # Initialize data pipes and audio_processor to None
        self.train_conn = None
        self.test_conn = None
        self.audio_processor = None

        # graph inputs
        # (time-major layout: [max_input_seq_length, batch, input_dim])
        self.inputs = tf.placeholder(tf.float32,
                                     shape=[self.max_input_seq_length, None, input_dim],
                                     name="inputs")
        # We could take an int16 for less memory consumption but CTC need an int32
        self.input_seq_lengths = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name="input_seq_lengths")
        # Take an int16 for less memory consumption
        # max_target_seq_length should be less than 65535 (which is huge)
        self.target_seq_lengths = tf.placeholder(tf.int16,
                                                 shape=[None],
                                                 name="target_seq_lengths")

        # Define cells of acoustic model
        cell = rnn_cell.BasicLSTMCell(hidden_size, state_is_tuple=True)
        if not forward_only:
            # If we are in training then add a dropoutWrapper to the cells
            cell = rnn_cell.DropoutWrapper(cell, input_keep_prob=self.dropout_keep_prob_lstm_input,
                                           output_keep_prob=self.dropout_keep_prob_lstm_output)

        if num_layers > 1:
            # The same cell object is repeated num_layers times
            cell = rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

        # build input layer
        with tf.name_scope('Input_Layer'):
            w_i = tf.Variable(tf.truncated_normal([input_dim, hidden_size], stddev=np.sqrt(2.0 / (2 * hidden_size))),
                              name="input_w")
            b_i = tf.Variable(tf.zeros([hidden_size]), name="input_b")

        # make rnn inputs
        # (the input tensor is split along the time axis and every time step is projected
        #  with the input layer weights)
        inputs = [tf.matmul(tf.squeeze(i, squeeze_dims=[0]), w_i) + b_i
                  for i in tf.split(0, self.max_input_seq_length, self.inputs)]

        # set rnn init state to 0s
        init_state = cell.zero_state(self.batch_size, tf.float32)

        # build rnn
        # (the projected time steps are packed back into one time-major tensor)
        with tf.name_scope('Dynamic_rnn'):
            rnn_output, self.hidden_state = rnn.dynamic_rnn(cell, tf.pack(inputs),
                                                            sequence_length=self.input_seq_lengths,
                                                            initial_state=init_state,
                                                            time_major=True, parallel_iterations=1000)

        # build output layer
        with tf.name_scope('Output_layer'):
            w_o = tf.Variable(tf.truncated_normal([hidden_size, num_labels], stddev=np.sqrt(2.0 / (2 * num_labels))),
                              name="output_w")
            b_o = tf.Variable(tf.zeros([num_labels]), name="output_b")

        # compute logits
        # (same split / project / pack scheme as for the input layer, applied to the rnn output)
        self.logits = tf.pack([tf.matmul(tf.squeeze(i, squeeze_dims=[0]), w_o) + b_o
                               for i in tf.split(0, self.max_input_seq_length, rnn_output)])

        # compute prediction
        # (first decoded result of the CTC beam search, cast to int32)
        self.prediction = tf.to_int32(ctc.ctc_beam_search_decoder(self.logits, self.input_seq_lengths)[0][0])

        if not forward_only:
            # graph sparse tensor inputs
            # We could take an int16 for less memory consumption but SparseTensor need an int64
            self.target_indices = tf.placeholder(tf.int64,
                                                 shape=[None, 2],
                                                 name="target_indices")
            # We could take an int8 for less memory consumption but CTC need an int32
            self.target_vals = tf.placeholder(tf.int32,
                                              shape=[None],
                                              name="target_vals")

            # setup sparse tensor for input into ctc loss
            sparse_labels = tf.SparseTensor(
                indices=self.target_indices,
                values=self.target_vals,
                shape=[self.batch_size, self.max_target_seq_length])

            # compute ctc loss
            self.ctc_loss = ctc.ctc_loss(self.logits, sparse_labels,
                                         self.input_seq_lengths)
            self.mean_loss = tf.reduce_mean(self.ctc_loss)
            # The same tensor is reported under one tag per phase
            tf.scalar_summary('Mean loss (Training)', self.mean_loss, collections=[graphkey_training])
            tf.scalar_summary('Mean loss (Test)', self.mean_loss, collections=[graphkey_test])
            params = tf.trainable_variables()

            # Adam update on gradients clipped by their global norm
            opt = tf.train.AdamOptimizer(self.learning_rate)
            gradients = tf.gradients(self.ctc_loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                             grad_clip)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

            # Accuracy
            # (sum of the un-normalized edit distances divided by the number of label values)
            with tf.name_scope('Accuracy'):
                errorRate = tf.reduce_sum(tf.edit_distance(self.prediction, sparse_labels, normalize=False)) / \
                           tf.to_float(tf.size(sparse_labels.values))
                tf.scalar_summary('Error Rate (Training)', errorRate, collections=[graphkey_training])
                tf.scalar_summary('Error Rate (Test)', errorRate, collections=[graphkey_test])

        # TensorBoard init
        if self.tensorboard_dir is not None:
            self.train_summaries = tf.merge_all_summaries(key=graphkey_training)
            self.test_summaries = tf.merge_all_summaries(key=graphkey_test)
            # Without an explicit run name, fall back to a timestamp
            if tb_run_name is None:
                run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
            else:
                run_name = tb_run_name
            self.summary_writer = tf.train.SummaryWriter(tensorboard_dir + '/' + run_name + '/', graph=session.graph)
        else:
            self.summary_writer = None

        # We need to save all variables except for the hidden_state
        # we keep it across batches but we don't need it across different runs
        # Especially when we process a one time file
        # (variables are filtered on the substring 'hidden_state' in their name)
        save_list = [var for var in tf.all_variables() if var.name.find('hidden_state') == -1]
        self.saver = tf.train.Saver(save_list)
예제 #7
0
 def get_network_params(self):
     """
     Return the trainable variables registered under the scope ``self._name``,
     leaving out every variable whose name contains "Std".
     """
     trainable_key = tf.GraphKeys().TRAINABLE_VARIABLES
     kept_params = []
     for candidate in tf.get_collection(trainable_key, scope=self._name):
         if "Std" in candidate.name:
             continue
         kept_params.append(candidate)
     return kept_params
예제 #8
0
 def get_network_params(self):
     """Return the trainable variables registered under the scope ``self._name``."""
     trainable_key = tf.GraphKeys().TRAINABLE_VARIABLES
     scoped_params = tf.get_collection(trainable_key, scope=self._name)
     return scoped_params
예제 #9
0
def run_FUCOS(**kwargs):
    """
    Build the FUCOS network and either train it or run it on images.

    The network is a stack of convolutional layers followed by three transposed
    convolutions with two skip connections; its single-channel output is resized
    to 135x240 and passed through a sigmoid.

    Keyword arguments:
    training_data - indexable pair (inputs, ground truth), used when TRAIN is true
    validation_data - indexable pair (inputs, ground truth), used when TRAIN is true
    batchsize - batch size handed to next_batch
    TRAIN - train when true (default), otherwise run the testing branch
    run - path given to saver.save when training and to tf.train.latest_checkpoint
          when testing
          NOTE(review): the first expects a file prefix, the second a directory -
          confirm that callers pass a value that works for both
    testing_path - image file, or folder of '.jpg' files, used when TRAIN is false
    isfolder - whether testing_path is a folder

    Training stops after 200 epochs or once the validation loss has not improved
    for more than 30 consecutive epochs. The model is saved whenever the validation
    loss improves; afterwards training continues on training + validation data for
    as many epochs as it took to reach the best validation loss.
    """
    training_data = kwargs.get('training_data')
    validation_data = kwargs.get('validation_data')
    batchsize = kwargs.get('batchsize')
    TRAIN = kwargs.get('TRAIN', True)
    run = kwargs.get('run')

    config_sess = tf.ConfigProto(allow_soft_placement=True,
                                 log_device_placement=False)
    config_sess.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config_sess)

    #build the model
    # (every layer is appended to `model`, which is later handed to load_weights)
    model = []
    with tf.device('/gpu:2'):
        x = tf.placeholder(tf.float32, (None, 135, 240, 3), 'input')
        y_ = tf.placeholder(tf.float32, (None, 135, 240, 1), 'gt')
        keep_prob = tf.placeholder(tf.float32, name='dropout_prob')

        with tf.variable_scope('conv1'):
            conv1 = layers.ConvolutionalLayer(x, [135, 240, 3], [3, 3, 3, 64])
            model.append(conv1)
        with tf.variable_scope('conv2'):
            conv2 = layers.ConvolutionalLayer(conv1.output(),
                                              conv1.get_output_shape(),
                                              [3, 3, 64, 64],
                                              pool=True)
            model.append(conv2)

        with tf.variable_scope('conv3'):
            conv3 = layers.ConvolutionalLayer(conv2.output(),
                                              conv2.get_output_shape(),
                                              [3, 3, 64, 128])
            model.append(conv3)
        with tf.variable_scope('conv4'):
            conv4 = layers.ConvolutionalLayer(conv3.output(),
                                              conv3.get_output_shape(),
                                              [3, 3, 128, 128],
                                              pool=True)
            model.append(conv4)

        with tf.variable_scope('conv5'):
            conv5 = layers.ConvolutionalLayer(conv4.output(),
                                              conv4.get_output_shape(),
                                              [3, 3, 128, 256])
            model.append(conv5)
        with tf.variable_scope('conv6'):
            conv6 = layers.ConvolutionalLayer(conv5.output(),
                                              conv5.get_output_shape(),
                                              [3, 3, 256, 256])
            model.append(conv6)
        with tf.variable_scope('conv7'):
            conv7 = layers.ConvolutionalLayer(conv6.output(),
                                              conv6.get_output_shape(),
                                              [3, 3, 256, 256],
                                              pool=True)
            model.append(conv7)

        with tf.variable_scope('conv8'):
            conv8 = layers.ConvolutionalLayer(conv7.output(),
                                              conv7.get_output_shape(),
                                              [3, 3, 256, 512])
            model.append(conv8)
        with tf.variable_scope('conv9'):
            conv9 = layers.ConvolutionalLayer(conv8.output(),
                                              conv8.get_output_shape(),
                                              [3, 3, 512, 512])
            model.append(conv9)
        with tf.variable_scope('conv10'):
            conv10 = layers.ConvolutionalLayer(conv9.output(),
                                               conv9.get_output_shape(),
                                               [3, 3, 512, 512],
                                               pool=True)
            model.append(conv10)

        with tf.variable_scope('conv11'):
            conv11 = layers.ConvolutionalLayer(conv10.output(),
                                               conv10.get_output_shape(),
                                               [3, 3, 512, 512])
            model.append(conv11)
        with tf.variable_scope('conv12'):
            conv12 = layers.ConvolutionalLayer(conv11.output(),
                                               conv11.get_output_shape(),
                                               [3, 3, 512, 512])
            model.append(conv12)
        with tf.variable_scope('conv13'):
            conv13 = layers.ConvolutionalLayer(conv12.output(),
                                               conv12.get_output_shape(),
                                               [3, 3, 512, 512],
                                               pool=True)
            model.append(conv13)

        # Two convolutions with dropout on top of the convolutional stack
        with tf.variable_scope('conv14'):
            conv14 = layers.ConvolutionalLayer(conv13.output(),
                                               conv13.get_output_shape(),
                                               [7, 7, 512, 4096],
                                               drop_out=True,
                                               drop_out_prob=keep_prob)
            model.append(conv14)
        with tf.variable_scope('conv15'):
            conv15 = layers.ConvolutionalLayer(conv14.output(),
                                               conv14.get_output_shape(),
                                               [1, 1, 4096, 4096],
                                               drop_out=True,
                                               drop_out_prob=keep_prob)
            model.append(conv15)

        # First upsampling step, summed with a 1x1 projection of conv10
        with tf.variable_scope('convtrans1'):
            deconv1 = layers.ConvolutionalTransposeLayer(
                conv15.output(), [4, 4, 60, 4096], None)
            model.append(deconv1)
        with tf.variable_scope('conv16'):
            conv16 = layers.ConvolutionalLayer(conv10.output(),
                                               conv10.get_output_shape(),
                                               [1, 1, 512, 60])
            model.append(conv16)
        conv16_output = conv16.output()
        sum1 = conv16_output + tf.image.resize_images(
            deconv1.output(),
            (tf.shape(conv16_output)[1], tf.shape(conv16_output)[2]))

        # Second upsampling step, summed with a 1x1 projection of conv7
        with tf.variable_scope('convtrans2'):
            deconv2 = layers.ConvolutionalTransposeLayer(
                sum1, [4, 4, 60, 60], None)
            model.append(deconv2)
        with tf.variable_scope('conv17'):
            conv17 = layers.ConvolutionalLayer(conv7.output(),
                                               conv7.get_output_shape(),
                                               [1, 1, 256, 60])
            model.append(conv17)
        conv17_output = conv17.output()
        sum2 = conv17_output + tf.image.resize_images(
            deconv2.output(),
            (tf.shape(conv17_output)[1], tf.shape(conv17_output)[2]))

        # Last upsampling step (stride 8) followed by two 1x1 convolutions
        with tf.variable_scope('convtrans3'):
            deconv3 = layers.ConvolutionalTransposeLayer(sum2,
                                                         [16, 16, 60, 60],
                                                         None,
                                                         deconv_stride=(1, 8,
                                                                        8, 1))
            model.append(deconv3)

        with tf.variable_scope('conv18'):
            conv18 = layers.ConvolutionalLayer(deconv3.output(),
                                               deconv3.get_output_shape(),
                                               [1, 1, 60, 12])
            model.append(conv18)
        with tf.variable_scope('conv19'):
            conv19 = layers.ConvolutionalLayer(
                conv18.output(),
                conv18.get_output_shape_tensor(), [1, 1, 12, 1],
                activation=function['linear'])
            model.append(conv19)

        y_pre_activation = tf.image.resize_images(
            conv19.output(),
            (135, 240))  #resize to match the ground truth's shape
        y_pred = function['sigmoid'](
            y_pre_activation)  #activate the output by sigmoid

        cost = metrics.MultinoulliCrossEntropy(y_pre_activation,
                                               y_)  #use binary cross entropy
        var_list = tf.get_collection(tf.GraphKeys().TRAINABLE_VARIABLES)
        L2 = sum([
            tf.reduce_mean(tf.square(theta))  #L2 regularization
            for theta in (weight for weight in var_list
                          if 'weights' in weight.name)
        ])
        cost += 1e-4 * L2

        opt = tf.train.AdamOptimizer(1e-3, 0.9, 0.99, 1e-8).minimize(
            cost, var_list=var_list)  #ADAM optimization
        # Fraction of pixels whose prediction, thresholded at 0.5, equals the ground truth
        accuracy = tf.reduce_mean(
            tf.cast(
                tf.equal(tf.cast(y_pred >= 0.5, tf.uint8),
                         tf.cast(y_, tf.uint8)), tf.float32))
        saver = tf.train.Saver()

        if TRAIN:
            sess.run(tf.global_variables_initializer())
            print('Loading VGG16 weights...')
            load_weights('pretrained/vgg16_weights.npz', model,
                         sess)  #load pretrained VGG16 weights

            best_valid_accuracy = 0.
            best_valid_loss = np.inf
            best_epoch = 0
            epoch = 0
            vote_to_terminate = 0
            done_looping = False
            print('TRAINING...')
            start_training_time = time.time()
            while epoch < 200 and not done_looping:
                epoch += 1
                num_iter_training = int(training_data[0].shape[0] / batchsize)
                losses_train = 0.
                accuracies_train = 0.
                start_batch_time = time.time()
                print('Epoch %d...' % epoch)
                batch = next_batch(training_data, batchsize)  #training
                for b in batch:
                    fd = {x: b[0], y_: b[1], keep_prob: 0.1}
                    _, a, l = sess.run([opt, accuracy, cost], feed_dict=fd)
                    assert not np.isnan(l), 'Train failed with loss being NaN'
                    losses_train += l
                    accuracies_train += a

                print('\ttraining loss: %s' %
                      (losses_train / num_iter_training))
                print('\ttraining accuracy: %s' %
                      (accuracies_train / num_iter_training))
                print('\tepoch %d took %.2f hours' %
                      (epoch, (time.time() - start_batch_time) / 3600.))

                num_iter_valid = int(validation_data[0].shape[0] / batchsize)
                losses_valid = 0.
                accuracies_valid = 0.
                start_valid_time = time.time()
                batch = next_batch(validation_data, batchsize)  #validation
                for b in batch:
                    fd = {x: b[0], y_: b[1], keep_prob: 1}
                    l, a = sess.run([cost, accuracy], feed_dict=fd)
                    losses_valid += l
                    accuracies_valid += a
                avr_acc_valid = accuracies_valid / num_iter_valid
                losses_valid /= num_iter_valid
                # Track the highest validation accuracy so that the final report
                # does not print the initial value
                best_valid_accuracy = max(best_valid_accuracy, avr_acc_valid)

                print('\tvalidation took %.2f hours' %
                      ((time.time() - start_valid_time) / 3600.))
                print('\tvalidation loss: %s' % losses_valid)
                print('\tvalidation accuracy: %s' % avr_acc_valid)

                # Model selection and early stopping are driven by the validation loss
                if losses_valid < best_valid_loss:
                    best_valid_loss = losses_valid
                    best_epoch = epoch
                    vote_to_terminate = 0
                    print('\tbest validation loss achieved: %.4f' %
                          best_valid_loss)
                    save_path = saver.save(sess, run)
                    print("\tmodel saved in file: %s" % save_path)
                else:
                    vote_to_terminate += 1

                if vote_to_terminate > 30:
                    done_looping = True
            print('Training ends after %.2f hours' %
                  ((time.time() - start_training_time) / 3600.))
            print('\tbest validation accuracy: %.2f' % best_valid_accuracy)
            print('Training the model using all data available...')
            total_training_data = (np.concatenate(
                (training_data[0], validation_data[0])),
                                   np.concatenate(
                                       (training_data[1], validation_data[1])))
            # NOTE(review): the weights obtained in this second phase are not saved
            # by this function - confirm whether that is intended
            for i in range(best_epoch):
                num_iter_training = int(total_training_data[0].shape[0] /
                                        batchsize)
                losses_train = 0.
                start_batch_time = time.time()
                print('Epoch %d...' % (i + 1))
                batch = next_batch(total_training_data, batchsize)  #training
                for b in batch:
                    fd = {x: b[0], y_: b[1], keep_prob: 0.1}
                    _, _, l = sess.run([opt, accuracy, cost], feed_dict=fd)
                    assert not np.isnan(l), 'Train failed with loss being NaN'
                    losses_train += l

                print('\ttraining loss: %s' %
                      (losses_train / num_iter_training))
                print('\tepoch %d took %.2f hours' %
                      (i + 1, (time.time() - start_batch_time) / 3600.))

        else:  #testing
            path = kwargs.get('testing_path')
            isfolder = kwargs.get('isfolder')

            image_list = [
                path + '/' + f for f in os.listdir(path) if f.endswith('.jpg')
            ] if isfolder else [path]
            saver.restore(sess, tf.train.latest_checkpoint(run))
            print('Checkpoint restored...')
            print('Testing %d images...' % len(image_list))
            images = []
            predictions = []
            time.sleep(0.1)
            for image_path in tqdm.tqdm(image_list, unit='images'):
                ori_img = misc.imread(image_path)
                # Images with fewer than three dimensions are skipped
                if len(ori_img.shape) < 3:
                    continue
                img = padding(ori_img, 135, 240)
                img = np.reshape(img, (1, 135, 240, 3)) / 255.
                fd = {x: img, keep_prob: 1}
                pred = sess.run(y_pred, feed_dict=fd)
                images.append(ori_img)
                predictions.append(pred)
            time.sleep(0.1)
            print('Testing finished!')

            # Show every image above its post-processed prediction
            for image, prediction in zip(images, predictions):
                plt.figure(1)
                sal = np.reshape(prediction, (135, 240))
                sal = depadding(sal, image.shape[0], image.shape[1])
                # Keep only the values above the 95th percentile, blur, then rescale to [0, 1]
                sal = sal * (sal > np.percentile(sal, 95))
                sal = gaussian_filter(sal, sigma=0.09 * sal.shape[0])
                sal = (sal - np.min(sal)) / (np.max(sal) - np.min(sal))
                plt.subplot(211)
                plt.imshow(image)
                plt.subplot(212)
                plt.imshow(sal, cmap='gray')
                plt.show()
예제 #10
0
    def __init__(self,
                 session,
                 num_labels,
                 num_layers,
                 hidden_size,
                 input_keep_prob,
                 output_keep_prob,
                 batch_size,
                 learning_rate,
                 lr_decay_factor,
                 grad_clip,
                 max_input_seq_length,
                 max_target_seq_length,
                 input_dim,
                 normalization,
                 forward_only=False,
                 tensorboard_dir=None,
                 tb_run_name=None,
                 timeline_enabled=False):
        """
        Acoustic rnn model, using ctc loss with lstm cells

        The whole graph is built here: input placeholders, an input projection layer, the
        (optionally multi-layer) LSTM, an output projection layer producing the logits and a
        CTC beam search decoder. Unless forward_only is set, the CTC loss, the Adam update op
        and the error rate are added as well. Summaries and a Saver are created at the end.

        Inputs:
        session - tensorflow session (only session.graph is read here, for the summary writer)
        num_labels - size of the output layer, i.e. the last dimension of the logits
        num_layers - number of lstm layers
        hidden_size - size of hidden layers
        input_keep_prob - probability of keeping input signal for a cell during training
                          (stored on the instance; the graph itself reads input_keep_prob_ph)
        output_keep_prob - probability of keeping output signal from a cell during training
                           (stored on the instance; the graph itself reads output_keep_prob_ph)
        batch_size - number of training examples fed at once, also used to size the rnn initial state
        learning_rate - initial learning rate, held in a non-trainable variable fed to the optimizer
        lr_decay_factor - factor the learning rate is multiplied by when learning_rate_decay_op is run
        grad_clip - max global norm of the gradients (prevent exploding gradients)
        max_input_seq_length - maximum length of input vector sequence (time dimension of the inputs)
        max_target_seq_length - maximum length of output vector sequence (stored, not used in this method)
        input_dim - dimension of input vector
        normalization - boolean indicating whether or not to normalize the projected inputs
                        (only applied when forward_only is False)
        forward_only - when True, the dropout wrapper, loss, optimizer and error rate are not built
        tensorboard_dir - path to tensorboard file (None if not activated)
        tb_run_name - directory name for the tensorboard files (inside tensorboard_dir, None means a
                      timestamp-based name is generated)
        timeline_enabled - enable the output of a trace file for timeline visualization
                           (stored, not used in this method)

        Attributes that only exist conditionally:
        sparse_labels, ctc_loss, mean_loss, update - created only when forward_only is False
        train_summaries, test_summaries - created only when tensorboard_dir is not None
        """
        # Initialize thread management
        self.lock = threading.Lock()

        # Define GraphKeys for TensorBoard
        # Two separate GraphKeys instances serve as the collection keys that keep the
        # training summaries apart from the test summaries (see merge_all at the end)
        graphkey_training = tf.GraphKeys()
        graphkey_test = tf.GraphKeys()

        # Store model variables
        self.input_keep_prob = input_keep_prob
        self.output_keep_prob = output_keep_prob
        self.batch_size = batch_size
        # The learning rate lives in a non-trainable variable so it can be reassigned by the decay op
        self.learning_rate = tf.Variable(float(learning_rate),
                                         trainable=False,
                                         name='learning_rate')
        tf.summary.scalar('Learning_rate',
                          self.learning_rate,
                          collections=[graphkey_training, graphkey_test])
        # Running this op multiplies the current learning rate by lr_decay_factor
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * lr_decay_factor)
        # Step counter handed to the optimizer's apply_gradients below
        self.global_step = tf.Variable(0, trainable=False, name='global_step')
        self.max_input_seq_length = max_input_seq_length
        self.max_target_seq_length = max_target_seq_length
        self.tensorboard_dir = tensorboard_dir
        self.timeline_enabled = timeline_enabled
        self.input_dim = input_dim
        # Variance epsilon passed to the batch normalization below
        self.epsilon = 1e-3

        # graph inputs
        # Time-major layout : [max_input_seq_length, batch, input_dim]
        self.inputs = tf.placeholder(
            tf.float32,
            shape=[self.max_input_seq_length, None, self.input_dim],
            name="inputs")
        # We could take an int16 for less memory consumption but CTC need an int32
        self.input_seq_lengths = tf.placeholder(tf.int32,
                                                shape=[None],
                                                name="input_seq_lengths")

        # Define cells of acoustic model
        cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, state_is_tuple=True)

        # Define a dropout layer (used only when training)
        with tf.name_scope('dropout'):
            # Create placeholders, used to override values when running on the test set
            # Note : the placeholders are created even when forward_only is True
            self.input_keep_prob_ph = tf.placeholder(tf.float32)
            self.output_keep_prob_ph = tf.placeholder(tf.float32)
            if not forward_only:
                # If we are in training then add a dropoutWrapper to the cells
                tf.summary.scalar('input_keep_prob',
                                  self.input_keep_prob_ph,
                                  collections=[graphkey_training])
                tf.summary.scalar('output_keep_prob',
                                  self.output_keep_prob_ph,
                                  collections=[graphkey_training])
                cell = tf.nn.rnn_cell.DropoutWrapper(
                    cell,
                    input_keep_prob=self.input_keep_prob_ph,
                    output_keep_prob=self.output_keep_prob_ph)

        # Stack the layers
        # NOTE(review): [cell] * num_layers puts the very same cell object in every layer;
        # depending on the TensorFlow version this may share weights or be rejected - confirm
        if num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers,
                                               state_is_tuple=True)

        # build input layer
        # Projection from input_dim to hidden_size, applied to every time step
        with tf.name_scope('Input_Layer'):
            w_i = tf.Variable(tf.truncated_normal(
                [input_dim, hidden_size],
                stddev=np.sqrt(2.0 / (2 * hidden_size))),
                              name="input_w")
            b_i = tf.Variable(tf.zeros([hidden_size]), name="input_b")

        # make rnn inputs
        # The input tensor is split along the time axis and each time step is projected separately
        # NOTE(review): tf.split(0, n, value), tf.pack and the argument order used for
        # tf.nn.ctc_loss further down look like the pre-1.0 TensorFlow API - confirm target version
        inputs = [
            tf.matmul(tf.squeeze(i, squeeze_dims=[0]), w_i) + b_i
            for i in tf.split(0, self.max_input_seq_length, self.inputs)
        ]
        # Switch from a list to a tensor
        inputs = tf.pack(inputs)

        # If we are in training then add a batch normalization layer to the model
        if normalization and not forward_only:
            # Note : the tensor is [time, batch_size, input vector] so we go against dim 1
            batch_mean, batch_var = tf.nn.moments(inputs, [1],
                                                  shift=None,
                                                  name="moments",
                                                  keep_dims=True)
            # Offset and scale are None : the inputs are only centered and rescaled
            inputs = tf.nn.batch_normalization(inputs,
                                               batch_mean,
                                               batch_var,
                                               None,
                                               None,
                                               self.epsilon,
                                               name="batch_norm")

        # set rnn init state to 0s
        # The initial state is sized with batch_size; presumably every fed batch must then hold
        # exactly batch_size examples - verify against caller
        init_state = cell.zero_state(self.batch_size, tf.float32)

        # build rnn
        with tf.name_scope('Dynamic_rnn'):
            rnn_output, self.hidden_state = tf.nn.dynamic_rnn(
                cell,
                inputs,
                sequence_length=self.input_seq_lengths,
                initial_state=init_state,
                time_major=True)

        # build output layer
        # Projection from hidden_size to num_labels, applied to every time step
        with tf.name_scope('Output_layer'):
            w_o = tf.Variable(tf.truncated_normal([hidden_size, num_labels],
                                                  stddev=np.sqrt(
                                                      2.0 / (2 * num_labels))),
                              name="output_w")
            b_o = tf.Variable(tf.zeros([num_labels]), name="output_b")

        # Compute logits
        # Same split / project / stack scheme as for the input layer
        self.logits = tf.pack([
            tf.matmul(tf.squeeze(i, squeeze_dims=[0]), w_o) + b_o
            for i in tf.split(0, self.max_input_seq_length, rnn_output)
        ])

        # compute prediction
        # Only the first entry of the decoded list is kept, cast to int32
        decoded, _log_prob = tf.nn.ctc_beam_search_decoder(
            self.logits, self.input_seq_lengths)
        self.prediction = tf.to_int32(decoded[0])

        if not forward_only:
            # Sparse tensor for the correct labels input
            self.sparse_labels = tf.sparse_placeholder(tf.int32)

            # Compute ctc loss
            self.ctc_loss = tf.nn.ctc_loss(self.logits, self.sparse_labels,
                                           self.input_seq_lengths)
            # Compute mean loss : only to check on progression in learning
            # Each example's loss is divided by its input sequence length, then averaged across the batch
            # NOTE(review): the previous comment spoke of the "real size of the label" but the
            # divisor is input_seq_lengths - confirm which one is intended
            self.mean_loss = tf.reduce_mean(
                tf.truediv(self.ctc_loss, tf.to_float(self.input_seq_lengths)))
            with tf.name_scope('Mean_loss'):
                tf.summary.scalar('Training',
                                  self.mean_loss,
                                  collections=[graphkey_training])
                tf.summary.scalar('Test',
                                  self.mean_loss,
                                  collections=[graphkey_test])
            params = tf.trainable_variables()

            # Gradients are taken on ctc_loss (not on mean_loss) and clipped by global norm
            # The second value returned by clip_by_global_norm (norm) is not used afterwards
            opt = tf.train.AdamOptimizer(self.learning_rate)
            gradients = tf.gradients(self.ctc_loss, params)
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, grad_clip)
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

            # Accuracy
            # Mean normalized edit distance between the prediction and the labels
            with tf.name_scope('Accuracy_-_Error_Rate'):
                error_rate = tf.reduce_mean(
                    tf.edit_distance(self.prediction,
                                     self.sparse_labels,
                                     normalize=True))
                tf.summary.scalar('Training',
                                  error_rate,
                                  collections=[graphkey_training])
                tf.summary.scalar('Test',
                                  error_rate,
                                  collections=[graphkey_test])

        # TensorBoard init
        # Note : train_summaries and test_summaries are only defined when tensorboard_dir is set
        if self.tensorboard_dir is not None:
            self.train_summaries = tf.summary.merge_all(key=graphkey_training)
            self.test_summaries = tf.summary.merge_all(key=graphkey_test)
            # Without an explicit run name, fall back to a timestamp-based directory name
            if tb_run_name is None:
                run_name = datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
            else:
                run_name = tb_run_name
            self.summary_writer = tf.summary.FileWriter(tensorboard_dir + '/' +
                                                        run_name + '/',
                                                        graph=session.graph)
        else:
            self.summary_writer = None

        # We need to save all variables except for the hidden_state
        # we keep it across batches but we don't need it across different runs
        # Especially when we process a one time file
        # The filter is purely name based : any global variable whose name contains
        # 'hidden_state' is left out of the saver
        save_list = [
            var for var in tf.global_variables()
            if var.name.find('hidden_state') == -1
        ]
        self.saver = tf.train.Saver(save_list)
예제 #11
0
from __future__ import division
from __future__ import print_function
from __future__ import absolute_import

import tensorflow as tf
from collections import OrderedDict

from .utils import get_from_module
from .utils import process_params


# Name of the graph collection that holds the losses. GraphKeys exposes its
# keys as class attributes, so the class is read directly instead of being
# instantiated just to look up a constant.
LOSSES = tf.GraphKeys.LOSSES


# Names exported by `from <module> import *`.
__all__ = ['add_loss',
           'get_losses',
           'get_regularization_losses',
           'get_total_loss',
           'l1_loss',
           'l2_loss',
           'get',
           'process_parameters']


def add_loss(loss):
    """Add an externally defined loss to the collection of losses.

    NOTE(review): in this excerpt no statement follows the docstring, so the
    function as shown returns ``None`` without registering anything. The body
    is presumably cut off here -- confirm against the full source.

    Parameters
    ----------
    loss: A loss `Tensor`.
    """
예제 #12
0
 def _get_vars(self, scope):
     """Return the trainable variables registered under ``scope``.

     :param scope: scope filter forwarded unchanged to ``tf.get_collection``
     :return: the items of the TRAINABLE_VARIABLES collection selected by ``scope``
     """
     # GraphKeys exposes its keys as class attributes; no instance is needed.
     return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope)