Example #1
    def start_train(self):
        y_ = tf.placeholder(tf.float32, [None, 2])
        y = self.vgg.y
        loss = calculate_loss(logits=y, labels=y_)
        tf.summary.scalar('loss', loss)
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=self.learning_rate).minimize(loss)
        # compute accuracy
        accuracy_tensor = calculate_accuracy(logits=y, labels=y_)
        merged = tf.summary.merge_all()
        max_accuracy = 0.0
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            log_path = './log/train'
            val_log_path = './log/val'
            writer = tf.summary.FileWriter(log_path, tf.get_default_graph())
            val_writer = tf.summary.FileWriter(val_log_path,
                                               tf.get_default_graph())
            for i in range(self.iterator_number):
                train_images, labels, scores = self.dataset.next_batch(
                    self.BATCH_SZIE, self.BATCH_DISTRIBUTION)
                feed_dict = {self.vgg.imgs: train_images, y_: labels}
                _, loss_value, accuracy_value, summary, y_value = sess.run(
                    [train_op, loss, accuracy_tensor, merged, y],
                    feed_dict=feed_dict)
                writer.add_summary(summary, i)
                if (i % 40) == 0 and i != 0:
                    val_images, labels, scores = self.val_dataset.next_batch(
                        self.BATCH_SZIE, self.BATCH_DISTRIBUTION)
                    feed_dict = {self.vgg.imgs: val_images, y_: labels}
                    val_loss, val_accuracy, summary = sess.run(
                        [loss, accuracy_tensor, merged], feed_dict=feed_dict)
                    if val_accuracy > 0.9:
                        print 'will save, accuracy is %g' % val_accuracy
                        save_weights(
                            '/home/give/PycharmProjects/FaceDetection/fine_tuning_vgg16/vgg16_trained.npy',
                            self.vgg.layers_name)
                    max_accuracy = max(max_accuracy, val_accuracy)
                    val_writer.add_summary(summary, i)
                    print '-' * 15, 'val loss is %g, val accuracy is %g' % (
                        val_loss, val_accuracy), '-' * 15
                if (i % 20) == 0:
                    print 'predict the number of positive number is ', np.sum(
                        np.argmax(y_value, 1))
                    print 'loss value is %g accuracy is %g' \
                          % (loss_value, accuracy_value)
                del train_images, labels, scores
                gc.collect()
        writer.close()
        val_writer.close()
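Example #1 calls calculate_loss and calculate_accuracy helpers that are defined elsewhere in its repository. A minimal sketch of what such helpers could look like for the two-class logits used above, assuming softmax cross-entropy and one-hot labels (the names and signatures only mirror the calls above; the bodies are assumptions):

import tensorflow as tf

def calculate_loss(logits, labels):
    # assumed: mean softmax cross-entropy over the batch
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                            labels=labels)
    return tf.reduce_mean(cross_entropy)

def calculate_accuracy(logits, labels):
    # assumed: fraction of samples whose arg-max prediction matches the one-hot label
    correct = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    return tf.reduce_mean(tf.cast(correct, tf.float32))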
Example #2
    def start_train(self, load_model=False):
        y = self.upsample_8s
        global_step = tf.Variable(0, trainable=False)
        tf.summary.image("pred_annotation",
                         tf.cast(
                             tf.expand_dims(tf.argmax(y, dimension=3), dim=3),
                             tf.uint8),
                         max_outputs=2)
        loss = calculate_loss(logits=y, labels=self.y_, arg_index=3)
        learning_rate = tf.train.exponential_decay(
            self.learning_rate,
            global_step,
            len(self.dataset.train_image) / self.BATCH_SIZE,
            self.learning_rate_decay,
            staircase=False)
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=learning_rate, ).minimize(loss,
                                                    global_step=global_step)

        accuracy_tensor = calculate_accuracy(logits=y,
                                             labels=self.y_,
                                             arg_index=3)
        merged = tf.summary.merge_all()
        self.sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter('./log', self.sess.graph)
        saver = tf.train.Saver()
        if load_model:
            saver.restore(self.sess, self.model_save_path)
        for i in range(self.itertator_number):
            train_image, train_annotation, flag = self.dataset.next_batch(
                self.BATCH_SIZE)
            train_annotation = conver2onehot(np.array(train_annotation))
            feed_dict = {self.imgs: train_image, self.y_: train_annotation}
            _, loss_value, accuracy_value, summary, step, learning_rate_value = self.sess.run(
                [
                    train_op, loss, accuracy_tensor, merged, global_step,
                    learning_rate
                ],
                feed_dict=feed_dict)
            if (i % 1000) == 0 and i != 0:
                saver.save(self.sess, self.model_save_path)
                save_layers = []
                save_layers.extend(self.vgg.layers_name)
                save_layers.extend(self.layers_name)
                save_weights('./trained_vgg16.npy', save_layers)
            if (i % 20) == 0:
                summary_writer.add_summary(summary, i)
                print 'step is %d, loss value is %g, accuracy is %g, learning_rate_value is %g' % (
                    i, loss_value, accuracy_value, learning_rate_value)
        summary_writer.close()
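Example #2's conver2onehot is assumed to turn an integer segmentation map of shape [batch, height, width] into a one-hot array of shape [batch, height, width, num_classes] that matches self.y_. A sketch under that assumption (the two-class default is illustrative):

import numpy as np

def conver2onehot(annotation, num_classes=2):
    # annotation: integer class map of shape [batch, height, width] (assumed)
    one_hot = np.zeros(annotation.shape + (num_classes,), dtype=np.float32)
    for c in range(num_classes):
        one_hot[..., c] = (annotation == c).astype(np.float32)
    return one_hot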
Example #3
def one_epoch():
    is_changed = 0
    for x in dataset:
        x1, x2 = x
        correct = tools.f(x1, x2)
        neuron_output = weights[0] + weights[1] * x1 + weights[2] * x2
        result = tools.signum(neuron_output)

        d = correct - result

        if not d:
            continue

        weights[0] += tools.weight_delta(d, 1, weights[0])
        weights[1] += tools.weight_delta(d, x1, weights[1])
        weights[2] += tools.weight_delta(d, x2, weights[2])
        tools.save_weights(weights)

        is_changed = 1

    return is_changed
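The perceptron loop above relies on tools.signum and tools.weight_delta. Under the classic perceptron learning rule (update = learning rate * error * input) those helpers could look roughly like the sketch below; the 0.1 learning rate is an assumption, and the current weight is only accepted to match the call signature above:

def signum(x):
    # assumed step activation: +1 for non-negative input, -1 otherwise
    return 1 if x >= 0 else -1

def weight_delta(d, x, weight, learning_rate=0.1):
    # classic perceptron update term: error times input, scaled by the learning rate
    return learning_rate * d * x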
    def train_without_val(self, train_set_x, train_set_y, save_model=False):
        train_model = self._train_without_val_init_(train_set_x, train_set_y)
        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = self.n_train_batches
        best_validation_loss = np.inf
        best_train_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print 'start training...'
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(self.n_train_batches):
                minibatch_avg_cost,train_acc = train_model(minibatch_index)
                this_train_acc = 1 - train_acc
            print('epoch %i/%s, cost %.4f , train acc %.4f ' % (epoch, str(n_epochs), minibatch_avg_cost, this_train_acc))

            if save_model:
                if this_train_acc > best_train_acc:
                    best_train_acc = this_train_acc
                    #print "best val acc at epoch %i is %.4f" %(epoch,best_val_acc)
                    folder = "./snapshot_{0}_{1}/".format(epoch, round(best_train_acc,3))
                    os.mkdir(folder)
                    tools.save_weights(self.layers, folder, epoch)
                    #print "model saved at epoch %i" %(epoch)

            if self.learning_rate_decay:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        end_time = time.clock()
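The learning-rate decay above rebinds self.optimizer.lr to a symbolic product, which only takes effect if the optimizer reads lr symbolically inside its compiled updates. A more common Theano pattern keeps the rate in one shared variable and halves it in place so already-compiled functions pick up the new value; a minimal sketch, assuming the optimizer's updates reference this shared variable:

import numpy as np
import theano

lr = theano.shared(np.cast[theano.config.floatX](0.01), name='lr')

def halve_learning_rate(shared_lr):
    # in-place decay; compiled training functions see the new value immediately
    shared_lr.set_value(np.cast[theano.config.floatX](shared_lr.get_value() * 0.5))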
Example #5
def train_net(config):

    # UNPACK CONFIGS
    (train_filenames, img_mean) = unpack_configs(config)

    import theano.sandbox.cuda
    theano.sandbox.cuda.use(config['gpu'])

    import theano
    theano.config.on_unused_input = 'warn'
    import theano.tensor as T

    from multilabel_layers import DropoutLayer
    from multilabel_net import CNN_model, compile_models

    import theano.misc.pycuda_init
    import theano.misc.pycuda_utils

    # load hash_step1_bits
    group_idx = sio.loadmat('./step1/temp/group_idx.mat')
    group_idx = group_idx['group_idx']
    group_idx = group_idx[0][0]
    code_per_group = 8

    bits_idxes = range((group_idx - 1) * code_per_group)

    config['output_num'] = len(bits_idxes)

    model = CNN_model(config)

    batch_size = model.batch_size
    layers = model.layers
    weight_types = model.weight_types
    params = model.params

    val_filenames = train_filenames[:20]

    n_train_batches = len(train_filenames)
    minibatch_range = range(n_train_batches)

    ## COMPILE FUNCTIONS ##
    (train_model, validate_model, predict_model, train_error, learning_rate,
     shared_x, shared_y, vels) = compile_models(model, config)

    train_labels = None

    for idx in bits_idxes:

        hash_step1_code = h5py.File('./step1/temp/hash_step1_code_' +
                                    str(idx + 1) + '.mat')
        temp = np.transpose(np.asarray(
            hash_step1_code['hash_step1_code'])).astype('int64')

        if train_labels is None:
            train_labels = temp
        else:
            train_labels = np.hstack([train_labels, temp])

    train_labels[train_labels == -1] = 0

    val_labels = train_labels[:20 * batch_size]

    ######################### TRAIN MODEL ################################

    print '... training'

    #    initialize_weights(layers, weight_types)
    #    learning_rate.set_value(config['learning_rate'])

    #    vels = [theano.shared(param_i.get_value() * 0.)
    #            for param_i in params]

    # Start Training Loop
    epoch = 0
    step_idx = 0
    val_record = []
    predicted_labels = None
    while epoch < config['n_epochs']:
        epoch = epoch + 1

        if config['shuffle']:
            np.random.shuffle(minibatch_range)

        if config['finetune'] and epoch == 1 and not config['resume_train']:
            load_weights_finetune(layers, config['finetune_weights_dir'])

        count = 0
        for minibatch_index in minibatch_range:

            num_iter = (epoch - 1) * n_train_batches + count
            count = count + 1
            if count == 1:
                s = time.time()
            if count == 20:
                e = time.time()
                print "time per 20 iter:", (e - s)

            cost_ij = train_model_wrap(train_model, shared_x, shared_y,
                                       minibatch_index, minibatch_range,
                                       batch_size, train_labels,
                                       train_filenames, img_mean)

            if num_iter % config['print_freq'] == 0:
                print 'training @ iter = ', num_iter
                print 'training cost:', cost_ij
                if config['print_train_error']:
                    print 'training error rate:', train_error()

        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_validation_error, this_validation_loss = get_val_error_loss(
            shared_x, shared_y, img_mean, val_filenames, val_labels,
            batch_size, validate_model)

        print('epoch %i: validation loss %f ' % (epoch, this_validation_loss))
        print('epoch %i: validation error %f %%' %
              (epoch, this_validation_error * 100.))
        val_record.append([this_validation_error, this_validation_loss])

        savepath = config['weights_dir'] + 'classifier_' + str(group_idx -
                                                               1) + '/'
        if not os.path.exists(savepath):
            os.mkdir(savepath)

        np.save(savepath + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()

        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                        learning_rate)

        # Save weights for each iteration
        if epoch % 5 == 0:

            save_weights(layers, savepath, epoch)
            np.save(savepath + 'lr_' + str(epoch) + '.npy',
                    learning_rate.get_value())
            save_momentums(vels, savepath, epoch)

    DropoutLayer.SetDropoutOff()
    # generate the labels
    for minibatch_index in range(n_train_batches):

        label = get_prediction_labels(predict_model, shared_x, minibatch_index,
                                      train_filenames, img_mean)
        if predicted_labels is None:
            predicted_labels = label[0]
        else:
            predicted_labels = np.vstack((predicted_labels, label[0]))

    hash_step2_code = {'hash_step2_code': predicted_labels}
    sio.savemat('./temp/hash_step2_code_' + str(group_idx - 1) + '.mat',
                hash_step2_code)

    DropoutLayer.SetDropoutOn()

    print('Optimization complete.')
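adjust_learning_rate in this and the later examples comes from the surrounding training framework and is not shown. In code of this style it is typically a step decay: when a scheduled epoch is reached (or validation stops improving), the shared learning rate is scaled down and the step index advances. A rough sketch under that assumption, with config['lr_step'] and config['lr_decay_factor'] as assumed keys:

import numpy as np

def adjust_learning_rate(config, epoch, step_idx, val_record, learning_rate):
    # assumed: config['lr_step'] lists the epochs at which to decay and
    # config['lr_decay_factor'] is the multiplicative factor
    if step_idx < len(config['lr_step']) and epoch == config['lr_step'][step_idx]:
        new_lr = learning_rate.get_value() * config['lr_decay_factor']
        learning_rate.set_value(np.float32(new_lr))
        step_idx += 1
    return step_idx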
Example #6
def train(train_data_generator):
    checkpoint_dir = config["checkpoint_dir"]
    learning_rate = config['learning_rate']
    data_dims = config['data_dims']
    batch_size = config['batch_size']
    num_gpus = config['num_gpus']
    num_epochs = config['num_epochs']
    num_samples_per_epoch = config["num_samples_per_epoch"]
    pretrained_weights = config["pretrained_weights"]
    steps_per_epoch = num_samples_per_epoch // (batch_size * num_gpus)
    num_steps = steps_per_epoch * num_epochs
    checkpoint_iter = config["checkpoint_iter"]
    experiment_dir = config['experiment_dir']
    train_log_fpath = pth.join(experiment_dir, 'train.log')
    log = tools.MetricsLogger(train_log_fpath)


    # =====================
    # define training graph
    # =====================
    G = tf.Graph()
    with G.as_default(), tf.device('/cpu:0'):
        full_data_dims = [batch_size * num_gpus] + data_dims
        data = tf.placeholder(dtype=tf.float32,
                              shape=full_data_dims,
                              name='data')
        labels = tf.placeholder(dtype=tf.int32,
                                shape=[batch_size * num_gpus],
                                name='labels')

        # we split the large batch into sub-batches to be distributed onto each gpu
        split_data = tf.split(0, num_gpus, data)
        split_labels = tf.split(0, num_gpus, labels)

        # setup optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)

        # setup one model replica per gpu to compute loss and gradient
        replica_grads = []
        for i in range(num_gpus):
            with tf.name_scope('tower_%d' % i), tf.device('/gpu:%d' % i):
                model = build_model(split_data[i], split_labels[i])
                loss = model["loss"]
                grads = optimizer.compute_gradients(loss)
                replica_grads.append(grads)
                tf.get_variable_scope().reuse_variables()

        # We must calculate the mean of each gradient. Note this is a
        # synchronization point across all towers.
        average_grad = L.average_gradients(replica_grads)
        grad_step = optimizer.apply_gradients(average_grad)
        train_step = tf.group(grad_step)
        init = tf.initialize_all_variables()

    # ==================
    # run training graph
    # ==================
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            tools.load_weights(G, pretrained_weights)

        for step in range(num_steps):
            data_batch, label_batch = train_data_generator.next()
            inputs = {data: data_batch, labels: label_batch}
            results = sess.run([train_step, loss], inputs)
            print("step:%s loss:%s" % (step, results[1]))
            log.report(step=step, split="TRN", loss=float(results[1]))


            if (step % checkpoint_iter == 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(G, pth.join(checkpoint_dir, "weights.%s" % step))
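L.average_gradients above is the synchronization point the comment refers to: each tower produces its own (gradient, variable) pairs, and the gradients for every shared variable are averaged before a single apply_gradients step. A condensed sketch of that averaging (the module layout and exact behaviour of the original helper are assumptions):

import tensorflow as tf

def average_gradients(replica_grads):
    # replica_grads: one list of (gradient, variable) pairs per tower
    averaged = []
    for grads_and_vars in zip(*replica_grads):
        grads = [g for g, _ in grads_and_vars]
        mean_grad = tf.add_n(grads) / float(len(grads))
        # the variable is shared across towers, so take it from the first entry
        averaged.append((mean_grad, grads_and_vars[0][1]))
    return averaged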
Example #7
        return (np.mean(validation_losses), np.mean(validation_errors))


    (val_loss, val_error) = validate(test_set)


    print('epoch %i: validation loss %f ' %
            (epoch, val_loss))
    print('epoch %i: validation error %f %%' %
            (epoch, val_error * 100.))

    val_record.append([val_error, val_loss])
    np.save(config['weights_dir'] + 'val_record.npy', val_record)

    DropoutLayer.SetDropoutOn()
    ############################################

    # Adapt Learning Rate
    step_idx = adjust_learning_rate(config, epoch, step_idx,
                                    val_record, learning_rate)

    # Save weights
    if epoch % config['snapshot_freq'] == 0:
        save_weights(layers, config['weights_dir'], epoch)
        np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                    learning_rate.get_value())
        save_momentums(vels, config['weights_dir'], epoch)

print('Optimization complete.')

    def train_by_sentence(self, x_train, y_train, x_val, y_val, index_train, index_val, save_model=False):
        """
        - train: {name:'sentenceID', data:[features], label:[labels]}
        """
        #train_model, validate_model = self._train_by_order_init_(train, val)
        train_model, validate_model = self._train_by_sentence_init_(x_train, y_train, x_val, y_val, index_train, index_val)
        patience = 10000
        patience_increase = 4
        improvement_threshold = 0.995
        validation_frequency = len(index_train) - 1  # min(self.n_train_batches, patience / 2)
        best_validation_loss = np.inf
        best_val_acc = 0.
        best_iter = 0
        test_score = 0.
        start_time = time.clock()
        epoch = 0
        done_looping = False
        n_epochs = self.config['n_epochs']
        t_cost, t_acc, v_acc = [], [], []
        print 'start training...'
        while (epoch < n_epochs) and (not done_looping):
            epoch = epoch + 1
            DropoutLayer.SetDropoutOn()
            for minibatch_index in xrange(len(index_train)-1):
                minibatch_avg_cost,train_acc = train_model(minibatch_index)
                iter = (epoch - 1) * (len(index_train)-1) + minibatch_index
                if (iter + 1) % validation_frequency == 0:
                    DropoutLayer.SetDropoutOff()
                    validation_losses = [validate_model(i) for i in xrange(len(index_val)-1)]
                    this_validation_loss = np.mean(validation_losses)
                    this_val_acc = 1 - this_validation_loss
                    this_train_acc = 1 - train_acc
                    print('epoch %i/%s, cost %.4f , train acc %.4f , val acc %.4f ' % (epoch, str(n_epochs), minibatch_avg_cost, this_train_acc, this_val_acc))

                    t_cost.append(round(minibatch_avg_cost,5))
                    t_acc.append(round(this_train_acc,5))
                    v_acc.append(round(this_val_acc,5))


                    if save_model:
                        if this_val_acc > best_val_acc:
                            best_val_acc = this_val_acc
                            #print "best val acc at epoch %i is %.4f" %(epoch,best_val_acc)
                            folder = "./snapshot_{0}_{1}/".format(epoch, round(best_val_acc,3))
                            os.mkdir(folder)
                            tools.save_weights(self.layers, folder, epoch)
                            #print "model saved at epoch %i" %(epoch)

                    if this_validation_loss < best_validation_loss:
                        if (this_validation_loss < best_validation_loss * improvement_threshold):
                            patience = max(patience, iter * patience_increase)
                        best_validation_loss = this_validation_loss
                        best_iter = iter
        #            if this_train_acc - this_val_acc >0.05:
        #                done_looping = True
        #                break
        #        if patience <= iter:
        #            done_looping = True
        #            break
            if self.learning_rate_decay:
                if epoch % 5 == 0:
                    rate = theano.shared(np.cast[theano.config.floatX](0.5))
                    self.optimizer.lr = self.optimizer.lr * rate

        self.record = {
                      'training loss' : t_cost,
                      'training accuracy' : t_acc,
                      'validation accuracy' : v_acc }
        end_time = time.clock()
        print(('Optimization complete. Best validation score of %f %% \n obtained at iteration %i, with test performance %f %%') %(best_validation_loss * 100., best_iter + 1, test_score * 100.))
Example #9
            loss, error = validate_model()
            validation_losses.append(loss)
            validation_errors.append(error)

        return (np.mean(validation_losses), np.mean(validation_errors))

    (val_loss, val_error) = validate(test_set)

    print('epoch %i: validation loss %f ' % (epoch, val_loss))
    print('epoch %i: validation error %f %%' % (epoch, val_error * 100.))

    val_record.append([val_error, val_loss])
    np.save(config['weights_dir'] + 'val_record.npy', val_record)

    DropoutLayer.SetDropoutOn()
    ############################################

    # Adapt Learning Rate
    step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                    learning_rate)

    # Save weights
    if epoch % config['snapshot_freq'] == 0:
        save_weights(layers, config['weights_dir'], epoch)
        np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                learning_rate.get_value())
        save_momentums(vels, config['weights_dir'], epoch)

print('Optimization complete.')
Example #10
def train():
    checkpoint_dir = config["checkpoint_dir"]
    learning_rate = config['learning_rate']
    image_size = config['image_size']
    batch_size = config['batch_size']
    num_gpus = config['num_gpus']
    num_epochs = config['num_epochs']
    pretrained_weights = config["pretrained_weights"]
    checkpoint_iter = config["checkpoint_iter"]
    experiment_dir = config['experiment_dir']
    data_folder = config['data_folder']
    split_ratio = config['split_ratio']
    min_nrof_images_per_class = config['min_nrof_images_per_class']
    nrof_preprocess_threads = config['nrof_preprocess_threads']
    learning_rate_decay_epochs = config['learning_rate_decay_epochs']
    learning_rate_decay_factor = config['learning_rate_decay_factor']
    opt = config['opt']
    train_log_fpath = pth.join(experiment_dir, 'train.log')

    # ====================
    # get the dataset; you only need to point at the folder whose
    # subfolders are named after the class labels
    # ====================
    dataset = train_data_generator.get_dataset(data_folder)
    train_set, val_set = train_data_generator.split_dataset(
        dataset, split_ratio, min_nrof_images_per_class, mode='SPLIT_IMAGES')
    nrof_classes = len(train_set)
    print 'number of classes:', nrof_classes, split_ratio
    raw_input("------------")
    # train_set stores {label: [photos]}; flatten it into parallel lists of image paths and labels
    image_list, label_list = train_data_generator.get_image_paths_and_labels(
        train_set)
    # val_image_list, val_label_list = train_data_generator.get_image_paths_and_labels(val_set)

    num_samples_per_epoch = len(image_list)
    steps_per_epoch = num_samples_per_epoch // (batch_size * num_gpus)
    num_steps = steps_per_epoch * num_epochs
    print 'My steps_per_epoch is: ', steps_per_epoch
    raw_input("------------")
    # =====================
    # define training graph
    # =====================
    G = tf.Graph()
    with G.as_default(), tf.device('/cpu:0'):
        # first define an index queue that yields shuffled indices for
        # reading (photo[index], label[index]) pairs; capacity is not a
        # concern because the queue blocks when it is full or empty
        labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
        range_size = array_ops.shape(labels)[0]
        index_queue = tf.train.range_input_producer(
            range_size,
            num_epochs=None,
            shuffle=True,
            seed=None,
            capacity=num_samples_per_epoch)
        index_dequeue_op = index_queue.dequeue_many(num_samples_per_epoch,
                                                    'index_dequeue')

        # then create a queue for [image_path,label]
        input_queue = data_flow_ops.FIFOQueue(
            capacity=num_samples_per_epoch * 2,
            dtypes=[tf.string, tf.int32, tf.int32],
            shapes=[(1, ), (1, ), (1, )],
            shared_name=None,
            name=None)
        image_paths_placeholder = tf.placeholder(tf.string,
                                                 shape=(None, 1),
                                                 name='image_paths')
        labels_placeholder = tf.placeholder(tf.int32,
                                            shape=(None, 1),
                                            name='labels')
        control_placeholder = tf.placeholder(tf.int32,
                                             shape=(None, 1),
                                             name='control_value')
        enqueue_op = input_queue.enqueue_many(
            [image_paths_placeholder, labels_placeholder, control_placeholder],
            name='enqueue_op')
        data, labels = train_data_generator.create_input_pipeline(
            input_queue, (image_size, image_size), nrof_preprocess_threads,
            batch_size * num_gpus)

        # we split the large batch into sub-batches to be distributed onto each gpu
        split_data = tf.split(data, num_gpus, 0)
        split_labels = tf.split(labels, num_gpus, 0)

        learning_rate_placeholder = tf.placeholder(tf.float32,
                                                   name='learning_rate')
        global_step = tf.placeholder(tf.int32, name='global_step')
        learning_rate_op = tf.train.exponential_decay(
            learning_rate_placeholder,
            global_step,
            learning_rate_decay_epochs * steps_per_epoch,
            learning_rate_decay_factor,
            staircase=True)

        if opt == 'ADAGRAD':
            optimizer = tf.train.AdagradOptimizer(learning_rate_op)
        elif opt == 'ADADELTA':
            optimizer = tf.train.AdadeltaOptimizer(learning_rate_op,
                                                   rho=0.9,
                                                   epsilon=1e-6)
        elif opt == 'ADAM':
            optimizer = tf.train.AdamOptimizer(learning_rate_op,
                                               beta1=0.9,
                                               beta2=0.999,
                                               epsilon=0.1)
        elif opt == 'RMSPROP':
            optimizer = tf.train.RMSPropOptimizer(learning_rate_op,
                                                  decay=0.9,
                                                  momentum=0.9,
                                                  epsilon=1.0)
        else:
            optimizer = tf.train.MomentumOptimizer(learning_rate_op,
                                                   0.9,
                                                   use_nesterov=True)

        # setup one model replica per gpu to compute loss and gradient
        replica_grads = []
        with tf.variable_scope(tf.get_variable_scope()):
            for i in range(num_gpus):
                with tf.name_scope('tower_%d' % i), tf.device('/gpu:%d' % i):
                    model = build_model(split_data[i], split_labels[i],
                                        nrof_classes, 'tower_%d' % i)
                    loss = model["loss"]
                    tf.get_variable_scope().reuse_variables()
                    grads = optimizer.compute_gradients(loss)
                    replica_grads.append(grads)

        # We must calculate the mean of each gradient.
        average_grad = L.average_gradients(replica_grads)
        grad_step = optimizer.apply_gradients(average_grad)
        train_step = tf.group(grad_step)
        init = tf.global_variables_initializer()
        init2 = tf.local_variables_initializer()

    # ==================
    # run training graph
    # ==================
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)
    sess.run([init, init2])
    summary_writer = tf.summary.FileWriter('log', sess.graph)
    coord = tf.train.Coordinator()
    tf.train.start_queue_runners(coord=coord, sess=sess)

    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            if 'npz' in pretrained_weights:
                tools.load_weights_npz(G, pretrained_weights)
            elif 'npy' in pretrained_weights:
                tools.load_weights_npy(pretrained_weights, sess, G)
            else:
                print 'unknown file type, skip loading weights'

        for step in range(num_steps):
            if step % steps_per_epoch == 0:
                # at the start of each epoch, fetch the data; this pipeline's
                # performance is not fully satisfying and may be replaced later
                print('Loading data ...', num_steps)
                index_epoch = sess.run(index_dequeue_op)
                label_epoch = np.array(label_list)[index_epoch]
                image_epoch = np.array(image_list)[index_epoch]
                # Enqueue one epoch of image paths and labels
                labels_array = np.expand_dims(np.array(label_epoch), 1)
                image_paths_array = np.expand_dims(np.array(image_epoch), 1)
                control_value = train_data_generator.RANDOM_ROTATE * config[
                    'random_rotate'] + train_data_generator.RANDOM_CROP * config[
                        'random_crop'] + train_data_generator.RANDOM_FLIP * config[
                            'random_flip'] + train_data_generator.FIXED_STANDARDIZATION * config[
                                'use_fixed_image_standardization']
                control_array = np.ones_like(labels_array) * control_value
                # after enqueue_op, the input is sent to the towers
                sess.run(
                    enqueue_op, {
                        image_paths_placeholder: image_paths_array,
                        labels_placeholder: labels_array,
                        control_placeholder: control_array
                    })
                print('reading end')

            inputs = {
                learning_rate_placeholder: learning_rate,
                global_step: step
            }
            results = sess.run([train_step, loss, learning_rate_op], inputs)
            print("step:%s loss:%s lr:%s" % (step, results[1], results[2]))

            if (step % (steps_per_epoch * checkpoint_iter) == 0
                    and step != 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(
                    G, pth.join(checkpoint_dir, "weights_%s" % step))
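The control_value computed in the epoch loop above packs the per-image augmentation switches into one integer that the input pipeline decodes per element. The constants RANDOM_ROTATE, RANDOM_CROP, RANDOM_FLIP and FIXED_STANDARDIZATION are assumed to be distinct power-of-two flags, roughly as in this illustrative sketch (the concrete values are assumptions):

# illustrative bit-flag layout for the augmentation controls (values assumed)
RANDOM_ROTATE = 1
RANDOM_CROP = 2
RANDOM_FLIP = 4
FIXED_STANDARDIZATION = 8

def has_flag(control_value, flag):
    # the pipeline can test each augmentation independently with a bitwise AND
    return (control_value & flag) != 0

# e.g. random crop and flip enabled, rotation and fixed standardization off
control_value = RANDOM_CROP + RANDOM_FLIP
assert has_flag(control_value, RANDOM_FLIP) and not has_flag(control_value, RANDOM_ROTATE)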
Example #11
def train_net(config):
    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)
    if flag_para_load:
        #  zmq set up
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://*****:*****@ iter = %i" % (num_iter))
                logger.info("training cost: %lf" % (cost_ij))
                if config['print_train_error']:
                    logger.info('training error rate: %lf' % train_error())

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        #"""
        DropoutLayer.SetDropoutOff()

        result_list = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue,
            flag_top_5=flag_top5)


        logger.info(('epoch %i: validation loss %f ' %
              (epoch, result_list[-1])))

        if flag_top5:
            logger.info(('epoch %i: validation error (top 1) %f %%, (top5) %f %%' %
                (epoch,  result_list[0] * 100., result_list[1] * 100.)))
        else:
            logger.info(('epoch %i: validation error %f %%' %
                (epoch, result_list[0] * 100.)))

        val_record.append(result_list)
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save weights
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                       learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)
        #"""

    print('Optimization complete.')
Example #12
def train_net(config, private_config):

    # UNPACK CONFIGS
    (flag_para_load, flag_datalayer, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = \
        unpack_configs(config, ext_data=private_config['ext_data'],
                       ext_label=private_config['ext_label'])

    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                    print 'training cost:', cost_ij

                if config['print_train_error']:
                    error_ij = train_error()

                    gpu_send_queue.put(error_ij)
                    that_error = gpu_recv_queue.get()
                    error_ij = (error_ij + that_error) / 2.

                    if private_config['flag_verbose']:
                        print 'training error rate:', error_ij

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_val_error, this_val_loss = get_val_error_loss(
            rand_arr,
            shared_x,
            shared_y,
            val_filenames,
            val_labels,
            flag_datalayer,
            flag_para_load,
            batch_size,
            validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue)

        # report validation stats
        gpu_send_queue.put(this_val_error)
        that_val_error = gpu_recv_queue.get()
        this_val_error = (this_val_error + that_val_error) / 2.

        gpu_send_queue.put(this_val_loss)
        that_val_loss = gpu_recv_queue.get()
        this_val_loss = (this_val_loss + that_val_loss) / 2.

        if private_config['flag_verbose']:
            print('epoch %i: validation loss %f ' % (epoch, this_val_loss))
            print('epoch %i: validation error %f %%' %
                  (epoch, this_val_error * 100.))
        val_record.append([this_val_error, this_val_loss])

        if private_config['flag_save']:
            np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                        learning_rate)

        # Save Weights, only one of them will do
        if private_config['flag_save']:
            if epoch % config['snapshot_freq'] == 0:
                save_weights(layers, config['weights_dir'], epoch)
                np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                        learning_rate.get_value())
                save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
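Examples #12, #14 and #15 average their validation statistics across two GPU worker processes by pushing a scalar to the peer's queue and reading the peer's value back. A stripped-down sketch of that exchange with plain multiprocessing queues (the process launcher here is illustrative, not the repository's):

from multiprocessing import Process, Queue

def worker(my_value, send_q, recv_q):
    # send this worker's statistic to the peer, read theirs, and average the two
    send_q.put(my_value)
    that_value = recv_q.get()
    print((my_value + that_value) / 2.0)

if __name__ == '__main__':
    q_a, q_b = Queue(), Queue()
    # each worker's send queue is the other worker's receive queue
    p1 = Process(target=worker, args=(0.10, q_a, q_b))
    p2 = Process(target=worker, args=(0.20, q_b, q_a))
    p1.start(); p2.start()
    p1.join(); p2.join()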
Example #13
def train_net(config):
    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)
    if flag_para_load:
        #  zmq set up
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://*****:*****@iter " + str(count)
            if count == 1:
                s = time.time()
            if count == 20:
                e = time.time()
                print "time per 20 iter:", (e - s)
                logger.info("time per 20 iter: %f" % (e - s)) 
            cost_ij = train_model_wrap(train_model, shared_x,
                                       shared_y, rand_arr, img_mean,
                                       count, minibatch_index,
                                       minibatch_range, batch_size,
                                       train_filenames, train_labels,
                                       flag_para_load,
                                       config['batch_crop_mirror'],
                                       send_queue=load_send_queue,
                                       recv_queue=load_recv_queue)

            if num_iter % config['print_freq'] == 0:
                #print 'training @ iter = ', num_iter
                #print 'training cost:', cost_ij
		logger.info("training @ iter = %i" % (num_iter)) 
		logger.info("training cost: %lf" % (cost_ij)) 
                if config['print_train_error']:
                    logger.info('training error rate: %lf' % train_error())
                    #print 'training error rate:', train_error()

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        #"""
        DropoutLayer.SetDropoutOff()

        # result_list = [ this_validation_error, this_validation_error_top5, this_validation_loss ]
        # or
        # result_list = [ this_validation_error, this_validation_loss ]
        result_list = get_val_error_loss(
        #this_validation_error, this_validation_loss = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue, 
            recv_queue=load_recv_queue,
            flag_top_5=flag_top5)


        logger.info(('epoch %i: validation loss %f ' %
              (epoch, result_list[-1])))
        #print('epoch %i: validation loss %f ' %
        #      (epoch, this_validation_loss))
        if flag_top5:
            logger.info(('epoch %i: validation error (top 1) %f %%, (top5) %f %%' %
                (epoch,  result_list[0] * 100., result_list[1] * 100.)))
        else:
            logger.info(('epoch %i: validation error %f %%' %
                (epoch, result_list[0] * 100.)))
        #print('epoch %i: validation error %f %%' %
        #      (epoch, this_validation_error * 100.))
        val_record.append(result_list)
        #val_record.append([this_validation_error, this_validation_loss])
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save weights
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                       learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)
        #"""

    print('Optimization complete.')
Example #14
def train_net(config, private_config):

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = \
        unpack_configs(config, ext_data=private_config['ext_data'],
                       ext_label=private_config['ext_label'])


    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                    log_iter.write("%d\n" % num_iter)
                    log_iter.flush()
                    print 'training cost:', cost_ij
                    log_err_cost.write("%f\n" % cost_ij)
                    log_err_cost.flush()

                if config['print_train_error']:
                    error_ij = train_error()

                    gpu_send_queue.put(error_ij)
                    that_error = gpu_recv_queue.get()
                    error_ij = (error_ij + that_error) / 2.

                    if private_config['flag_verbose']:
                        print 'training error rate:', error_ij
                        log_err_rate.write("%f\n" % error_ij)
                        log_err_rate.flush()


            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

            if count % 20 == 0:
                e = time.time()
                print "time per 20 iter:", (e - s)
                
        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_val_error, this_val_loss = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue, recv_queue=load_recv_queue)

        # report validation stats
        gpu_send_queue.put(this_val_error)
        that_val_error = gpu_recv_queue.get()
        this_val_error = (this_val_error + that_val_error) / 2.

        gpu_send_queue.put(this_val_loss)
        that_val_loss = gpu_recv_queue.get()
        this_val_loss = (this_val_loss + that_val_loss) / 2.

        if private_config['flag_verbose']:
            print('epoch %i: validation loss %f ' %
                  (epoch, this_val_loss))
            print('epoch %i: validation error %f %%' %
                  (epoch, this_val_error * 100.))
        val_record.append([this_val_error, this_val_loss])

        if private_config['flag_save']:
            np.save(config['weights_dir'] + 'val_record.npy', val_record)
            np.savetxt(config['weights_dir'] + 'val_record_txt.txt', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save Weights, only one of them will do
        if private_config['flag_save']:
            if epoch % config['snapshot_freq'] == 0:
                save_weights(layers, config['weights_dir'], epoch)
                np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                        learning_rate.get_value())
                save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
Example #15
def train_net(config, private_config):

    # UNPACK CONFIGS
    (train_videos_spatial_jhmdb, val_videos_spatial_jhmdb,
     train_videos_temporal_jhmdb, val_videos_temporal_jhmdb,
     train_targets, val_targets,
     train_labels_jhmdb, val_labels_jhmdb) = unpack_configs_jhmdb(
         config, gpu_id=private_config['gpu_id'])
    # print('val_len',len(val_videos_spatial_jhmdb),'train_len',len(train_videos_spatial_jhmdb))
    if config['modal'] == 'rgb':
        train_videos = list(train_videos_spatial_jhmdb)
        test_videos = list(val_videos_spatial_jhmdb)
    else:
        train_videos = list(train_videos_temporal_jhmdb)
        test_videos = list(val_videos_temporal_jhmdb)
    print('jhmdb_len', len(train_videos), len(train_labels_jhmdb))  # ,len(tr_video_length_jhmdb))
    flag_para_load = config['para_load']
    gpu_send_queue = private_config['queue_gpu_send']
    gpu_recv_queue = private_config['queue_gpu_recv']

    # pycuda and zmq set up
    drv.init()
    dev = drv.Device(int(private_config['gpu'][-1]))
    ctx = dev.make_context()

    sock_gpu = zmq.Context().socket(zmq.PAIR)
    if private_config['flag_client']:
        sock_gpu.connect('tcp://*****:*****@ iter = ', num_iter
                        print 'training cost:', cost_ij,'cost_nll:',cost_nll,'cost_attention:',cost_att

                    if config['print_train_error']:
                        error_ij = train_error()

                        gpu_send_queue.put(error_ij)
                        that_error = gpu_recv_queue.get()
                        error_ij = (error_ij + that_error) / 2.

                        if private_config['flag_verbose']:
                            print 'training error rate:', error_ij

                if flag_para_load and (count < len(minibatch_range)):
                    load_send_queue.put('calc_finished')

                if count % 20 == 0:
                    e = time.time()
                    print "time per 20 iter:", (e - s)
            # ############### Test on Validation Set ##################
            DropoutLayer.SetDropoutOff()
            this_val_error, this_val_loss = get_test_error(config,
                 shared_x, shared_mask, shared_y,shared_target,shared_use_noise,
                 shared_conv,test_videos,  val_labels_jhmdb,
                flag_para_load,
                batch_size,num_seq, validate_model_lstm,train_model,
                send_queue=load_send_queue, recv_queue=load_recv_queue)

            # report validation stats
            gpu_send_queue.put(this_val_error)
            that_val_error = gpu_recv_queue.get()
            this_val_error = (this_val_error + that_val_error) / 2.

            gpu_send_queue.put(this_val_loss)
            that_val_loss = gpu_recv_queue.get()
            this_val_loss = (this_val_loss + that_val_loss) / 2.

            if private_config['flag_verbose']:
                print('epoch %i: test loss of jhmdb %f ' %
                      (epoch, this_val_loss))
                print('epoch %i: test error of jhmdb %f %%' %
                      (epoch, this_val_error * 100.))
            val_record.append([this_val_error, this_val_loss])
            if private_config['flag_save']:
                np.save(config['weights_dir'] + 'test_record_jhmdb.npy', val_record)

            DropoutLayer.SetDropoutOn()
            ###########################################
            # Adapt Learning Rate
            step_idx = adjust_learning_rate(config, epoch, step_idx,
                                            val_record, learning_rate)
            # Save Weights, only one of them will do
            if private_config['flag_save'] :
                if epoch % config['snapshot_freq'] == 0:
                    save_weights(layers, config['weights_dir'], epoch)
                    np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                            learning_rate.get_value())
                    save_momentums(vels, config['weights_dir'], epoch)
        print('Optimization complete.')
Example #16
def train(trn_data_generator, vld_data=None):
    learning_rate = config['learning_rate']
    experiment_dir = config['experiment_dir']
    data_dims = config['data_dims']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    num_samples_per_epoch = config["num_samples_per_epoch"]
    steps_per_epoch = num_samples_per_epoch // batch_size
    num_steps = steps_per_epoch * num_epochs
    checkpoint_dir = pth.join(experiment_dir, 'checkpoints')
    train_log_fpath = pth.join(experiment_dir, 'train.log')
    vld_iter = config["vld_iter"]
    checkpoint_iter = config["checkpoint_iter"]
    pretrained_weights = config.get("pretrained_weights", None)

    # ========================
    # construct training graph
    # ========================
    G = tf.Graph()
    with G.as_default():
        input_data_tensor = tf.placeholder(tf.float32, [None] + data_dims)
        input_label_tensor = tf.placeholder(tf.int32, [None])
        model = build_model(input_data_tensor, input_label_tensor)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads = optimizer.compute_gradients(model["loss"])
        grad_step = optimizer.apply_gradients(grads)
        init = tf.initialize_all_variables()


    # ===================================
    # initialize and run training session
    # ===================================
    log = tools.StatLogger(train_log_fpath)
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            tools.load_weights(G, pretrained_weights)


        # Start training loop
        for step in range(num_steps):
            batch_train = list(trn_data_generator.__next__())
            #print(np.array(list(batch_train)[1]).shape)
            X_trn = np.array(batch_train[0])
            Y_trn = np.array(batch_train[1])

            ops = [grad_step] + [model[k] for k in sorted(model.keys())]
            inputs = {input_data_tensor: X_trn, input_label_tensor: Y_trn}
            results = sess.run(ops, feed_dict=inputs)
            results = dict(zip(sorted(model.keys()), results[1:]))
            print("TRN step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" % (step,
                                                                                 results["error_top1"],
                                                                                 results["error_top5"],
                                                                                 results["loss"]))
            with open("loss.log", "w+") as f:
                f.write("{}\n".format(results["loss"]))
            
            log.report(step=step,
                       split="TRN",
                       error_top5=float(results["error_top5"]),
                       error_top1=float(results["error_top5"]),
                       loss=float(results["loss"]))

            # report evaluation metrics every vld_iter training steps
            if (step % vld_iter == 0):
                print("-- running evaluation on vld split")
                X_vld = vld_data[0]
                Y_vld = vld_data[1]
                inputs = [input_data_tensor, input_label_tensor]
                args = [X_vld, Y_vld]
                ops = [model[k] for k in sorted(model.keys())]
                results = tools.iterative_reduce(ops, inputs, args, batch_size=1, fn=lambda x: np.mean(x, axis=0))
                results = dict(zip(sorted(model.keys()), results))
                print("VLD step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" % (step,
                                                                                     results["error_top1"],
                                                                                     results["error_top5"],
                                                                                     results["loss"]))
                log.report(step=step,
                           split="VLD",
                           error_top5=float(results["error_top5"]),
                           error_top1=float(results["error_top1"]),
                           loss=float(results["loss"]))

            if (step % checkpoint_iter == 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(G, pth.join(checkpoint_dir, "weights.%s" % step))
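tools.iterative_reduce in the validation branch above is assumed to feed the validation arrays through the graph in batches and reduce each op's per-batch results with the supplied fn (np.mean in the call above). A sketch whose signature mirrors that call; the implementation itself is an assumption:

import tensorflow as tf

def iterative_reduce(ops, placeholders, arrays, batch_size, fn):
    # run `ops` over `arrays` in chunks of `batch_size`, then reduce per op
    sess = tf.get_default_session()
    per_batch = []
    n = len(arrays[0])
    for start in range(0, n, batch_size):
        feed = {ph: arr[start:start + batch_size]
                for ph, arr in zip(placeholders, arrays)}
        per_batch.append(sess.run(ops, feed_dict=feed))
    # gather each op's values across batches and reduce them with fn
    return [fn([batch[i] for batch in per_batch]) for i in range(len(ops))]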
Example #17
def train_net(config):

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)

    # pycuda set up
    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()
    
    if flag_para_load:
        #  zmq set up
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://*****:*****@ iter = ', num_iter
                print 'training cost:', cost_ij
                if config['print_train_error']:
                    print 'training error rate:', train_error()

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_validation_error, this_validation_loss = get_val_error_loss(
            rand_arr, shared_x, shared_y,
            val_filenames, val_labels,
            flag_para_load, img_mean,
            batch_size, validate_model,
            send_queue=load_send_queue, recv_queue=load_recv_queue)


        print('epoch %i: validation loss %f ' %
              (epoch, this_validation_loss))
        print('epoch %i: validation error %f %%' %
              (epoch, this_validation_error * 100.))
        val_record.append([this_validation_error, this_validation_loss])
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx,
                                        val_record, learning_rate)

        # Save weights
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                       learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
Example #18
def train_net(config):

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames, train_labels, val_labels,
     img_mean) = unpack_configs(config)

    # pycuda set up
    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()

    if flag_para_load:
        #  zmq set up
        sock = zmq.Context().socket(zmq.PAIR)
        sock.connect('tcp://*****:*****@ iter = ', num_iter
                print 'training cost:', cost_ij
                if config['print_train_error']:
                    print 'training error rate:', train_error()

            if flag_para_load and (count < len(minibatch_range)):
                load_send_queue.put('calc_finished')

        ############### Test on Validation Set ##################

        DropoutLayer.SetDropoutOff()

        this_validation_error, this_validation_loss = get_val_error_loss(
            rand_arr,
            shared_x,
            shared_y,
            val_filenames,
            val_labels,
            flag_para_load,
            img_mean,
            batch_size,
            validate_model,
            send_queue=load_send_queue,
            recv_queue=load_recv_queue)

        print('epoch %i: validation loss %f ' % (epoch, this_validation_loss))
        print('epoch %i: validation error %f %%' %
              (epoch, this_validation_error * 100.))
        val_record.append([this_validation_error, this_validation_loss])
        np.save(config['weights_dir'] + 'val_record.npy', val_record)

        DropoutLayer.SetDropoutOn()
        ############################################

        # Adapt Learning Rate
        step_idx = adjust_learning_rate(config, epoch, step_idx, val_record,
                                        learning_rate)

        # Save weights
        if epoch % config['snapshot_freq'] == 0:
            save_weights(layers, config['weights_dir'], epoch)
            np.save(config['weights_dir'] + 'lr_' + str(epoch) + '.npy',
                    learning_rate.get_value())
            save_momentums(vels, config['weights_dir'], epoch)

    print('Optimization complete.')
def train(trn_data_generator, vld_data=None):
    learning_rate = config['learning_rate']
    experiment_dir = config['experiment_dir']
    data_dims = config['data_dims']
    batch_size = config['batch_size']
    num_epochs = config['num_epochs']
    num_samples_per_epoch = config["num_samples_per_epoch"]
    steps_per_epoch = num_samples_per_epoch // batch_size
    num_steps = steps_per_epoch * num_epochs
    checkpoint_dir = pth.join(experiment_dir, 'checkpoints')
    train_log_fpath = pth.join(experiment_dir, 'train.log')
    vld_iter = config["vld_iter"]
    checkpoint_iter = config["checkpoint_iter"]
    pretrained_weights = config.get("pretrained_weights", None)

    # ========================
    # construct training graph
    # ========================
    G = tf.Graph()
    with G.as_default():
        input_data_tensor = tf.placeholder(tf.float32, [None] + data_dims)
        input_label_tensor = tf.placeholder(tf.int32, [None])
        model = build_model(input_data_tensor, input_label_tensor)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        grads = optimizer.compute_gradients(model["loss"])
        grad_step = optimizer.apply_gradients(grads)
        init = tf.initialize_all_variables()
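        # NOTE: tf.initialize_all_variables() is the older, deprecated name for
        # tf.global_variables_initializer() in the TF 1.x API.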

    # ===================================
    # initialize and run training session
    # ===================================

    config_proto = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(graph=G, config=config_proto)

    run_metadata = tf.RunMetadata()
    options = tf.RunOptions(trace_level=tf.RunOptions.SOFTWARE_TRACE)

    def profile(run_metadata, epoch=0):
        with open('profs/timeline_step' + str(epoch) + '.json', 'w') as f:
            # Create the Timeline object, and write it to a json file
            fetched_timeline = timeline.Timeline(run_metadata.step_stats)
            chrome_trace = fetched_timeline.generate_chrome_trace_format()
            f.write(chrome_trace)
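            # The generated trace JSON can be inspected in Chrome via chrome://tracing.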

    sess.run(init, run_metadata=run_metadata, options=options)
    profile(run_metadata, -1)

    tf.train.start_queue_runners(sess=sess)
    with sess.as_default():
        if pretrained_weights:
            print("-- loading weights from %s" % pretrained_weights)
            tools.load_weights(G, pretrained_weights)

        # Start training loop
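        # NOTE: the hard-coded value below overrides the num_steps computed
        # above from steps_per_epoch * num_epochs, capping training at 50 steps.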
        num_steps = 50
        for step in range(num_steps):
            batch_train = trn_data_generator.next()
            X_trn = np.array(batch_train[0])
            Y_trn = np.array(batch_train[1])

            ops = [grad_step] + [model[k] for k in sorted(model.keys())]
            inputs = {input_data_tensor: X_trn, input_label_tensor: Y_trn}
            start_time = time.time()
            results = sess.run(ops,
                               feed_dict=inputs,
                               run_metadata=run_metadata,
                               options=options)
            elapsed = time.time() - start_time

            print("Ex/sec: %s" % str(float(batch_size) / float(elapsed)))
            profile(run_metadata, step)

            results = dict(zip(sorted(model.keys()), results[1:]))
            print("TRN step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s" %
                  (step, results["error_top1"], results["error_top5"],
                   results["loss"]))

            # report evaluation metrics every vld_iter training steps
            if (step % vld_iter == 0):
                print("-- running evaluation on vld split")
                X_vld = vld_data[0]
                Y_vld = vld_data[1]
                inputs = [input_data_tensor, input_label_tensor]
                args = [X_vld, Y_vld]
                ops = [model[k] for k in sorted(model.keys())]
                results = tools.iterative_reduce(
                    ops,
                    inputs,
                    args,
                    batch_size=1,
                    fn=lambda x: np.mean(x, axis=0))
                results = dict(zip(sorted(model.keys()), results))
                print(
                    "VLD step:%-5d error_top1: %.4f, error_top5: %.4f, loss:%s"
                    % (step, results["error_top1"], results["error_top5"],
                       results["loss"]))

            if (step % checkpoint_iter == 0) or (step + 1 == num_steps):
                print("-- saving check point")
                tools.save_weights(
                    G, pth.join(checkpoint_dir, "weights.%s" % step))
    options = tf.profiler.ProfileOptionBuilder.time_and_memory()
    options["min_bytes"] = 0
    options["min_micros"] = 0
    options["output"] = 'file:outfile=ooo.txt'
    options["select"] = ("bytes", "peak_bytes", "output_bytes",
                         "residual_bytes")
    # profile memory usage of the training graph G using the collected run metadata
    mem = tf.profiler.profile(G,
                              run_meta=run_metadata,
                              cmd="scope",
                              options=options)
    with open('profs/mem.txt', 'w') as f:
        f.write(str(mem))

    operations_tensors = {}
    operations_names = G.get_operations()
    count1 = 0
    count2 = 0
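    # count1: ops whose output shape or dtype size could not be determined;
    # count2: ops that expose no output tensors at all.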
    print(operations_names)
    for operation in operations_names:
        operation_name = operation.name
        # read the output tensors directly off the op (the ops live in graph G,
        # not in the default graph)
        operations_info = operation.values()
        if len(operations_info) > 0:
            if not (operations_info[0].shape.ndims is None):
                operation_shape = operations_info[0].shape.as_list()
                operation_dtype_size = operations_info[0].dtype.size
                if not (operation_dtype_size is None):
                    operation_no_of_elements = 1
                    for dim in operation_shape:
                        if not (dim is None):
                            operation_no_of_elements = operation_no_of_elements * dim
                    total_size = operation_no_of_elements * operation_dtype_size
                    operations_tensors[operation_name] = total_size
                else:
                    count1 = count1 + 1
            else:
                count1 = count1 + 1
                operations_tensors[operation_name] = -1

            #   print('no shape_1: ' + operation_name)
            #  print('no shape_2: ' + str(operations_info))
            #  operation_namee = operation_name + ':0'
            # tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
            # print('no shape_3:' + str(tf.shape(tensor)))
            # print('no shape:' + str(tensor.get_shape()))

        else:
            # print('no info :' + operation_name)
            # operation_namee = operation.name + ':0'
            count2 = count2 + 1
            operations_tensors[operation_name] = -1

            # try:
            #   tensor = tf.get_default_graph().get_tensor_by_name(operation_namee)
            # print(tensor)
            # print(tf.shape(tensor))
            # except:
            # print('no tensor: ' + operation_namee)
    print(count1)
    print(count2)

    with open('tensors_sz.json', 'w') as f:
        json.dump(operations_tensors, f)
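The tensors_sz.json file written above maps each operation name to an estimated output size in bytes, with -1 marking operations whose size could not be determined. A short follow-up sketch, under those assumptions, that reloads the file and lists the largest known tensors:

import json

with open('tensors_sz.json') as f:
    sizes = json.load(f)

# keep only entries with a known size and print the 20 largest
known = {name: sz for name, sz in sizes.items() if sz > 0}
for name, sz in sorted(known.items(), key=lambda kv: kv[1], reverse=True)[:20]:
    print('%12d bytes  %s' % (sz, name))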