Example #1
import tensorflow as tf   # used below for the placeholders and one-hot encoding
import tensorgraph as tg  # provides tg.EarlyStopper
import numpy as np
import model  # project-specific model definitions
# HVSMRdataset (the HVSMR 2016 heart dataset loader) is assumed to be defined elsewhere in the project
#from scipy.misc import imsave
#from scipy.ndimage.interpolation import rotate

if __name__ == '__main__':

    #print(sys.argv[0]) # input from terminal
    #print(sys.argv[1]) # input from terminal
    #print(sys.argv[2]) # input from terminal

    learning_rate = 0.001

    max_epoch = 150
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=4,
                         percent_decrease=0)

    dataset = HVSMRdataset('./datasetHVSMR16Heart')
    assert dataset.AbleToRetrieveData(), 'unable to locate the dataset directory'
    dataset.InitDataset(splitRatio=1.0, shuffle=True)  # Take everything 100%
    X_ph = tf.placeholder('float32', [None, 181, 239, 165, 1])  #float32
    y_ph = tf.placeholder('uint8', [None, 181, 239, 165, 1])
    #X_ph = tf.placeholder('float32', [None, None, None, None, 1])  #float32
    #y_ph = tf.placeholder('uint8', [None, None, None, None, 1])

    # one-hot encode the 3 segmentation classes: [?, 181, 239, 165, 1] -> [?, 181, 239, 165, 1, 3]
    y_ph_cat = tf.one_hot(y_ph, 3)
    # drop the singleton channel axis -> [?, 181, 239, 165, 3]
    y_ph_cat = y_ph_cat[:, :, :, :, 0, :]
Example #2
def train():
    ### params
    sent_len = 50
    word_len = 20
    ch_embed_dim = 100
    unicode_size = 128
    tfidf_dim = 1000
    tfidf_embed_dim = 1000
    fc_dim = 1000
    batchsize = 32
    train_valid_ratio = [5, 1]
    learning_rate = 0.001
    # components = [len(np.unique(val)) for val in comps]
    # components = [65, 454, 983, 892, 242, 6]
    # components = [42]
    # components = [65]
    # num_train = 10000

    num_train = 10000
    components = [65]
    train_X, valid_X, train_ys, valid_ys = infodocs(num_train, word_len,
                                                    sent_len, components)

    #    num_train = 560000
    #    train_X, valid_X, train_ys, valid_ys, components = BASF_char(num_train,word_len, sent_len)

    #num_train=16000
    #components = [20]
    #train_X, valid_X, train_ys, valid_ys = twenty_newsgroup(num_train, word_len, sent_len, components, use_sean=True)
    #num_train = 20000
    #train_X, valid_X, train_ys, valid_ys, components = BASF_char(num_train,word_len, sent_len)
    print('num train', len(train_X))
    print('num valid', len(valid_X))

    trainset_X = SequentialIterator(train_X, batchsize=batchsize)
    trainset_y = SequentialIterator(*train_ys, batchsize=batchsize)
    validset_X = SequentialIterator(valid_X, batchsize=batchsize)
    validset_y = SequentialIterator(*valid_ys, batchsize=batchsize)
    ### define placeholders

    X_ph = tf.placeholder('int32', [None, sent_len, word_len])
    y_phs = []
    for comp in components:
        y_phs.append(tf.placeholder('float32', [None, comp]))

    ### define the graph model structure
    start = StartNode(input_vars=[X_ph])

    # character CNN
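    # flatten sentences to (batch*sent_len, word_len), look up a ch_embed_dim embedding per character,
    # then reshape to (batch*sent_len, ch_embed_dim, word_len, 1) for the Conv2D below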
    embed_n = HiddenNode(prev=[start],
                         layers=[
                             Reshape(shape=(-1, word_len)),
                             Embedding(cat_dim=unicode_size,
                                       encode_dim=ch_embed_dim,
                                       zero_pad=True),
                             Reshape(shape=(-1, ch_embed_dim, word_len, 1))
                         ])

    h1, w1 = valid(ch_embed_dim,
                   word_len,
                   strides=(1, 1),
                   filters=(ch_embed_dim, 4))
    conv1_n = HiddenNode(prev=[embed_n],
                         layers=[
                             Conv2D(input_channels=1,
                                    num_filters=10,
                                    padding='VALID',
                                    kernel_size=(ch_embed_dim, 4),
                                    stride=(1, 1)),
                             RELU(),
                             Flatten(),
                             Linear(int(h1 * w1 * 10), 1000),
                             RELU(),
                             Reshape((-1, sent_len, 1000)),
                             ReduceSum(1),
                             BatchNormalization(layer_type='fc',
                                                dim=1000,
                                                short_memory=0.01)
                         ])

    # conv2_n = HiddenNode(prev=[embed_n], layers=[Conv2D(input_channels=1, num_filters=1, padding='VALID',
    #                                                   kernel_size=(ch_embed_dim,2), stride=(1,1)),
    #                                              Squeeze()])
    # h2, w2 = valid(ch_embed_dim, word_len, strides=(1,1), filters=(ch_embed_dim,2))

    # conv3_n = HiddenNode(prev=[embed_n], layers=[Conv2D(input_channels=1, num_filters=1, padding='VALID',
    #                                                   kernel_size=(ch_embed_dim,3), stride=(1,1)),
    #                                              Squeeze()])
    # h3, w3 = valid(ch_embed_dim, word_len, strides=(1,1), filters=(ch_embed_dim,3))
    #
    # conv4_n = HiddenNode(prev=[embed_n], layers=[Conv2D(input_channels=1, num_filters=1, padding='VALID',
    #                                                   kernel_size=(ch_embed_dim,4), stride=(1,1)),
    #                                              Squeeze()])
    # h4, w4 = valid(ch_embed_dim, word_len, strides=(1,1), filters=(ch_embed_dim,4))
    # concat_n = HiddenNode(prev=[conv1_n, conv2_n, conv3_n, conv4_n],
    #                       input_merge_mode=Concat(), layers=[RELU()])
    # concat_n = HiddenNode(prev=[conv1_n, conv2_n],
    #                       input_merge_mode=Concat(), layers=[RELU()])
    # fc_n = HiddenNode(prev=[concat_n], layers=[Linear(int(w1+w2), fc_dim), Sigmoid()])
    #
    # # TF-IDF Embedding
    # words_combined_layer = WordsCombined(this_dim=tfidf_dim, mode='sum')
    # words_combined_n = HiddenNode(prev=[fc_n],
    #                               layers=[Linear(prev_dim=fc_dim, this_dim=tfidf_dim), Sigmoid(),
    #                                       Reshape(shape=(-1, sent_len, tfidf_dim)),
    #                                       words_combined_layer,
    #                                       BatchNormalization(dim=tfidf_dim, layer_type='fc', short_memory=0.01)])

    out_n = HiddenNode(
        prev=[conv1_n],
        layers=[Linear(prev_dim=1000, this_dim=components[0]),
                Softmax()])

    # # hierachical softmax
    # prev_dim = components[0]
    # prev_node = HiddenNode(prev=[out_n], layers=[Linear(tfidf_embed_dim, prev_dim), Softmax()])
    # end_nodes = []
    # end_nodes.append(EndNode(prev=[prev_node]))
    # for this_dim in components[1:]:
    #     top_connect = HiddenNode(prev=[out_n], layers=[Linear(tfidf_embed_dim, prev_dim), Sigmoid()])
    #     prev_node = HiddenNode(prev=[prev_node, top_connect], layers=[Linear(prev_dim, this_dim), Softmax()])
    #     end_nodes.append(EndNode(prev=[prev_node]))
    #     prev_dim = this_dim
    end_nodes = [EndNode(prev=[out_n])]

    graph = Graph(start=[start], end=end_nodes)
    # import pdb; pdb.set_trace()

    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    ttl_mse = []
    accus = []
    for y_ph, out in zip(y_phs, train_outs_sb):
        ttl_mse.append(tf.reduce_mean((y_ph - out)**2))
        pos = tf.reduce_sum((y_ph * out))
        accus.append(pos)

    mse = sum(ttl_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        max_epoch = 50
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=3,
                             percent_decrease=0.1)
        temp_acc = []

        for epoch in range(max_epoch):
            print('epoch:', epoch)
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(trainset_X, trainset_y):
                feed_dict = {X_ph: X_batch[0]}
                for y_ph, y_batch in zip(y_phs, ys):
                    feed_dict[y_ph] = y_batch

                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                train_error += total_mse(train_outs, ys)[0]
                train_accuracy += total_accuracy(train_outs, ys)[0]
                ttl_examples += len(X_batch[0])
            print('train mse', train_error / float(ttl_examples))
            print('train accuracy', train_accuracy / float(ttl_examples))
            # print 'outputs'
            # ypred_train = sess.run(outs, feed_dict=feed_dict)
            #
            # print 'ypreds'
            # ypred = np.argmax(ypred_train[0],axis=1)
            # print ypred
            # print 'ylabels'
            # ylabel = np.argmax(ys[0],axis=1)
            # print ylabel
            # print 'mse'
            # print np.mean((ypred_train[0] - ys[0])**2)
            # for v in graph.variables:
            #     print v.name,
            #     print 'mean:', np.mean(np.abs(sess.run(v)))
            #     print 'std:', np.std(sess.run(v))
            #     print sess.run(v)
            # print '---------------------------------'
            # import pdb; pdb.set_trace()
            # ypreds = []
            # print 'words_combined_layer in',sess.run(tf.reduce_mean(words_combined_layer.train_in, reduction_indices=0), feed_dict=feed_dict)
            # print 'words_combined_layer out',sess.run(tf.reduce_mean(words_combined_layer.train_out, reduction_indices=0), feed_dict=feed_dict)
            # # for out in outs:
            #     ypreds.append(sess.run(out, feed_dict=feed_dict))
            # accus = []
            # for y_batch, ypred_batch in zip(ys, ypreds):
            # accu = accuracy_score(y_batch.argmax(axis=1), ypred_batch.argmax(axis=1))
            # accus.append(accu)
            # print accus

            # import pdb; pdb.set_trace()
            # train_error = sess.run(mse, feed_dict=feed_dict)
            # print 'train error:', train_error
            # for accu in accus:
            #     train_pos = sess.run(, feed_dict=feed_dict)

            # print sess.run(embed._W[0,:])
            #
            # print sess.run(embed.embedding[0,:])
            # print '--------------'
            # import pdb; pdb.set_trace()

            # train_error = sess.run(mse, feed_dict=feed_dict)
            # print 'train error:', train_error

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(validset_X, validset_y):
                feed_dict = {X_ph: X_batch[0]}
                for y_ph, y_batch in zip(y_phs, ys):
                    feed_dict[y_ph] = y_batch

                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, ys)[0]
                valid_accuracy += total_accuracy(valid_outs, ys)[0]
                ttl_examples += len(X_batch[0])

            print('valid mse', valid_error / float(ttl_examples))
            print('valid accuracy', valid_accuracy / float(ttl_examples))
            temp_acc.append(valid_accuracy / float(ttl_examples))

        print('average accuracy is:\t', sum(temp_acc) / len(temp_acc))
Example #3
def train(model,
          data,
          epoch_look_back=5,
          max_epoch=100,
          percent_decrease=0,
          batch_size=64,
          learning_rate=0.001,
          weight_regularize=True,
          save_dir=None,
          restore=False):

    if save_dir:
        logdir = '{}/log'.format(save_dir)
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        model_dir = "{}/model".format(save_dir)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

    train_tf, n_train, valid_tf, n_valid = data(create_tfrecords=True,
                                                batch_size=batch_size)

    y_train_sb = model._train_fprop(train_tf['X'])
    y_valid_sb = model._test_fprop(valid_tf['X'])

    loss_train_sb = tg.cost.mse(y_train_sb, train_tf['y'])

    if weight_regularize:
        loss_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(2.5e-5),
            weights_list=[
                var for var in tf.global_variables()
                if __MODEL_VARSCOPE__ in var.name
            ])
        loss_train_sb = loss_train_sb + loss_reg

    accu_train_sb = tg.cost.accuracy(y_train_sb, train_tf['y'])
    accu_valid_sb = tg.cost.accuracy(y_valid_sb, valid_tf['y'])

    tf.summary.scalar('train', accu_train_sb)

    if save_dir:
        sav_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope=__MODEL_VARSCOPE__ +
                                     '/TemplateModel')
        saver = tf.train.Saver(sav_vars)

    # opt = tf.train.RMSPropOptimizer(learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = hvd.DistributedOptimizer(opt)

    # required for BatchNormalization layer
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_op = opt.minimize(loss_train_sb)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    # bcast = hvd.broadcast_global_variables(0)

    # Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.visible_device_list = str(hvd.local_rank())

    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        sess.run(init_op)
        if restore:
            logger.info('restoring model')
            saver.restore(sess, restore)
        train_writer = tf.summary.FileWriter('{}/train'.format(logdir),
                                             sess.graph)
        # bcast.run()
        # merge = tf.summary.merge_all()
        es = tg.EarlyStopper(max_epoch, epoch_look_back, percent_decrease)
        epoch = 0
        best_valid_accu = 0
        while True:
            epoch += 1

            pbar = tg.ProgressBar(n_train)
            ttl_train_loss = 0
            for i in range(0, n_train, batch_size):
                pbar.update(i)
                _, loss_train = sess.run([train_op, loss_train_sb])
                # _, loss_train, merge_v = sess.run([train_op, loss_train_sb, merge])
                ttl_train_loss += loss_train * batch_size
                # train_writer.add_summary(merge_v, i)
            pbar.update(n_train)
            ttl_train_loss /= n_train
            print('')
            logger.info('epoch {}, train loss {}'.format(
                epoch, ttl_train_loss))

            pbar = tg.ProgressBar(n_valid)
            ttl_valid_accu = 0
            for i in range(0, n_valid, batch_size):
                pbar.update(i)
                loss_accu = sess.run(accu_valid_sb)
                ttl_valid_accu += loss_accu * batch_size
            pbar.update(n_valid)
            ttl_valid_accu /= n_valid
            print('')
            logger.info('epoch {}, valid accuracy {}'.format(
                epoch, ttl_valid_accu))
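            # the negated validation accuracy is passed as the "error" so that higher accuracy counts as an improvement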
            if es.continue_learning(-ttl_valid_accu, epoch=epoch):
                logger.info('best epoch last update: {}'.format(
                    es.best_epoch_last_update))
                logger.info('best valid last update: {}'.format(
                    es.best_valid_last_update))

                if ttl_valid_accu > best_valid_accu:
                    best_valid_accu = ttl_valid_accu
                    if save_dir:
                        save_path = saver.save(sess, model_dir + '/model.tf')
                        print("Best model saved in file: %s" % save_path)

            else:
                logger.info('training done!')
                break

        coord.request_stop()
        coord.join(threads)
Example #4
def train(dt):

    batchsize = 32
    learning_rate = 0.005
    max_epoch = 1000
    epoch_look_back = 3
    percent_decrease = 0.0

    min_density = 0.01
    num_patch_per_img = 200
    threshold = 0.6

    # dt = datetime.now()
    # dt = dt.strftime('%Y%m%d_%H%M_%S%f')

    dt = './save/' + dt
    if not os.path.exists(dt):
        os.makedirs(dt)
    save_path = dt + '/model.tf'

    # blks_train, blks_valid, valid_paths = datablks(depth, height, width, batchsize, min_density, num_patch_per_img)

    train_iter, valid_iter, max_shape = fullimage()
    depth, height, width = max_shape
    X_ph = tf.placeholder('float32', [None, depth, height, width, 1])
    M_ph = tf.placeholder('float32', [None, depth, height, width, 1])

    seq = model()

    M_train_s = seq.train_fprop(X_ph)
    M_valid_s = seq.test_fprop(X_ph)

    train_cost = tf.reduce_mean((M_ph - M_train_s)**2)
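    # the MSE above is what gets optimized; IOU and F1 below are measured on the mask thresholded at `threshold`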
    train_iou = iou(M_ph, tf.to_float(M_train_s > threshold))
    train_f1 = tg.cost.image_f1(M_ph, tf.to_float(M_train_s > threshold))
    # train_cost = iou(M_ph, M_train_s)
    valid_cost = tf.reduce_mean((M_ph - M_valid_s)**2)
    valid_iou = iou(M_ph, tf.to_float(M_valid_s > threshold))
    valid_f1 = tg.cost.image_f1(M_ph, tf.to_float(M_valid_s > threshold))

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_cost)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)
        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            pbar = ProgressBar(len(train_iter))
            n_exp = 0
            train_mse_score = 0
            train_iou_score = 0
            train_f1_score = 0
            # for data_train in blks_train:
            # for X_batch, M_batch in blks_train:
            for X_batch, M_batch, shapes in train_iter:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                # import pdb; pdb.set_trace()  # debug breakpoint removed so the training loop runs uninterrupted
                sess.run(optimizer, feed_dict=feed_dict)
                train_mse_score += sess.run(train_cost,
                                            feed_dict=feed_dict) * len(X_batch)
                train_iou_score += sess.run(train_iou,
                                            feed_dict=feed_dict) * len(X_batch)
                train_f1_score += sess.run(train_f1,
                                           feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            train_mse_score /= n_exp
            print('average patch train mse:', train_mse_score)
            train_iou_score /= n_exp
            print('average patch train iou:', train_iou_score)
            train_f1_score /= n_exp
            print('average patch train f1:', train_f1_score)

            print('..validating')
            pbar = ProgressBar(len(valid_iter))
            n_exp = 0
            valid_mse_score = 0
            # for data_valid in blks_valid:
            valid_f1_score = 0
            valid_iou_score = 0
            for X_batch, M_batch, shapes in valid_iter:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                valid_mse_score += sess.run(valid_cost,
                                            feed_dict=feed_dict) * len(X_batch)
                valid_iou_score += sess.run(valid_iou,
                                            feed_dict=feed_dict) * len(X_batch)
                valid_f1_score += sess.run(valid_f1,
                                           feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            valid_mse_score /= n_exp
            print('average patch valid mse:', valid_mse_score)

            valid_iou_score /= n_exp
            print('average patch valid iou:', valid_iou_score)

            valid_f1_score /= n_exp
            print('average patch valid f1:', valid_f1_score)

            ############################[ Testing ]#############################
            # if epoch % 10 == 0:
            # print('full image testing')
            # test(valid_paths, depth, height, width, M_valid_s, sess, threshold)

            if es.continue_learning(valid_error=valid_mse_score):
                print('epoch', epoch)
                print('valid error so far:', valid_mse_score)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                saver.save(sess, save_path)
                print('model saved to:', save_path)

            else:
                print('training done!')
                break
Example #5
def train(modelclass, dt=None):

    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300

    max_epoch = 1000
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.05
    max_outputs = 10

    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)


    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    #X_train, y_train, X_valid, y_valid = Cifar10()
    X_train, y_train, X_valid, y_valid = data_char()
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape
    # c = 1
    # train_embed, test_embed = text_embed(ch_embed_dim, sent_len, word_len)    
    
    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)
    # gan = AuGan(h, w, nclass, bottleneck_dim)
    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)

    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()

    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)

    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        # X_oh = ph2onehot(X_ph)


        # train_mse = tf.reduce_mean((X_ph - G_train_s)**2)
        # valid_mse = tf.reduce_mean((X_ph - G_valid_s)**2)
        # gen_train_cost_sb = generator_cost(class_train_sb, judge_train_sb)
        # gen_valid_cost_sb = generator_cost(class_test_sb, judge_test_sb)
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train

        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)
        # dis_train_cost_sb = discriminator_cost(class_train_sb, judge_train_sb)
        # dis_valid_cost_sb = disciminator_cost(class_test_sb, judge_test_sb)

        # gen_train_img = put_kernels_on_grid(G_train_sb, batchsize)
        #
        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])

        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])


        # gen_optimizer = tf.train.RMSPropOptimizer(gen_learning_rate).minimize(gen_train_cost_sb, var_list=gen_var_list)
        # dis_optimizer = tf.train.RMSPropOptimizer(dis_learning_rate).minimize(dis_train_cost_sb, var_list=dis_var_list)

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(dis_train_cost_sb, var_list=dis_var_list)

        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]
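        # run together with dis_optimizer: clip every discriminator variable to [-0.01, 0.01] (WGAN-style weight clipping)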
        init = tf.global_variables_initializer()
        gan.sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)



        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)

        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
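        # write a one-off 'orig_img' summary of real images so generated samples can be compared against them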
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        # import pdb; pdb.set_trace()
        img_writer.add_summary(orig_sm.eval(session=gan.sess, feed_dict={real_ph:data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_mse = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            for X_batch, y_batch in data_train:

                for i in range(3):
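                    # the discriminator is updated three times for every single generator update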
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor, size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1,1, size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {noise_ph:noise, real_ph:X_batch, y_ph:y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)

                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor, size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1,1, size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {noise_ph:noise, real_ph:X_batch, y_ph:y_batch}
                    gan.sess.run(gen_optimizer, feed_dict={noise_ph:noise, real_ph:X_batch, y_ph:y_batch})


                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run([fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                                                               feed_dict=feed_dict)


                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
                error_writer.flush()
            error_writer.close()


            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)


            if save_path:
                # print('\n..saving best model to: {}'.format(save_path))
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                # gan.save(save_path)

                for X_batch, y_batch in data_train:

                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor, size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1,1, size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {noise_ph:noise, real_ph:X_batch, y_ph:y_batch}
                    G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg], feed_dict=feed_dict)
                    train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))

                    train_writer.add_summary(G_img)

                    train_writer.flush()
                    train_writer.close()

                    break



    return save_path
Example #6
def train(modelclass, dt=None):

    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300

    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.3  #  20170616_1459: 0.05   20170616_1951: 0.01
    max_outputs = 10

    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    # 0617_1346: 0.05   #0619_1033: 0.01   0619_1528:0.1  0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)

    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)

    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator(
    )
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()

    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)

    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train

        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)
        gen_train_sm = tf.summary.image('gen_train_img',
                                        G_train_sb,
                                        max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])

        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge(
            [gen_train_cost_sm, dis_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)

        clip_D = [
            p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list
        ]

        init = tf.global_variables_initializer()
        gan.sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment',
                                             gan.sess.graph)

        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img',
                                   real_ph,
                                   max_outputs=max_outputs)
        img_writer.add_summary(
            orig_sm.eval(session=gan.sess,
                         feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_mse = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            batch_iter = 1
            for X_batch, y_batch in data_train:

                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0,
                                                 scale=noise_factor,
                                                 size=(len(X_batch),
                                                       bottleneck_dim))
                    else:
                        noise = np.random.uniform(
                            -1, 1,
                            size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {
                        noise_ph: noise,
                        real_ph: X_batch,
                        y_ph: y_batch
                    }
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)

                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0,
                                                 scale=noise_factor,
                                                 size=(len(X_batch),
                                                       bottleneck_dim))
                    else:
                        noise = np.random.uniform(
                            -1, 1,
                            size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {
                        noise_ph: noise,
                        real_ph: X_batch,
                        y_ph: y_batch
                    }
                    gan.sess.run(gen_optimizer,
                                 feed_dict={
                                     noise_ph: noise,
                                     real_ph: X_batch,
                                     y_ph: y_batch
                                 })

                if batch_iter == 1:
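                    # once per epoch (first batch only), write the generated images to their own summary directory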
                    G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                                  feed_dict=feed_dict)
                    gen_writer = tf.summary.FileWriter(
                        '{}/generator/{}'.format(logdir, epoch))
                    gen_writer.add_summary(G_img)
                    gen_writer.flush()
                    gen_writer.close()
                    batch_iter = 0

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [
                        fake_judge, cost_train_mg, gen_train_cost_sb,
                        dis_train_cost_sb
                    ],
                    feed_dict=feed_dict)

                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
                error_writer.flush()

            error_writer.close()

            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path:
                # print('\n..saving best model to: {}'.format(save_path))
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                # gan.save(save_path)

                for X_batch, y_batch in data_train:

                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0,
                                                 scale=noise_factor,
                                                 size=(len(X_batch),
                                                       bottleneck_dim))
                    else:
                        noise = np.random.uniform(
                            -1, 1,
                            size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {
                        noise_ph: noise,
                        real_ph: X_batch,
                        y_ph: y_batch
                    }
                    # print '---- Before ----'
                    # print '--Number of threads running ', threading.active_count()
                    G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                                  feed_dict=feed_dict)
                    train_writer = tf.summary.FileWriter(
                        '{}/experiment/{}'.format(logdir, epoch))
                    # print '---- After ----'
                    # print '--Number of threads running ', threading.active_count()
                    train_writer.add_summary(G_img)
                    train_writer.flush()
                    train_writer.close()

                    break

    return save_path
Example #7
def train(modelclass, dt=None):

    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300

    max_epoch = 2
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.1  #  20170616_1459: 0.05   20170616_1951: 0.01    
    max_outputs = 10

    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)


    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()  
    AuX_train = X_train
    Auy_train = y_train
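    # AuX_train/Auy_train grow into the augmented training set; aux/auy collect only the generated samples kept later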
    aux = np.empty((0, 28, 28, 1), 'float32')
    auy = np.empty((0, 10), 'int32')
    # 0617_1346: 0.05   #0619_1033: 0.01   0619_1528:0.1  0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)

    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)
    
    print('\n====== Before augment data size ', X_train.shape, ' ======\n')
    
    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)

    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()

    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)

    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():

        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train

        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])

        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(dis_train_cost_sb, var_list=dis_var_list)

        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess, feed_dict={real_ph:data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_mse = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            for X_batch, y_batch in data_train:

                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor, size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1,1, size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {noise_ph:noise, real_ph:X_batch, y_ph:y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)

                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor, size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1,1, size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {noise_ph:noise, real_ph:X_batch, y_ph:y_batch}
                    gan.sess.run(gen_optimizer, feed_dict={noise_ph:noise, real_ph:X_batch, y_ph:y_batch})
                                
                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run([fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                                                               feed_dict=feed_dict)

                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
                error_writer.flush()
                
            error_writer.close()

            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)


            if save_path and epoch == max_epoch-1:
                # print('\n..saving best model to: {}'.format(save_path))
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                
                for X_batch, y_batch in data_train:
                    #import pdb; pdb.set_trace()

                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor, size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1,1, size=(len(X_batch), bottleneck_dim)) * noise_factor

                    feed_dict = {noise_ph:noise, real_ph:X_batch, y_ph:y_batch}
                    G_train, G_img, fake_dis = gan.sess.run([G_train_sb, gen_train_mg, fake_train], feed_dict=feed_dict)
                    fake_class_dis, fake_judge_dis = fake_dis
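                    # keep only the generated samples that the discriminator scores as real (judge output > 0.5)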
                    idx = [i for i,v in enumerate(fake_judge_dis) if v>0.5]
                    aux = np.concatenate((aux, G_train[idx]), axis = 0)
                    auy = np.concatenate((auy, fake_class_dis[idx]), axis = 0)
                    AuX_train = np.concatenate((G_train, AuX_train), axis = 0)
                    Auy_train = np.concatenate((y_batch, Auy_train), axis = 0)
                    # temp_data = zip(G_img, y_batch)
                    # aug_data.append(temp_data)
                    train_writer.add_summary(G_img)                    
                    train_writer.flush()
                train_writer.close()
                xname = 'genx.npy'
                yname = 'geny.npy'
                np.save('{}/{}'.format(logdir, xname), aux)
                np.save('{}/{}'.format(logdir, yname), auy)
        
        print('\n====== Augment data size ', AuX_train.shape, ' ======\n')
        print('\n====== Augment data size ', Auy_train.shape, ' ======\n')
        

    return save_path, X_train, y_train, X_valid, y_valid, AuX_train, Auy_train, aux, auy
Example #8
def train(model_name, data_name, fout):

    batchsize = 32
    learning_rate = 0.001
    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0.0
    train_valid = [5, 1]
    case = data_name
    model = model_name

    if case == 'skin':
        patch_size = [128, 128]
        data = Skin(batchsize=batchsize, train_valid=train_valid)
    if case == 'iris':
        patch_size = [64, 64]
        data = Iris(batchsize=batchsize,
                    patch_size=patch_size,
                    train_valid=train_valid)

    X_ph = tf.placeholder('float32', [None] + patch_size + [3])
    M_ph = tf.placeholder('float32', [None] + patch_size + [1])

    if model == 'fcn':
        model = fcn()
        M_train_s = model.train_fprop(X_ph)
        M_valid_s = model.test_fprop(X_ph)

    if model == 'resnet':
        model = resnet()
        M_train_s = model.train_fprop(X_ph)
        M_valid_s = model.test_fprop(X_ph)

    if model == 'crf_rnn':
        M_train_s, M_valid_s = crf_rnn(X_ph)
        # import pdb; pdb.set_trace()

    if model == 'resnet_crf_rnn':
        M_train_s, M_valid_s = resnet_crf_rnn(X_ph)

    h, w = patch_size
    train_mse = tf.reduce_mean((M_ph - M_train_s)**2)
    valid_mse = tf.reduce_mean((M_ph - M_valid_s)**2)

    # import pdb; pdb.set_trace()

    train_entropy = tg.cost.entropy(M_ph, M_train_s)

    data_train, data_valid = data.make_data()

    # optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_entropy)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)
        best_valid_f1 = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            print('..training')
            pbar = ProgressBar(len(data_train))
            n_exp = 0
            train_mse_score = 0
            for X_batch, M_batch in data_train:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                sess.run(optimizer, feed_dict=feed_dict)
                train_mse_score += sess.run(train_mse,
                                            feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            train_mse_score /= n_exp
            print('\nmean train mse:', train_mse_score)

            print('..validating')
            pbar = ProgressBar(len(data_valid))
            n_exp = 0
            valid_mse_score = 0
            valid_f1_score = 0
            best_th = 0
            for X_batch, M_batch in data_valid:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                ypred_valid = sess.run(M_valid_s, feed_dict=feed_dict)
                if model in ['crf_rnn', 'resnet_crf_rnn']:
                    ypred_valid = ypred_valid[0]

                if case == 'skin':
                    th = 0.25
                    ypvalid = ypred_valid > th
                    max_fscore = f1_score(M_batch.flatten(), ypvalid.flatten())

                if case == 'iris':
                    th = 0.25
                    ypvalid = ypred_valid > th
                    max_fscore = f1_score(M_batch.flatten(), ypvalid.flatten())

                # if case == 'iris':
                #     max_fscore = 0
                #     for th in range(0, 70):
                #         th = (0.0 + th * 0.01)
                #         ypvalid = ypred_valid > th
                #         fscore = f1_score(M_batch.flatten(), ypvalid.flatten())
                #         if fscore > max_fscore:
                #             max_fscore = fscore
                #             best_th = th
                #     print('best th:', best_th)

                valid_f1_score += max_fscore * len(X_batch)

                # valid_f1_score += f1_score(M_batch.flatten(), ypred_valid.flatten()>0.5)

                valid_mse_score += sess.run(valid_mse,
                                            feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            # import pdb; pdb.set_trace()
            valid_f1_score /= n_exp
            valid_mse_score /= n_exp
            print('\nmean valid f1:', valid_f1_score)
            print('mean valid mse:', valid_mse_score)

            if valid_f1_score > best_valid_f1:
                best_valid_f1 = valid_f1_score
            print('best valid f1:', best_valid_f1)

            if es.continue_learning(valid_error=valid_mse_score):
                print('epoch', epoch)
                print('valid error so far:', valid_mse_score)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)

            else:
                # import pdb; pdb.set_trace()
                print('training done for {model} on {data}'.format(
                    model=model_name, data=data_name))
                fout.write('{model},{data},f1_score:{f1}\n'.format(
                    model=model_name, data=data_name, f1=best_valid_f1))
                fout.write('{model},{data},valid_cost:{valid_cost}\n'.format(
                    model=model_name,
                    data=data_name,
                    valid_cost=es.best_valid_last_update))
                break
Example #9
def train():
    learning_rate = 0.001
    batchsize = 32

    max_epoch = 300
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=3,
                         percent_decrease=0)

    seq = model()
    X_train, y_train, X_test, y_test = Mnist(flatten=False,
                                             onehot=True,
                                             binary=True,
                                             datadir='.')
    iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, 28, 28, 1])
    y_ph = tf.placeholder('float32', [None, 10])

    y_train_sb = seq.train_fprop(X_ph)
    y_test_sb = seq.test_fprop(X_ph)

    train_cost_sb = entropy(y_ph, y_train_sb)
    test_cost_sb = entropy(y_ph, y_test_sb)
    test_accu_sb = accuracy(y_ph, y_test_sb)

    # required for BatchNormalization layer: make the train op depend on UPDATE_OPS so moving averages are updated
    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_ops = optimizer.minimize(train_cost_sb)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        best_valid_accu = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            pbar = tg.ProgressBar(len(iter_train))
            ttl_train_cost = 0
            ttl_examples = 0
            print('..training')
            for X_batch, y_batch in iter_train:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                _, train_cost = sess.run([train_ops, train_cost_sb],
                                         feed_dict=feed_dict)
                ttl_train_cost += len(X_batch) * train_cost
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_train_cost = ttl_train_cost / float(ttl_examples)
            print('\ntrain cost', mean_train_cost)

            ttl_valid_cost = 0
            ttl_valid_accu = 0
            ttl_examples = 0
            pbar = tg.ProgressBar(len(iter_test))
            print('..validating')
            for X_batch, y_batch in iter_test:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                valid_cost, valid_accu = sess.run([test_cost_sb, test_accu_sb],
                                                  feed_dict=feed_dict)
                ttl_valid_cost += len(X_batch) * valid_cost
                ttl_valid_accu += len(X_batch) * valid_accu
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_valid_cost = ttl_valid_cost / float(ttl_examples)
            mean_valid_accu = ttl_valid_accu / float(ttl_examples)
            print('\nvalid cost', mean_valid_cost)
            print('valid accu', mean_valid_accu)
            if best_valid_accu < mean_valid_accu:
                best_valid_accu = mean_valid_accu

            if es.continue_learning(valid_error=mean_valid_cost, epoch=epoch):
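                # continue_learning returns True while the validation cost is still improving within the look-back window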
                print('epoch', epoch)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                print('best valid accuracy:', best_valid_accu)
            else:
                print('training done!')
                break
Example #10
def train():
    learning_rate = 0.001
    batchsize = 64
    max_epoch = 300
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=None,
                         percent_decrease=0)

    X_train, y_train, X_test, y_test = Cifar10(contrast_normalize=False,
                                               whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    seq = model(nclass=nclass, h=h, w=w, c=c)
    iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, h, w, c])
    y_ph = tf.placeholder('float32', [None, nclass])

    y_train_sb = seq.train_fprop(X_ph)
    y_test_sb = seq.test_fprop(X_ph)

    train_cost_sb = entropy(y_ph, y_train_sb)
    test_cost_sb = entropy(y_ph, y_test_sb)
    test_accu_sb = accuracy(y_ph, y_test_sb)

    # required for BatchNormalization layer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_ops = optimizer.minimize(train_cost_sb)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        best_valid_accu = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            pbar = tg.ProgressBar(len(iter_train))
            ttl_train_cost = 0
            ttl_examples = 0
            print('..training')
            for X_batch, y_batch in iter_train:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                _, train_cost = sess.run([train_ops, train_cost_sb],
                                         feed_dict=feed_dict)
                ttl_train_cost += len(X_batch) * train_cost
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_train_cost = ttl_train_cost / float(ttl_examples)
            print('\ntrain cost', mean_train_cost)

            ttl_valid_cost = 0
            ttl_valid_accu = 0
            ttl_examples = 0
            pbar = tg.ProgressBar(len(iter_test))
            print('..validating')
            for X_batch, y_batch in iter_test:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                valid_cost, valid_accu = sess.run([test_cost_sb, test_accu_sb],
                                                  feed_dict=feed_dict)
                ttl_valid_cost += len(X_batch) * valid_cost
                ttl_valid_accu += len(X_batch) * valid_accu
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_valid_cost = ttl_valid_cost / float(ttl_examples)
            mean_valid_accu = ttl_valid_accu / float(ttl_examples)
            print('\nvalid cost', mean_valid_cost)
            print('valid accu', mean_valid_accu)
            if best_valid_accu < mean_valid_accu:
                best_valid_accu = mean_valid_accu

            if es.continue_learning(valid_error=mean_valid_cost, epoch=epoch):
                print('epoch', epoch)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                print('best valid accuracy:', best_valid_accu)
            else:
                print('training done!')
                break