Code Example #1
import cPickle

import numpy
import theano
import theano.tensor as T
from docopt import docopt

from load_data import load_mnist_full
from source.costs import error

if __name__ == '__main__':
    args = docopt(__doc__)

    m_batch_size = 100
    dataset = load_mnist_full()
    test_set_x, test_set_y = dataset[1]
    n_test_batches = numpy.ceil((test_set_x.get_value(borrow=True).shape[0]) /
                                numpy.float(m_batch_size))

    trained_model = cPickle.load(
        open("trained_model/" + args['--load_filename'], 'rb'))[0]

    index = T.iscalar()
    x = T.matrix()
    t = T.ivector()
    test_error = theano.function(
        inputs=[index],
        outputs=error(x=x, t=t, forward_func=trained_model.forward_test),
        givens={
            x: test_set_x[m_batch_size * index:m_batch_size * (index + 1)],
            t: test_set_y[m_batch_size * index:m_batch_size * (index + 1)]
        })

    test_errors = [
        test_error(i) for i in xrange(numpy.int(numpy.ceil(n_test_batches)))
    ]
    print "the number of misclassified examples on test set:" + str(
        numpy.sum(test_errors)) + ", and test error rate(%):" + str(
            100 * numpy.sum(test_errors) /
            numpy.float(test_set_x.get_value(borrow=True).shape[0]))
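The script parses its options with docopt, which reads the usage pattern from the module docstring (not shown in this excerpt). A minimal header consistent with the one option the code actually reads might look like the sketch below; only --load_filename is taken from the code above, and the script name and description are hypothetical.

"""Evaluate a trained model on the MNIST test set.

Usage:
  test_mnist.py --load_filename=<filename>

Options:
  --load_filename=<filename>  Pickled model under trained_model/ to evaluate.
"""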
Code Example #2
File: train_mnist_semisup.py Project: ilovecv/vat
def train(args):

    print args

    numpy.random.seed(int(args['--seed']))

    dataset = load_data.load_mnist_for_semi_sup(n_l=int(args['--num_labeled_samples']),
                                                n_v=int(args['--num_validation_samples']))

    x_train, t_train, ul_x_train = dataset[0]
    x_test, t_test = dataset[1]


    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')] 
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    ul_x = T.matrix()
    t = T.ivector()

    if(args['--cost_type']=='MLE'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'],
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples = ul_x,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']),
                                              x_for_generating_adversarial_examples = ul_x,
                                              forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,params=model.params,alpha=float(args['--initial_learning_rate']))


    index = T.iscalar()
    ul_index = T.iscalar()
    batch_size = int(args['--batch_size'])
    ul_batch_size = int(args['--ul_batch_size'])

    f_train = theano.function(inputs=[index,ul_index], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)],
                                  ul_x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]},
                              on_unused_input='warn')
    f_nll_train = theano.function(inputs=[index], outputs=nll,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_nll_test = theano.function(inputs=[index], outputs=nll,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)],
                                  t:t_test[batch_size*index:batch_size*(index+1)]})

    f_error_train = theano.function(inputs=[index], outputs=error,
                              givens={
                                  x:x_train[batch_size*index:batch_size*(index+1)],
                                  t:t_train[batch_size*index:batch_size*(index+1)]})
    f_error_test = theano.function(inputs=[index], outputs=error,
                              givens={
                                  x:x_test[batch_size*index:batch_size*(index+1)],
                                  t:t_test[batch_size*index:batch_size*(index+1)]})

    f_lr_decay = theano.function(inputs=[],outputs=optimizer.alpha,
                                 updates={optimizer.alpha:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.alpha})

    # Shuffle training set
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],outputs=x_train,updates=update_permutation)

    # Shuffle unlabeled training set
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    update_ul_permutation = OrderedDict()
    update_ul_permutation[ul_x_train] = ul_x_train[ul_randix]
    f_permute_ul_train_set = theano.function(inputs=[],outputs=ul_x_train,updates=update_ul_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []


    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]
    n_ul_train = ul_x_train.get_value().shape[0]

    sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
    statuses['nll_train'].append(sum_nll_train/n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test/n_test)
    statuses['error_test'].append(sum_error_test)
    print "[Epoch]",str(-1)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]




    print "training..."

    make_sure_path_exists("./trained_model")

    l_i = 0
    ul_i = 0
    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump((statuses,args),open('./trained_model/'+'tmp-' + args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
        
        f_permute_train_set()
        f_permute_ul_train_set()

        for it in xrange(int(args['--num_batch_it'])):
            f_train(l_i,ul_i)
            l_i = 0 if l_i>=n_train/batch_size-1 else l_i + 1
            ul_i = 0 if ul_i >=n_ul_train/ul_batch_size-1 else ul_i + 1


        sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
        sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
        sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
        sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
        statuses['nll_train'].append(sum_nll_train/n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test/n_test)
        statuses['error_test'].append(sum_error_test)
        print "[Epoch]",str(epoch)
        print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]


        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(inputs=[ul_index],outputs=model.forward_for_finetuning_batch_stat(x),
                                 givens={x:ul_x_train[ul_batch_size*ul_index:ul_batch_size*(ul_index+1)]})
    [f_finetune(i) for i in xrange(n_ul_train/ul_batch_size)]

    sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in xrange(n_train/batch_size)]))*batch_size
    sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in xrange(n_train/batch_size)]))
    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in xrange(n_test/batch_size)]))*batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in xrange(n_test/batch_size)]))
    statuses['nll_train'].append(sum_nll_train/n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test/n_test)
    statuses['error_test'].append(sum_error_test)
    print "[after finetuning]"
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]

    ###########################

    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
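The inner training loop above advances the labeled batch index l_i and the unlabeled batch index ul_i independently, wrapping each back to 0 before it would reach the ragged final batch. Because the counters persist across epochs while both sets are reshuffled, batch boundaries drift relative to the data from epoch to epoch. A plain-Python sketch of the wrap-around (the sizes here are hypothetical, chosen to make the cycling visible):

n_train, batch_size = 100, 32        # 3 full labeled batches; rows 96-99 are never used
n_ul_train, ul_batch_size = 250, 64  # 3 full unlabeled batches

l_i, ul_i = 0, 0
for it in range(8):
    # f_train(l_i, ul_i) would consume labeled batch l_i and unlabeled batch ul_i here
    l_i = 0 if l_i >= n_train // batch_size - 1 else l_i + 1
    ul_i = 0 if ul_i >= n_ul_train // ul_batch_size - 1 else ul_i + 1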
Code Example #3
def train(args):
    print args

    numpy.random.seed(int(args['--seed']))

    if (args['--validation']):
        dataset = load_data.load_mnist_for_validation(
            n_v=int(args['--num_validation_samples']))
    else:
        dataset = load_data.load_mnist_full()
    x_train, t_train = dataset[0]
    x_test, t_test = dataset[1]

    layer_sizes = [
        int(layer_size) for layer_size in args['--layer_sizes'].split('-')
    ]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = T.matrix()
    t = T.ivector()

    if (args['--cost_type'] == 'MLE'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params, coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'],
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']),
            forward_func_for_generating_adversarial_examples=model.forward_no_update_batch_stat)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost,
                                params=model.params,
                                alpha=float(args['--initial_learning_rate']))

    index = T.iscalar()
    batch_size = int(args['--batch_size'])
    f_train = theano.function(
        inputs=[index],
        outputs=cost,
        updates=optimizer.updates,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_train = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_nll_test = theano.function(
        inputs=[index],
        outputs=nll,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_error_train = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_train[batch_size * index:batch_size * (index + 1)],
            t: t_train[batch_size * index:batch_size * (index + 1)]
        })
    f_error_test = theano.function(
        inputs=[index],
        outputs=error,
        givens={
            x: x_test[batch_size * index:batch_size * (index + 1)],
            t: t_test[batch_size * index:batch_size * (index + 1)]
        })

    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.alpha,
        updates={
            optimizer.alpha:
            theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(
                    theano.config.floatX)) * optimizer.alpha
        })
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(
        n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[],
                                          outputs=x_train,
                                          updates=update_permutation)

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in xrange(n_train / batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in xrange(n_train / batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in xrange(n_test / batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in xrange(n_test / batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print "[Epoch]", str(-1)
    print  "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

    print "training..."

    make_sure_path_exists("./trained_model")

    for epoch in xrange(int(args['--num_epochs'])):
        cPickle.dump(
            (statuses, args),
            open('./trained_model/' + 'tmp-' + args['--save_filename'],
                 'wb'), cPickle.HIGHEST_PROTOCOL)

        f_permute_train_set()

        ### update parameters ###
        [f_train(i) for i in xrange(n_train / batch_size)]
        #########################

        sum_nll_train = numpy.sum(
            numpy.array([f_nll_train(i)
                         for i in xrange(n_train / batch_size)])) * batch_size
        sum_error_train = numpy.sum(
            numpy.array(
                [f_error_train(i) for i in xrange(n_train / batch_size)]))
        sum_nll_test = numpy.sum(
            numpy.array([f_nll_test(i)
                         for i in xrange(n_test / batch_size)])) * batch_size
        sum_error_test = numpy.sum(
            numpy.array([f_error_test(i)
                         for i in xrange(n_test / batch_size)]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        print "[Epoch]", str(epoch)
        print  "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]

        f_lr_decay()

    ### finetune batch stat ###
    f_finetune = theano.function(
        inputs=[index],
        outputs=model.forward_for_finetuning_batch_stat(x),
        givens={x: x_train[batch_size * index:batch_size * (index + 1)]})
    [f_finetune(i) for i in xrange(n_train / batch_size)]

    sum_nll_train = numpy.sum(
        numpy.array([f_nll_train(i)
                     for i in xrange(n_train / batch_size)])) * batch_size
    sum_error_train = numpy.sum(
        numpy.array([f_error_train(i) for i in xrange(n_train / batch_size)]))
    sum_nll_test = numpy.sum(
        numpy.array([f_nll_test(i)
                     for i in xrange(n_test / batch_size)])) * batch_size
    sum_error_test = numpy.sum(
        numpy.array([f_error_test(i) for i in xrange(n_test / batch_size)]))
    statuses['nll_train'].append(sum_nll_train / n_train)
    statuses['error_train'].append(sum_error_train)
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    print "[after finetuning]"
    print  "nll_train : ", statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
        "nll_test : ", statuses['nll_test'][-1], "error_test : ", statuses['error_test'][-1]
    ###########################

    make_sure_path_exists("./trained_model")
    cPickle.dump((model, statuses, args),
                 open('./trained_model/' + args['--save_filename'], 'wb'),
                 cPickle.HIGHEST_PROTOCOL)
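costs.virtual_adversarial_training and its finite-difference variant are defined elsewhere in the project; their core (Miyato et al.'s virtual adversarial training) is a power iteration that estimates the input direction which most increases KL(p(y|x) || p(y|x + r)). The numpy sketch below shows only that iteration; grad_kl stands in for the gradient the real code obtains from Theano's autodiff, and all names and defaults here are illustrative rather than the project's API.

import numpy

def vat_direction(grad_kl, x, xi=1e-6, num_power_iter=1):
    # grad_kl(x, r): gradient of KL(p(y|x) || p(y|x + r)) w.r.t. r (from autodiff).
    # x has shape (batch, features); returns one unit-norm direction per example.
    d = numpy.random.standard_normal(x.shape)
    for _ in range(num_power_iter):
        d = d / numpy.linalg.norm(d, axis=1, keepdims=True)
        # evaluating the KL gradient at r = xi * d approximates the
        # Hessian-vector product H d (a finite difference), so repeated
        # application converges toward the dominant eigenvector of H
        d = grad_kl(x, xi * d)
    return d / numpy.linalg.norm(d, axis=1, keepdims=True)

The VAT cost then adds KL(p(y|x) || p(y|x + epsilon * d)) as a regularizer on top of the cross-entropy term, with epsilon and the norm constraint taken from the command-line options.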
Code Example #4
File: train_syn.py Project: sndnyang/vat_theano
def train(args):
    with open('dataset/' + args['--dataset_filename'], "rb") as fp:
        if sys.version_info.major == 3:
            dataset = cPickle.load(fp, encoding="bytes")
        else:
            dataset = cPickle.load(fp)

    x_train = theano.shared(
        numpy.asarray(dataset[0][0][0], dtype=theano.config.floatX))
    t_train = theano.shared(numpy.asarray(dataset[0][0][1], dtype='int32'))
    x_test = theano.shared(
        numpy.asarray(dataset[0][1][0], dtype=theano.config.floatX))
    t_test = theano.shared(numpy.asarray(dataset[0][1][1], dtype='int32'))

    avg_error_rate = 0
    train_err_history = 0
    test_err_history = 0
    exp = 1
    best_error_rate = 1000
    best_model = None
    statuses = {}

    for i in range(exp):
        numpy.random.seed(i * 10)

        if args['--cost_type'] == 'dropout':
            model = FNN_syn_dropout(drate=float(args['--dropout_rate']))
        else:
            model = FNN_syn()
        x = T.matrix()
        t = T.ivector()
        ul_x = T.matrix()

        cost = get_cost_type_semi(model, x, t, ul_x, args)
        nll = costs.cross_entropy_loss(x=x,
                                       t=t,
                                       forward_func=model.forward_test)
        error = costs.error(x=x, t=t, forward_func=model.forward_test)

        optimizer = optimizers.MomentumSGD(cost=cost,
                                           params=model.params,
                                           lr=float(args['--lr']),
                                           momentum_ratio=float(
                                               args['--momentum_ratio']))

        f_train = theano.function(inputs=[],
                                  outputs=cost,
                                  updates=optimizer.updates,
                                  givens={
                                      x: x_train,
                                      t: t_train,
                                      ul_x: x_test
                                  },
                                  on_unused_input='warn')
        f_nll_train = theano.function(inputs=[],
                                      outputs=nll,
                                      givens={
                                          x: x_train,
                                          t: t_train
                                      })
        f_nll_test = theano.function(inputs=[],
                                     outputs=nll,
                                     givens={
                                         x: x_test,
                                         t: t_test
                                     })
        f_error_train = theano.function(inputs=[],
                                        outputs=error,
                                        givens={
                                            x: x_train,
                                            t: t_train
                                        })
        f_error_test = theano.function(inputs=[],
                                       outputs=error,
                                       givens={
                                           x: x_test,
                                           t: t_test
                                       })
        if args['--monitoring_LDS']:
            LDS = costs.average_LDS_finite_diff(
                x,
                model.forward_test,
                main_obj_type='CE',
                epsilon=float(args['--epsilon']),
                norm_constraint=args['--norm_constraint'],
                num_power_iter=int(
                    args['--num_power_iter_for_monitoring_LDS']))
            f_LDS_train = theano.function(inputs=[],
                                          outputs=LDS,
                                          givens={x: x_train})
            f_LDS_test = theano.function(inputs=[],
                                         outputs=LDS,
                                         givens={x: x_test})
        f_lr_decay = theano.function(
            inputs=[],
            outputs=optimizer.lr,
            updates={
                optimizer.lr:
                theano.shared(
                    numpy.array(args['--learning_rate_decay']).astype(
                        theano.config.floatX)) * optimizer.lr
            })

        statuses = {
            'nll_train': [],
            'error_train': [],
            'nll_test': [],
            'error_test': []
        }
        if args['--monitoring_LDS']:
            statuses['LDS_train'] = []
            statuses['LDS_test'] = []

        statuses['nll_train'].append(f_nll_train())
        statuses['error_train'].append(f_error_train())
        statuses['nll_test'].append(f_nll_test())
        statuses['error_test'].append(f_error_test())
        if args['--monitoring_LDS']:
            statuses['LDS_train'].append(f_LDS_train())
            statuses['LDS_test'].append(f_LDS_test())
            # print("LDS_train : ", statuses['LDS_train'][-1], "LDS_test : ", statuses['LDS_test'][-1])
        for epoch in range(int(args['--num_epochs'])):
            f_train()
            if (epoch + 1) % 10 == 0:
                statuses['nll_train'].append(f_nll_train())
                statuses['error_train'].append(f_error_train())
                statuses['nll_test'].append(f_nll_test())
                statuses['error_test'].append(f_error_test())
                print("[Epoch]", str(epoch))
                print("nll_train : ", statuses['nll_train'][-1], "nll_test : ",
                      statuses['nll_test'][-1], "error_test : ",
                      statuses['error_test'][-1])
                if args['--monitoring_LDS']:
                    statuses['LDS_train'].append(f_LDS_train())
                    statuses['LDS_test'].append(f_LDS_test())
                    # print("LDS_train : ", statuses['LDS_train'][-1], "LDS_test : ", statuses['LDS_test'][-1])
            f_lr_decay()

        train_err_history += numpy.array(
            statuses['error_train']) * 1.0 / dataset[0][0][1].shape[0]
        test_err_history += numpy.array(
            statuses['error_test']) * 1.0 / dataset[0][1][1].shape[0]
        error_rate = statuses['error_test'][-1].item() * 1.0 / dataset[0][1][1].shape[0]
        # print("error rate", error_rate)
        if error_rate < best_error_rate:
            best_model = model
            best_error_rate = error_rate
        avg_error_rate += error_rate

    train_err_history /= exp
    test_err_history /= exp
    # saver.save_npy(train_err_history, "train_errrate")
    # saver.save_npy(test_err_history, "test_errrate")
    extra_info = "%s_%s_%s_%s" % (args['--epsilon'], args['--eps_w'],
                                  args['--n_eps_c'], args['--n_eps_w'])
    print("%s-avg error rate-%s-%g" %
          (extra_info, args['--save_filename'].split(".")[0],
           avg_error_rate / exp))
    print("%s-best error rate-%s-%g" %
          (extra_info, args['--save_filename'].split(".")[0], best_error_rate))
    make_sure_path_exists("./best_trained_model")
    cPickle.dump((best_model, statuses, args),
                 open('./best_trained_model/' + args['--save_filename'], 'wb'),
                 cPickle.HIGHEST_PROTOCOL)
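The outer loop above repeats the whole experiment exp times under different seeds, accumulates the per-run error-rate histories, and keeps the model from the best run (with exp = 1, as in the script, the average and the best coincide). The bookkeeping pattern, reduced to plain Python with stand-in values:

exp = 3                                   # number of repetitions (the script uses 1)
best_error_rate, best_model = float('inf'), None
avg_error_rate = 0.0
for i in range(exp):
    model, error_rate = 'model-%d' % i, [0.12, 0.08, 0.10][i]  # stand-ins for one run
    if error_rate < best_error_rate:
        best_model, best_error_rate = model, error_rate
    avg_error_rate += error_rate
avg_error_rate /= exp   # 0.10; best_error_rate is 0.08, best_model is 'model-1'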
Code Example #5
def train(args):
    print(args)
    numpy.random.seed(int(args['--seed']))

    dataset = load_data.load_mnist_for_semi_sup(n_l=int(args['--num_labeled_samples']),
                                                n_v=int(args['--num_validation_samples']))

    x_train, t_train, ul_x_train = dataset[0]
    x_test, t_test = dataset[2]

    layer_sizes = [int(layer_size) for layer_size in args['--layer_sizes'].split('-')]
    model = FNN_MNIST(layer_sizes=layer_sizes)

    x = t_func.matrix()
    ul_x = t_func.matrix()
    t = t_func.ivector()

    cost_semi = get_cost_type_semi(model, x, t, ul_x, args)
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.ADAM(cost=cost_semi, params=model.params, alpha=float(args['--initial_learning_rate']))

    index = t_func.iscalar()
    ul_index = t_func.iscalar()
    batch_size = int(args['--batch_size'])
    ul_batch_size = int(args['--ul_batch_size'])

    f_train = theano.function(inputs=[index, ul_index], outputs=cost_semi, updates=optimizer.updates,
                              givens={
                                  x: x_train[batch_size * index:batch_size * (index + 1)],
                                  t: t_train[batch_size * index:batch_size * (index + 1)],
                                  ul_x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]},
                              on_unused_input='ignore')
    f_nll_train = theano.function(inputs=[index], outputs=nll,
                                  givens={
                                      x: x_train[batch_size * index:batch_size * (index + 1)],
                                      t: t_train[batch_size * index:batch_size * (index + 1)]})
    f_nll_test = theano.function(inputs=[index], outputs=nll,
                                 givens={
                                     x: x_test[batch_size * index:batch_size * (index + 1)],
                                     t: t_test[batch_size * index:batch_size * (index + 1)]})

    f_error_train = theano.function(inputs=[index], outputs=error,
                                    givens={
                                        x: x_train[batch_size * index:batch_size * (index + 1)],
                                        t: t_train[batch_size * index:batch_size * (index + 1)]})
    f_error_test = theano.function(inputs=[index], outputs=error,
                                   givens={
                                       x: x_test[batch_size * index:batch_size * (index + 1)],
                                       t: t_test[batch_size * index:batch_size * (index + 1)]})

    f_lr_decay = theano.function(inputs=[], outputs=optimizer.alpha,
                                 updates={optimizer.alpha: theano.shared(
                                     numpy.array(args['--learning_rate_decay']).astype(
                                         theano.config.floatX)) * optimizer.alpha})

    # Shuffle training set
    randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=x_train.shape[0])
    update_permutation = OrderedDict()
    update_permutation[x_train] = x_train[randix]
    update_permutation[t_train] = t_train[randix]
    f_permute_train_set = theano.function(inputs=[], outputs=x_train, updates=update_permutation)

    # Shuffle unlabeled training set
    ul_randix = RandomStreams(seed=numpy.random.randint(1234)).permutation(n=ul_x_train.shape[0])
    update_ul_permutation = OrderedDict()
    update_ul_permutation[ul_x_train] = ul_x_train[ul_randix]
    f_permute_ul_train_set = theano.function(inputs=[], outputs=ul_x_train, updates=update_ul_permutation)

    statuses = {'nll_train': [], 'error_train': [], 'nll_test': [], 'error_test': []}

    n_train = x_train.get_value().shape[0]
    n_test = x_test.get_value().shape[0]
    n_ul_train = ul_x_train.get_value().shape[0]

    l_i = 0
    ul_i = 0
    for epoch in range(int(args['--num_epochs'])):
        # cPickle.dump((statuses, args), open('./trained_model/' + 'tmp-' + args['--save_filename'], 'wb'),
        #              cPickle.HIGHEST_PROTOCOL)
        f_permute_train_set()
        f_permute_ul_train_set()
        for it in range(int(args['--num_batch_it'])):
            f_train(l_i, ul_i)
            l_i = 0 if l_i >= n_train // batch_size - 1 else l_i + 1
            ul_i = 0 if ul_i >= n_ul_train // ul_batch_size - 1 else ul_i + 1

        sum_nll_train = numpy.sum(numpy.array([f_nll_train(i) for i in range(int(n_train / batch_size))])) * batch_size
        sum_error_train = numpy.sum(numpy.array([f_error_train(i) for i in range(int(n_train / batch_size))]))
        sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in range(int(n_test / batch_size))])) * batch_size
        sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in range(int(n_test / batch_size))]))
        statuses['nll_train'].append(sum_nll_train / n_train)
        statuses['error_train'].append(sum_error_train)
        statuses['nll_test'].append(sum_nll_test / n_test)
        statuses['error_test'].append(sum_error_test)
        wlog("[Epoch] %d" % epoch)
        acc = 1 - 1.0*statuses['error_test'][-1]/n_test
        wlog("nll_test : %f error_test : %d accuracy:%f" % (statuses['nll_test'][-1], statuses['error_test'][-1], acc))
        # writer.add_scalar("Test/Loss", statuses['nll_test'][-1], epoch * int(args['--num_batch_it']))
        # writer.add_scalar("Test/Acc", acc, epoch * int(args['--num_batch_it']))
        f_lr_decay()
    # fine_tune batch stat
    f_fine_tune = theano.function(inputs=[ul_index], outputs=model.forward_for_finetuning_batch_stat(x),
                                  givens={x: ul_x_train[ul_batch_size * ul_index:ul_batch_size * (ul_index + 1)]})
    [f_fine_tune(i) for i in range(n_ul_train // ul_batch_size)]

    sum_nll_test = numpy.sum(numpy.array([f_nll_test(i) for i in range(n_test // batch_size)])) * batch_size
    sum_error_test = numpy.sum(numpy.array([f_error_test(i) for i in range(n_test // batch_size)]))
    statuses['nll_test'].append(sum_nll_test / n_test)
    statuses['error_test'].append(sum_error_test)
    acc = 1 - 1.0*statuses['error_test'][-1]/n_test
    wlog("final nll_test: %f error_test: %d accuracy:%f" % (statuses['nll_test'][-1], statuses['error_test'][-1], acc))
Code Example #6
File: train_syn.py Project: ilovecv/vat
def train(args):

    print args

    numpy.random.seed(1)

    dataset = cPickle.load(open('dataset/' + args['--dataset_filename'], 'rb'))
    x_train = theano.shared(numpy.asarray(dataset[0][0][0], dtype=theano.config.floatX))
    t_train = theano.shared(numpy.asarray(dataset[0][0][1], dtype='int32'))
    x_test = theano.shared(numpy.asarray(dataset[0][1][0], dtype=theano.config.floatX))
    t_test = theano.shared(numpy.asarray(dataset[0][1][1], dtype='int32'))

    if(args['--cost_type']=='dropout'):
        model = FNN_syn_dropout(drate=float(args['--dropout_rate']))
    else:
        model = FNN_syn()
    x = T.matrix()
    t = T.ivector()

    if(args['--cost_type']=='MLE' or args['--cost_type']=='dropout'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train)
    elif(args['--cost_type']=='L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif(args['--cost_type']=='AT'):
        cost = costs.adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              lamb=float(args['--lamb']),
                                              norm_constraint = args['--norm_constraint'])
    elif(args['--cost_type']=='VAT'):
        cost = costs.virtual_adversarial_training(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']))
    elif(args['--cost_type']=='VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(x,t,model.forward_train,
                                              'CE',
                                              epsilon=float(args['--epsilon']),
                                              norm_constraint = args['--norm_constraint'],
                                              num_power_iter = int(args['--num_power_iter']))
    nll = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_test)
    error = costs.error(x=x,t=t,forward_func=model.forward_test)

    optimizer = optimizers.MomentumSGD(cost=cost,params=model.params,lr=float(args['--initial_learning_rate']),
                                       momentum_ratio=float(args['--momentum_ratio']))

    f_train = theano.function(inputs=[], outputs=cost, updates=optimizer.updates,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_nll_train = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_nll_test = theano.function(inputs=[], outputs=nll,
                              givens={
                                  x:x_test,
                                  t:t_test})

    f_error_train = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_train,
                                  t:t_train})
    f_error_test = theano.function(inputs=[], outputs=error,
                              givens={
                                  x:x_test,
                                  t:t_test})
    if(args['--monitoring_LDS']):
        LDS = costs.average_LDS_finite_diff(x,
                        model.forward_test,
                        main_obj_type='CE',
                        epsilon=float(args['--epsilon']),
                        norm_constraint = args['--norm_constraint'],
                        num_power_iter = int(args['--num_power_iter_for_monitoring_LDS']))
        f_LDS_train = theano.function(inputs=[], outputs=LDS,
                              givens={
                                  x:x_train})
        f_LDS_test = theano.function(inputs=[], outputs=LDS,
                              givens={
                                  x:x_test})
    f_lr_decay = theano.function(inputs=[],outputs=optimizer.lr,
                                 updates={optimizer.lr:theano.shared(numpy.array(args['--learning_rate_decay']).astype(theano.config.floatX))*optimizer.lr})


    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []
    if(args['--monitoring_LDS']):
        statuses['LDS_train'] = []
        statuses['LDS_test'] = []

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train())
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test())
    print "[Epoch]",str(0)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]
    if(args['--monitoring_LDS']):
        statuses['LDS_train'].append(f_LDS_train())
        statuses['LDS_test'].append(f_LDS_test())
        print "LDS_train : ", statuses['LDS_train'][-1], "LDS_test : " , statuses['LDS_test'][-1]

    print "training..."

    for epoch in xrange(int(args['--num_epochs'])):
        train_cost = f_train()
        if((epoch+1)%20==0):
            statuses['nll_train'].append(f_nll_train())
            statuses['error_train'].append(f_error_train())
            statuses['nll_test'].append(f_nll_test())
            statuses['error_test'].append(f_error_test())
            print "[Epoch]",str(epoch)
            print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                    "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]
            if(args['--monitoring_LDS']):
                statuses['LDS_train'].append(f_LDS_train())
                statuses['LDS_test'].append(f_LDS_test())
                print "LDS_train : ", statuses['LDS_train'][-1], "LDS_test : " , statuses['LDS_test'][-1]

        f_lr_decay()
    make_sure_path_exists("./trained_model")
    cPickle.dump((model,statuses,args),open('./trained_model/'+args['--save_filename'],'wb'),cPickle.HIGHEST_PROTOCOL)
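f_lr_decay multiplies the optimizer's learning rate by --learning_rate_decay once per epoch, i.e. an exponential schedule. Stripped of the Theano shared-variable machinery (the values here are hypothetical):

lr = 0.5                        # --initial_learning_rate
learning_rate_decay = 0.95      # --learning_rate_decay
for epoch in range(100):
    # ... one full-batch MomentumSGD update at the current lr ...
    lr *= learning_rate_decay   # after 100 epochs: 0.5 * 0.95**100 ~= 3.0e-3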
Code Example #7
def train(args):

    print args

    numpy.random.seed(1)

    dataset = cPickle.load(open('dataset/' + args['--dataset_filename'], 'rb'))
    x_train = theano.shared(
        numpy.asarray(dataset[0][0][0], dtype=theano.config.floatX))
    t_train = theano.shared(numpy.asarray(dataset[0][0][1], dtype='int32'))
    x_test = theano.shared(
        numpy.asarray(dataset[0][1][0], dtype=theano.config.floatX))
    t_test = theano.shared(numpy.asarray(dataset[0][1][1], dtype='int32'))

    if (args['--cost_type'] == 'dropout'):
        model = FNN_syn_dropout(drate=float(args['--dropout_rate']))
    else:
        model = FNN_syn()
    x = T.matrix()
    t = T.ivector()

    if (args['--cost_type'] == 'MLE' or args['--cost_type'] == 'dropout'):
        cost = costs.cross_entropy_loss(x=x,
                                        t=t,
                                        forward_func=model.forward_train)
    elif (args['--cost_type'] == 'L2'):
        cost = costs.cross_entropy_loss(x=x,t=t,forward_func=model.forward_train) \
               + costs.weight_decay(params=model.params,coeff=float(args['--lamb']))
    elif (args['--cost_type'] == 'AT'):
        cost = costs.adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            lamb=float(args['--lamb']),
            norm_constraint=args['--norm_constraint'])
    elif (args['--cost_type'] == 'VAT'):
        cost = costs.virtual_adversarial_training(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']))
    elif (args['--cost_type'] == 'VAT_finite_diff'):
        cost = costs.virtual_adversarial_training_finite_diff(
            x,
            t,
            model.forward_train,
            'CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter']))
    nll = costs.cross_entropy_loss(x=x, t=t, forward_func=model.forward_test)
    error = costs.error(x=x, t=t, forward_func=model.forward_test)

    optimizer = optimizers.MomentumSGD(
        cost=cost,
        params=model.params,
        lr=float(args['--initial_learning_rate']),
        momentum_ratio=float(args['--momentum_ratio']))

    f_train = theano.function(inputs=[],
                              outputs=cost,
                              updates=optimizer.updates,
                              givens={
                                  x: x_train,
                                  t: t_train
                              })
    f_nll_train = theano.function(inputs=[],
                                  outputs=nll,
                                  givens={
                                      x: x_train,
                                      t: t_train
                                  })
    f_nll_test = theano.function(inputs=[],
                                 outputs=nll,
                                 givens={
                                     x: x_test,
                                     t: t_test
                                 })

    f_error_train = theano.function(inputs=[],
                                    outputs=error,
                                    givens={
                                        x: x_train,
                                        t: t_train
                                    })
    f_error_test = theano.function(inputs=[],
                                   outputs=error,
                                   givens={
                                       x: x_test,
                                       t: t_test
                                   })
    if (args['--monitoring_LDS']):
        LDS = costs.average_LDS_finite_diff(
            x,
            model.forward_test,
            main_obj_type='CE',
            epsilon=float(args['--epsilon']),
            norm_constraint=args['--norm_constraint'],
            num_power_iter=int(args['--num_power_iter_for_monitoring_LDS']))
        f_LDS_train = theano.function(inputs=[],
                                      outputs=LDS,
                                      givens={x: x_train})
        f_LDS_test = theano.function(inputs=[],
                                     outputs=LDS,
                                     givens={x: x_test})
    f_lr_decay = theano.function(
        inputs=[],
        outputs=optimizer.lr,
        updates={
            optimizer.lr:
            theano.shared(
                numpy.array(args['--learning_rate_decay']).astype(
                    theano.config.floatX)) * optimizer.lr
        })

    statuses = {}
    statuses['nll_train'] = []
    statuses['error_train'] = []
    statuses['nll_test'] = []
    statuses['error_test'] = []
    if (args['--monitoring_LDS']):
        statuses['LDS_train'] = []
        statuses['LDS_test'] = []

    statuses['nll_train'].append(f_nll_train())
    statuses['error_train'].append(f_error_train())
    statuses['nll_test'].append(f_nll_test())
    statuses['error_test'].append(f_error_test())
    print "[Epoch]", str(0)
    print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
            "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]
    if (args['--monitoring_LDS']):
        statuses['LDS_train'].append(f_LDS_train())
        statuses['LDS_test'].append(f_LDS_test())
        print "LDS_train : ", statuses['LDS_train'][
            -1], "LDS_test : ", statuses['LDS_test'][-1]

    print "training..."

    for epoch in xrange(int(args['--num_epochs'])):
        train_cost = f_train()
        if ((epoch + 1) % 20 == 0):
            statuses['nll_train'].append(f_nll_train())
            statuses['error_train'].append(f_error_train())
            statuses['nll_test'].append(f_nll_test())
            statuses['error_test'].append(f_error_test())
            print "[Epoch]", str(epoch)
            print  "nll_train : " , statuses['nll_train'][-1], "error_train : ", statuses['error_train'][-1], \
                    "nll_test : " , statuses['nll_test'][-1],  "error_test : ", statuses['error_test'][-1]
            if (args['--monitoring_LDS']):
                statuses['LDS_train'].append(f_LDS_train())
                statuses['LDS_test'].append(f_LDS_test())
                print "LDS_train : ", statuses['LDS_train'][
                    -1], "LDS_test : ", statuses['LDS_test'][-1]

        f_lr_decay()
    make_sure_path_exists("./trained_model")
    cPickle.dump((model, statuses, args),
                 open('./trained_model/' + args['--save_filename'], 'wb'),
                 cPickle.HIGHEST_PROTOCOL)
Code Example #8
File: test_mnist.py Project: ilovecv/vat
import cPickle

import numpy
from docopt import docopt

from load_data import load_mnist_full
import theano
import theano.tensor as T

from source.costs import error

if __name__ == '__main__':
    args = docopt(__doc__)


    m_batch_size = 100
    dataset = load_mnist_full()
    test_set_x,test_set_y = dataset[1]
    n_test_batches = numpy.ceil((test_set_x.get_value(borrow=True).shape[0]) / numpy.float(m_batch_size))

    trained_model = cPickle.load(open("trained_model/" + args['--load_filename'],'rb'))[0]

    index = T.iscalar()
    x = T.matrix()
    t = T.ivector()
    test_error = theano.function(inputs=[index],
                                       outputs=error(x=x,t=t,forward_func=trained_model.forward_test),
                                       givens={
                                           x: test_set_x[m_batch_size * index:m_batch_size * (index + 1)],
                                           t: test_set_y[m_batch_size * index:m_batch_size * (index + 1)]}
                                       )

    test_errors = [test_error(i) for i in xrange(numpy.int(numpy.ceil(n_test_batches)))]
    print "the number of misclassified examples on test set:" + str(numpy.sum(test_errors)) + ", and test error rate(%):" + str(100*numpy.sum(test_errors)/numpy.float(test_set_x.get_value(borrow=True).shape[0]))