Example 1
def main():
    # ------------- Dataset -------------
    from textmenu import textmenu
    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    #x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
    model_name_suffix = dataset

    # default batch size
    batch_size = 50

    # params
    if dataset == 'mnist':
        n_layers = 1
        n_total_inter_dims = 1024
        lr = 1e-4
        dataset = get_dataset.MNISTDataset()
        dims = dataset.dims
        mean_type = tf.nn.relu
        loss_type = tf.nn.softmax_cross_entropy_with_logits
        opt_type = tf.train.AdamOptimizer
        eval_type = multi_clf_err
        #utils_params = {'res_inter_dim': n_total_inter_dims // n_layers, 'mean_type': mean_type}
        #utils_type = MNISTAnytimeNNUtils
        utils_params = {
            'image_side': 28,
            'image_channels': 1,
            'width': 4,
            'channels': [8, 16],
            'strides': [2, 2],
            'mean_type': mean_type,
            'weak_predictions': 'row_sum'
        }
        utils_type = ImageAnytimeNN2DUtils
    elif dataset == 'cifar':
        lr = 0.1
        batch_size = 128
        dataset = get_dataset.CIFARDatasetTensorflow(batch_size=batch_size)
        dims = dataset.dims
        mean_type = tf.nn.relu
        loss_type = tf.nn.softmax_cross_entropy_with_logits
        opt_type = lambda lr: tf.train.MomentumOptimizer(lr, momentum=0.9)
        eval_type = multi_clf_err

        def build_resnet_params(n=3, init_total_channel=32, width=2):
            channel = init_total_channel / width
            channels = [channel]
            layer_type = ['conv']
            res_add = [False]
            conv_kernel = [3]
            pool_kernel = [1]
            strides = [1]
            for i in range(3):  # 3 stages of channel widths; feature map shrinks at each stage transition
                for j in range(n * 2):  # at each channel size, there are 2n convs
                    channels.append(channel)
                    layer_type.append('conv')
                    conv_kernel.append(3)
                    res_add.append(j % 2 == 1)
                    if channels[-1] != channels[-2]:
                        strides.append(2)
                        pool_kernel.append(2)
                    else:
                        strides.append(1)
                        pool_kernel.append(1)
                channel *= 2

            return {
                'width': width,
                'channels': channels,
                'layer_type': layer_type,
                'res_add': res_add,
                'conv_kernel': conv_kernel,
                'pool_kernel': pool_kernel,
                'strides': strides
            }
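
        # With the defaults (n=3, init_total_channel=32, width=2) this produces the usual
        # CIFAR ResNet schedule: 1 + 3*2*n = 19 conv layers with channels
        # [16]*7 + [32]*6 + [64]*6, stride-2 downsampling at each stage transition,
        # and a residual add after every pair of convs.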

        utils_params = build_resnet_params()
        utils_params.update({
            'mean_type': mean_type,
            'weak_predictions': 'row_sum',
            'pred_sum_gamma': 1.0
        })
        utils_type = ImageAnytimeNN2DUtils

    # ann = AnytimeNeuralNet(n_layers, dims, utils_type, loss_type, \
    #                       opt_type, eval_type, utils_params)
    ann = AnytimeNeuralNet2D(dims, utils_type, loss_type, opt_type, eval_type,
                             utils_params)

    # Model saving paths.
    model_dir = '../model/'
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)
    best_model_fname = 'best_model_{}.ckpt'.format(model_name_suffix)
    init_model_fname = 'initial_model_{}.ckpt'.format(model_name_suffix)
    best_model_path = os.path.join(model_dir, best_model_fname)
    init_model_path = os.path.join(model_dir, init_model_fname)
    tf.train.SummaryWriter(logdir='../log/', graph=tf.get_default_graph())

    # sessions and initialization
    init = tf.initialize_all_variables()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    print 'Initializing...'
    sess.run(init)
    tf.train.start_queue_runners(sess=sess)
    print 'Initialization done'

    # Signal Handling:
    shandler = SignalHandler()

    # training epochs
    val_interval = 48000
    max_epoch = 2000
    lr_decay_step = 80  # Never decays for adam?
    lr_decay_gamma = 0.1
    t = 0
    last_epoch = -1

    # load saved_model HACK/TODO
    restore_model_path = None
    #restore_model_path = '../model/best_model_cifar.ckpt'
    #ann.saver.restore(sess, restore_model_path)
    while dataset.epoch < max_epoch:
        if last_epoch != dataset.epoch:
            print("-----Epoch {:d}-----\n".format(dataset.epoch))
            last_epoch = dataset.epoch
            if dataset.epoch > 0 and dataset.epoch % lr_decay_step == 0:
                lr *= lr_decay_gamma

        x, y = dataset.next_batch(batch_size, sess)
        actual_batch_size = x.shape[0]

        print_sameline('...epoch={},t={}'.format(dataset.epoch, t))
        sess.run(ann.training(method='last'),
                 feed_dict=ann.fill_feed_dict(x, y, lr))

        # Evaluate
        t += actual_batch_size
        if t >= val_interval:
            t = 0
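        # Evaluate every val_interval samples; also evaluate once right after the first
        # batch when resuming from a restored checkpoint.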
        if t % val_interval == 0 or (dataset.epoch == 0
                                     and t == actual_batch_size
                                     and restore_model_path is not None):
            print '\n'
            #n_tra_eval_samples = 1000
            #n_tra_eval_batches = n_tra_eval_samples // batch_size
            ##x_tra_samples, y_tra_samples = dataset.sample_training(n_tra_eval_samples)
            #l_last_loss_tra = []
            #l_eval_tra = []
            #for tra_eval_i in range(n_tra_eval_batches):
            #    #indx_s = tra_eval_i * batch_size
            #    #indx_e = indx_s + batch_size
            #    x_tra_samples, y_tra_samples = dataset.sample_training(batch_size, sess)
            #    last_loss_tra, eval_tra = \
            #        sess.run([ann.last_loss(), ann.evaluation()],
            #                 feed_dict=ann.fill_feed_dict(x_tra_samples,  # [indx_s:indx_e],
            #                                              y_tra_samples, lr))  # [indx_s:indx_e], lr))
            #    assert(not np.isnan(last_loss_tra))
            #    l_last_loss_tra.append(last_loss_tra)
            #    l_eval_tra.append(eval_tra)
            #last_loss_tra = np.mean(l_last_loss_tra)
            #eval_tra = np.mean(l_eval_tra)

            #ps_losses = sess.run(ann.losses, feed_dict=ann.fill_feed_dict(x_tra_samples, y_tra_samples,lr))
            # plt.figure(1)
            # plt.clf()
            #plt.plot(np.arange(len(ps_losses)), np.log(ps_losses))
            #plt.title('log scale loss vs. learner id')
            # plt.draw()
            # plt.show(block=False)

            n_val = 5000
            n_val_batches = n_val // batch_size
            l_loss_val = []
            l_eval_val = []
            for vali in range(n_val_batches):
                x_val, y_val = dataset.next_test(batch_size, sess)
                run_ret = sess.run(ann.losses + ann.anytime_eval_results,
                                   feed_dict=ann.fill_feed_dict(
                                       x_val, y_val, lr))
                n_ret = len(run_ret) // 2
                losses = run_ret[:n_ret]
                evals = run_ret[n_ret:]

                assert (not np.isnan(losses[-1]))
                l_loss_val.append(losses)
                l_eval_val.append(evals)
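            # Stacking gives shape (n_val_batches, n_predictors); averaging over axis 0
            # yields one validation loss and one error per anytime predictor.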
            evals_val = np.mean(np.array(l_eval_val), axis=0)
            losses_val = np.mean(np.array(l_loss_val), axis=0)

            print 'epoch={},t={} \n evals_val:'.format(dataset.epoch, t)
            print evals_val
            print 'losses_val:'
            print losses_val
            #print 'last_loss_tra={} eval_tra={}\n'.format(last_loss_tra, eval_tra)
        # ENDIF evaluation
        if shandler.captured():
            print("----------------------")
            print(
                "Paused. Set parameters before loading the initial model again..."
            )
            print("----------------------")
            # helper functions
            save_model = lambda fname: ann.saver.save(sess, fname)
            save_best = partial(save_model, best_model_path)
            save_init = partial(save_model, init_model_path)
            pdb.set_trace()
            ann.saver.restore(sess, init_model_path)
            dataset.epoch = 0
            shandler.reset()
        # ENDIF handler of signals
    # end of the training loop

    pdb.set_trace()
    return 0
Example 2

def dbfname_to_plot_points(fname, N, traval, col, Kdb=None):
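    # Turns a saved .npz error log into plot points: x is the cumulative number of
    # weak-learner predictions (via deepboost_n_samples_to_n_preds), y is column `col`
    # of d[traval] smoothed by average_end_at (presumably a trailing average over 5
    # entries), both subsampled to roughly Kdb points.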
    if Kdb is None:
        Kdb = 50
    d = np.load(fname)
    d_n_preds = deepboost_n_samples_to_n_preds(N, d[traval][:, 0])
    Kd = d[traval].shape[0] // Kdb
    d_select_indices = np.arange(0, d_n_preds.shape[0], Kd) + Kd - 1
    if d_select_indices[-1] > d_n_preds.shape[0] - 1:
        d_select_indices[-1] = d_n_preds.shape[0] - 1
    return d_n_preds[d_select_indices], average_end_at(d[traval][:, col],
                                                       d_select_indices, 5)


datasets = get_dataset.all_names()
indx = textmenu(datasets)
if indx is None:
    exit(0)
dataset = datasets[indx]
x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
model_name_suffix = dataset

Kdb = None
if dataset == 'a9a':
    n_nodes = 8
    col = 1
elif dataset == 'mnist':
    n_nodes = 5
    cost_multiplier = 1
    col = 1
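A minimal usage sketch for dbfname_to_plot_points, following the dataset selection above. The log path, the 'val_err' key, and the choice of N are illustrative assumptions rather than part of the original example:

# Hypothetical usage of dbfname_to_plot_points; the path, key and N are assumptions.
import matplotlib.pyplot as plt

fname = '../log/err_vs_gstep_{}.npz'.format(model_name_suffix)  # assumed log location
N = x_tra.shape[0]  # assumed: samples per epoch, to convert sample counts into prediction counts
xs, ys = dbfname_to_plot_points(fname, N, 'val_err', col, Kdb=Kdb)
plt.plot(xs, ys, label=dataset)
plt.xlabel('number of weak-learner predictions')
plt.ylabel('error')
plt.legend()
plt.show()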
Example 3
def main(_):
    from textmenu import textmenu
    # ------------- Dataset -------------
    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
    model_name_suffix = dataset
    if dataset == 'arun_1d':
        n_nodes = [200, 1]
        n_lvls = len(n_nodes)
        mean_types = [sigmoid_clf_mean for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls-1) ]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {'type': 'linear'}

        lr_boost_adam = 0.3 * 1e-2
        lr_leaf_adam = 0.3 * 1e-1
        lr_decay_step = x_tra.shape[0] * 10
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'mnist':
        n_nodes = [10, 1]
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.relu for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [logistic_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(tf.nn.softmax_cross_entropy_with_logits)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err

        weak_learner_params = {
            'type': 'conv',
            'conv_size': [5, 5],
            'stride': [2, 2]
        }

        # mnist lr
        lr_boost_adam = 1e-8
        lr_leaf_adam = 1e-3
        lr_decay_step = x_tra.shape[0] * 5
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'grasp_hog':
        n_nodes = [32, 1]
        n_lvls = len(n_nodes)
        mean_types = [sigmoid_clf_mean for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [logistic_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(logistic_loss_eltws_masked)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err_masked
        weak_learner_params = {'type': 'linear'}

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 3
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'cifar':
        n_nodes = [50, 1]
        n_lvls = len(n_nodes)
        mean_types = [tf.sin for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(tf.nn.softmax_cross_entropy_with_logits)

        #opt_types =  [ tf.train.GradientDescentOptimizer for lvl in range(n_lvls) ]
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err

        # Use all train and test:
        x_tra = x_all
        y_tra = y_all
        yp_tra = yp_all
        x_val = x_test
        y_val = y_test
        yp_val = yp_test

        train_set = list(range(x_tra.shape[0]))

        weak_learner_params = {'type': 'linear'}

        # cifar lr
        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 3
        ps_ws_val = 0.5
        reg_lambda = 0
    else:
        raise Exception('Did not recognize dataset: {}'.format(dataset))

    train_set = list(range(x_tra.shape[0]))

    input_dim = len(x_val[0].ravel())
    output_dim = len(y_val[0].ravel())

    dims = [output_dim for _ in xrange(n_lvls + 2)]
    dims[0] = input_dim
    dims[1] = input_dim  # TODO do it in better style
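    # dims ends up as [input_dim, input_dim, output_dim, ..., output_dim] with n_lvls + 2 entries.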

    lr_boost = lr_boost_adam
    lr_leaf = lr_leaf_adam

    # modify the default tensorflow graph.
    weak_classification = False
    dbg = TFDeepBoostGraph(dims, n_nodes, weak_classification, mean_types,
                           loss_types, opt_types, weak_learner_params,
                           eval_type)

    init = tf.initialize_all_variables()
    #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.20)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    print 'Initializing...'
    sess.run(init)
    print 'Initialization done'

    t = 0
    # A line search can waste an epoch, so max_epoch is incremented whenever a line search
    # is performed. To prevent the epoch count from growing without bound, max_epoch_ult
    # acts as a hard cap that stops training regardless.
    epoch = -1
    max_epoch = 100
    max_epoch_ult = max_epoch * 2
    batch_size = 64
    val_interval = batch_size * 10

    # if line search, these will shrink learning rate until result improves.
    do_line_search = False
    min_non_ls_epochs = 4  # min. number of epochs in the beginning where we don't do line search
    gamma_boost = 0.7
    gamma_leaf = 0.7
    # linesearch variables
    worsen_cnt = 0
    best_avg_loss = np.Inf
    restore_threshold = len(train_set) / val_interval

    # Model saving paths.
    model_dir = '../model/'
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)
    best_model_fname = 'best_model_{}.ckpt'.format(model_name_suffix)
    init_model_fname = 'initial_model_{}.ckpt'.format(model_name_suffix)
    best_model_path = os.path.join(model_dir, best_model_fname)
    init_model_path = os.path.join(model_dir, init_model_fname)
    dbg.saver.save(sess, init_model_path)

    tf.train.SummaryWriter(logdir='../log/', graph=tf.get_default_graph())

    stop_program = False
    lr_gamma = 0.3
    lr_global_step = 0

    # Total number of samples
    global_step = 0
    tra_err = []
    val_err = []

    while not stop_program and epoch < max_epoch and epoch < max_epoch_ult:
        epoch += 1
        print("-----Epoch {:d}-----".format(epoch))
        np.random.shuffle(train_set)
        for si in range(0, len(train_set), batch_size):
            # print 'train epoch={}, start={}'.format(epoch, si)
            si_end = min(si + batch_size, len(train_set))
            x = x_tra[train_set[si:si_end]]
            y = y_tra[train_set[si:si_end]]

            if dbg.sigint_capture == True:
                # don't do any work this iteration, restart all computation with the next
                break
            n_applies = len(dbg.training_update())
            sess.run(dbg.training(),
                     feed_dict=dbg.fill_feed_dict(x, y, lr_boost, lr_leaf,
                                                  ps_ws_val, reg_lambda))

            # Evaluate
            t += si_end - si
            if si_end - si < batch_size:
                t = 0
            lr_global_step += si_end - si
            global_step += si_end - si
            if lr_global_step > lr_decay_step:
                lr_global_step -= lr_decay_step
                lr_boost *= lr_gamma
                lr_leaf *= lr_gamma
                print("----------------------")
                print('Decayed step size: lr_boost={:.3g}, lr_leaf={:.3g}'.
                      format(lr_boost, lr_leaf))
                print("----------------------")
            if t % val_interval == 0:
                preds_tra, avg_loss_tra, avg_tgt_loss_tra =\
                    sess.run([dbg.inference(), dbg.evaluation(), dbg.evaluation(loss=True)],
                             feed_dict=dbg.fill_feed_dict(x_tra[:5000], y_tra[:5000],
                                                          lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                assert (not np.isnan(avg_loss_tra))
                preds, avg_loss, avg_tgt_loss = sess.run(
                    [
                        dbg.inference(),
                        dbg.evaluation(),
                        dbg.evaluation(loss=True)
                    ],
                    feed_dict=dbg.fill_feed_dict(x_val, y_val, lr_boost,
                                                 lr_leaf, ps_ws_val,
                                                 reg_lambda))
                assert (not np.isnan(avg_loss))

                tra_err.append((global_step, avg_loss_tra, avg_tgt_loss_tra))
                val_err.append((global_step, avg_loss, avg_tgt_loss))

                # Plotting the fit.
                if dataset == 'arun_1d':
                    weak_predictions = sess.run(dbg.weak_learner_inference(),
                                                feed_dict=dbg.fill_feed_dict(
                                                    x_val, y_val, lr_boost,
                                                    lr_leaf, ps_ws_val,
                                                    reg_lambda))
                    tgts = sess.run(dbg.ll_nodes[-1][0].children_tgts[2:],
                                    feed_dict=dbg.fill_feed_dict(
                                        x_val, y_val, lr_boost, lr_leaf,
                                        ps_ws_val, reg_lambda))
                    plt.figure(1)
                    plt.clf()
                    plt.plot(x_val, y_val, lw=3, color='green', label='GT')
                    for wi, wpreds in enumerate(weak_predictions):
                        plt.plot(x_val, -wpreds, label=str(wi))
                    # for wi, tgt in enumerate(tgts):
                    #  plt.plot(x_val, tgt, label=str(wi))
                    plt.plot(x_val, preds, lw=3, color='blue', label='Yhat')
                    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                    plt.title('boostprop')
                    plt.tight_layout()
                    plt.draw()
                    plt.show(block=False)
                print 'epoch={},t={} \n avg_loss={} avg_tgt_loss={} \n loss_tra={} tgt_loss_tra={}'.format(
                    epoch, t, avg_loss, avg_tgt_loss, avg_loss_tra,
                    avg_tgt_loss_tra)

                if epoch < min_non_ls_epochs:
                    continue

                if do_line_search:
                    # restores if is worse than the best multiple times
                    if avg_loss > best_avg_loss:
                        worsen_cnt += 1
                        if worsen_cnt > restore_threshold:
                            print 'Restore to previous best loss: {}'.format(
                                best_avg_loss)
                            dbg.saver.restore(sess, best_model_path)
                            worsen_cnt = 0
                            max_epoch += 1
                            lr_boost *= gamma_boost
                            lr_leaf *= gamma_leaf
                    else:
                        worsen_cnt = 0
                        lr_boost = lr_boost_adam
                        lr_leaf = lr_leaf_adam
                        dbg.saver.save(sess, best_model_path)
                        best_avg_loss = avg_loss
        # endfor
        # end of epoch, so save out the results so far
        np.savez('../log/err_vs_gstep_fr_{:s}.npz'.format(model_name_suffix),
                 tra_err=np.asarray(tra_err),
                 val_err=np.asarray(val_err))
        if dbg.sigint_capture == True:
            print("----------------------")
            print(
                "Paused. Set parameters before loading the initial model again..."
            )
            print("----------------------")
            # helper functions
            save_model = lambda fname: dbg.saver.save(sess, fname)
            save_best = partial(save_model, best_model_path)
            save_init = partial(save_model, init_model_path)
            pdb.set_trace()
            dbg.saver.restore(sess, init_model_path)
            epoch = -1
            t = 0
            dbg.sigint_capture = False
    # endfor

    print("Program Finished")
    np.savez('../log/err_vs_gstep_{:s}.npz'.format(model_name_suffix),
             tra_err=np.asarray(tra_err),
             val_err=np.asarray(val_err))
    pdb.set_trace()
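As a side note, the error curves that this script writes to ../log/ can be inspected with a short companion snippet; a minimal sketch is below (the concrete file name and matplotlib are assumptions, while the array keys and column layout come from the np.savez calls above):

# Sketch: load an err_vs_gstep_*.npz file written above and plot loss vs. samples seen.
# Columns are (global_step, avg_loss, avg_tgt_loss), matching the tuples appended to
# tra_err / val_err in the training loop; the file name suffix is just an example.
import numpy as np
import matplotlib.pyplot as plt

log = np.load('../log/err_vs_gstep_mnist.npz')
tra_err, val_err = log['tra_err'], log['val_err']
plt.plot(tra_err[:, 0], tra_err[:, 1], label='train')
plt.plot(val_err[:, 0], val_err[:, 1], label='validation')
plt.xlabel('training samples seen (global_step)')
plt.ylabel('avg_loss')
plt.legend()
plt.show()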
Example 4
def main(online_boost):
    print("------------------------")
    print("Running Online Boosting = {:s}".format(str(online_boost)))
    print("------------------------")
    from textmenu import textmenu
    # ------------- Dataset -------------
    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)
    model_name_suffix = dataset

    # default params:
    lr_gamma = 0.3
    max_epoch = 200
    batch_weak_learner_max_epoch = 40

    if dataset == 'arun_1d':
        n_nodes = [20, 10, 1]
        n_lvls = len(n_nodes)
        mean_types = [sigmoid_clf_mean for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls-1) ]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {
            'type': 'res',
            'res_inter_dim': 1,
            'res_add_linear': False
        }
        weak_classification = False

        lr_boost_adam = 0.3 * 1e-2
        lr_leaf_adam = 0.3 * 1e-1
        lr_decay_step = x_tra.shape[0] * 10
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'mnist':
        n_nodes = [10, 1]
        batch_weak_learner_max_epoch = 24
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.relu for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(tf.nn.softmax_cross_entropy_with_logits)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err

        weak_learner_params = {
            'type': 'conv',
            'filter_size': [5, 5, 1, 5],
            'stride': [2, 2]
        }
        weak_classification = True

        # mnist lr
        lr_boost_adam = 1e-8
        lr_leaf_adam = 5e-4
        lr_decay_step = x_tra.shape[0] * 100
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'grasp_hog':
        n_nodes = [32, 1]
        n_lvls = len(n_nodes)
        mean_types = [sigmoid_clf_mean for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [logistic_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(logistic_loss_eltws_masked)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err_masked
        weak_learner_params = {'type': 'res', 'res_inter_dim': x_tra.shape[1]}
        weak_classification = True

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 3
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'cifar':
        n_nodes = [50, 1]
        n_lvls = len(n_nodes)
        mean_types = [tf.sin for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(tf.nn.softmax_cross_entropy_with_logits)

        #opt_types =  [ tf.train.GradientDescentOptimizer for lvl in range(n_lvls) ]
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = multi_clf_err

        # Use all train and test:
        x_tra = x_all
        y_tra = y_all
        yp_tra = yp_all
        x_val = x_test
        y_val = y_test
        yp_val = yp_test

        train_set = list(range(x_tra.shape[0]))

        weak_learner_params = {'type': 'linear'}
        weak_classification = True

        # cifar lr
        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 3
        ps_ws_val = 0.5
        reg_lambda = 0

    elif dataset == 'a9a':
        n_nodes = [4, 1]
        batch_weak_learner_max_epoch = 5
        max_epoch = 50
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.sigmoid for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(square_loss_eltws)

        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = logit_binary_clf_err

        weak_learner_params = {'type': 'res', 'res_inter_dim': 1}
        weak_classification = False

        # mnist lr
        lr_boost_adam = 1e-8
        lr_leaf_adam = 1e-2  #1e-1 for online. 1e-2 for batch
        lr_decay_step = x_tra.shape[0] * 5
        ps_ws_val = 1.0
        reg_lambda = 0.0

    elif dataset == 'slice':
        n_nodes = [7, 1]
        batch_weak_learner_max_epoch = 25
        max_epoch = 100
        n_lvls = len(n_nodes)
        mean_types = [
            lambda x: tf.maximum(0.3 * x, x) for lvl in range(n_lvls - 1)
        ]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        #loss_types = [logistic_loss_eltws for lvl in range(n_lvls-1) ]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {'type': 'res', 'res_inter_dim': 10}
        weak_classification = False

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 4
        ps_ws_val = 1.0
        reg_lambda = 0.0
        lr_gamma = 0.5

    elif dataset == 'year':
        n_nodes = [10, 1]
        batch_weak_learner_max_epoch = 10
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.relu for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {'type': 'res', 'res_inter_dim': 10}
        weak_classification = False

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-3
        lr_decay_step = x_tra.shape[0] * 200
        ps_ws_val = 1.0
        reg_lambda = 0.0
        lr_gamma = 0.5

    elif dataset == 'abalone':
        n_nodes = [3, 1]
        batch_weak_learner_max_epoch = 25
        max_epoch = 100
        n_lvls = len(n_nodes)
        mean_types = [tf.nn.sigmoid for lvl in range(n_lvls - 1)]
        mean_types.append(lambda x: x)
        loss_types = [square_loss_eltws for lvl in range(n_lvls - 1)]
        loss_types.append(square_loss_eltws)
        opt_types = [tf.train.AdamOptimizer for lvl in range(n_lvls)]
        eval_type = None

        weak_learner_params = {
            'type': 'res',
            'res_inter_dim': 1,
            'res_add_linear': False
        }
        weak_classification = False

        lr_boost_adam = 1e-3
        lr_leaf_adam = 1e-2
        lr_decay_step = x_tra.shape[0] * 1000
        ps_ws_val = 1.0
        reg_lambda = 0.0
        lr_gamma = 0.5

    else:
        raise Exception('Did not recognize dataset: {}'.format(dataset))

    # modify the default tensorflow graph.
    train_set = list(range(x_tra.shape[0]))

    input_dim = len(x_val[0].ravel())
    output_dim = len(y_val[0].ravel())

    dims = [output_dim for _ in xrange(n_lvls + 2)]
    dims[0] = input_dim

    lr_boost = lr_boost_adam
    lr_leaf = lr_leaf_adam
    lr_global_step = 0

    dbg = TFDeepBoostGraph(dims, n_nodes, weak_classification, mean_types,
                           loss_types, opt_types, weak_learner_params,
                           eval_type)

    init = tf.initialize_all_variables()
    #sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.20)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    print 'Initializing...'
    sess.run(init)
    print 'Initialization done'

    t = 0
    # A line search can waste an epoch, so max_epoch is incremented whenever a line search
    # is performed. To prevent the epoch count from growing without bound, max_epoch_ult
    # acts as a hard cap that stops training regardless.
    epoch = -1
    max_epoch_ult = max_epoch * 2
    batch_size = 64
    val_interval = batch_size * 10

    # if line search, these will shrink learning rate until result improves.
    do_line_search = False
    min_non_ls_epochs = 4  # min. number of epochs in the beginning where we don't do line search
    # linesearch variables
    worsen_cnt = 0
    best_avg_loss = np.Inf
    restore_threshold = len(train_set) / val_interval

    # Model saving paths.
    model_dir = '../model/'
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)
    best_model_fname = 'best_model_{}.ckpt'.format(model_name_suffix)
    init_model_fname = 'initial_model_{}.ckpt'.format(model_name_suffix)
    best_model_path = os.path.join(model_dir, best_model_fname)
    init_model_path = os.path.join(model_dir, init_model_fname)
    dbg.saver.save(sess, init_model_path)

    tf.train.SummaryWriter(logdir='../log/', graph=tf.get_default_graph())

    stop_program = False

    # Total number of samples
    global_step = 0
    num_preds = 0
    tra_err = []
    val_err = []

    if online_boost:
        while not stop_program and epoch < max_epoch and epoch < max_epoch_ult:
            epoch += 1
            print("-----Epoch {:d}-----".format(epoch))
            np.random.shuffle(train_set)
            for si in range(0, len(train_set), batch_size):
                # print 'train epoch={}, start={}'.format(epoch, si)
                si_end = min(si + batch_size, len(train_set))
                x = x_tra[train_set[si:si_end]]
                y = y_tra[train_set[si:si_end]]

                if dbg.sigint_capture == True:
                    # don't do any work this iteration, restart all computation with the next
                    break
                n_applies = len(dbg.training_update())
                sess.run(dbg.training(),
                         feed_dict=dbg.fill_feed_dict(x, y, lr_boost, lr_leaf,
                                                      ps_ws_val, reg_lambda))

                # Evaluate
                t += si_end - si
                if si_end - si < batch_size:
                    t = 0
                lr_global_step += si_end - si
                global_step += si_end - si
                num_preds += (si_end - si) * n_nodes[0]
                if lr_global_step > lr_decay_step:
                    lr_global_step -= lr_decay_step
                    lr_boost *= lr_gamma
                    lr_leaf *= lr_gamma
                    print("----------------------")
                    print('Decayed step size: lr_boost={:.3g}, lr_leaf={:.3g}'.
                          format(lr_boost, lr_leaf))
                    print("----------------------")
                if t % val_interval == 0:
                    preds_tra, avg_loss_tra, avg_tgt_loss_tra =\
                        sess.run([dbg.inference(), dbg.evaluation(), dbg.evaluation(loss=True)],
                                 feed_dict=dbg.fill_feed_dict(x_tra[:5000], y_tra[:5000],
                                                              lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                    assert (not np.isnan(avg_loss_tra))
                    preds, avg_loss, avg_tgt_loss = sess.run(
                        [
                            dbg.inference(),
                            dbg.evaluation(),
                            dbg.evaluation(loss=True)
                        ],
                        feed_dict=dbg.fill_feed_dict(x_val, y_val, lr_boost,
                                                     lr_leaf, ps_ws_val,
                                                     reg_lambda))
                    assert (not np.isnan(avg_loss))

                    tra_err.append((global_step, avg_loss_tra,
                                    avg_tgt_loss_tra, num_preds))
                    val_err.append(
                        (global_step, avg_loss, avg_tgt_loss, num_preds))

                    # Plotting the fit.
                    #if dataset == 'arun_1d':
                    #    weak_predictions = sess.run(dbg.weak_learner_inference(),
                    #                                feed_dict=dbg.fill_feed_dict(x_val, y_val,
                    #                                                             lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                    #    tgts = sess.run(dbg.ll_nodes[-1][0].children_tgts,
                    #                    feed_dict=dbg.fill_feed_dict(x_val, y_val,
                    #                                                 lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                    #    plt.figure(1)
                    #    plt.clf()
                    #    plt.plot(x_val, y_val, lw=3, color='green', label='GT')
                    #    for wi, wpreds in enumerate(weak_predictions):
                    #        plt.plot(x_val, -wpreds, label='w' + str(wi))
                    #    # for wi, tgt in enumerate(tgts):
                    #    #  plt.plot(x_val, -tgt, label='t'+str(wi))
                    #    plt.plot(x_val, preds, lw=3, color='blue', label='Yhat')
                    #    plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
                    #    plt.title('deepboost')
                    #    plt.tight_layout()
                    #    plt.draw()
                    #    plt.show(block=False)
                    print 'epoch={},t={} \n avg_loss={} avg_tgt_loss={} \n loss_tra={} tgt_loss_tra={}'.format(
                        epoch, t, avg_loss, avg_tgt_loss, avg_loss_tra,
                        avg_tgt_loss_tra)

                    #if epoch < min_non_ls_epochs:
                    #    continue

                    #if do_line_search:
                    #    # restores if is worse than the best multiple times
                    #    if avg_loss > best_avg_loss:
                    #        worsen_cnt += 1
                    #        if worsen_cnt > restore_threshold:
                    #            print 'Restore to previous best loss: {}'.format(best_avg_loss)
                    #            dbg.saver.restore(sess, best_model_path)
                    #            worsen_cnt = 0
                    #            max_epoch += 1
                    #            lr_boost *= gamma_boost
                    #            lr_leaf *= gamma_leaf
                    #    else:
                    #        worsen_cnt = 0
                    #        lr_boost = lr_boost_adam
                    #        lr_leaf = lr_leaf_adam
                    #        dbg.saver.save(sess, best_model_path)
                    #        best_avg_loss = avg_loss
            # endfor
            # end of epoch, so save out the results so far
            np.savez('../log/err_vs_gstep_{:s}_{:d}.npz'.format(
                model_name_suffix, n_nodes[0]),
                     tra_err=np.asarray(tra_err),
                     val_err=np.asarray(val_err))
            if dbg.sigint_capture == True:
                print("----------------------")
                print(
                    "Paused. Set parameters before loading the initial model again..."
                )
                print("----------------------")
                # helper functions
                save_model = lambda fname: dbg.saver.save(sess, fname)
                save_best = partial(save_model, best_model_path)
                save_init = partial(save_model, init_model_path)
                pdb.set_trace()
                dbg.saver.restore(sess, init_model_path)
                epoch = -1
                t = 0
                dbg.sigint_capture = False
        # endwhile
        np.savez('../log/err_vs_gstep_{:s}_{:d}.npz'.format(
            model_name_suffix, n_nodes[0]),
                 tra_err=np.asarray(tra_err),
                 val_err=np.asarray(val_err))

    #### Batch boost####
    else:

        for learneri in range(1, n_nodes[0] + 1):
            max_epoch = batch_weak_learner_max_epoch  #12
            epoch = -1
            t = 0
            print("---------------------")
            print(" Weak learner: {:d}".format(learneri))
            # for a new weak learner, reset the learning rates
            lr_global_step = 0
            lr_boost = lr_boost_adam
            lr_leaf = lr_leaf_adam
            while not stop_program and epoch < max_epoch:
                epoch += 1
                print("-----Epoch {:d}-----".format(epoch))
                np.random.shuffle(train_set)
                for si in range(0, len(train_set), batch_size):
                    # print 'train epoch={}, start={}'.format(epoch, si)
                    si_end = min(si + batch_size, len(train_set))
                    x = x_tra[train_set[si:si_end]]
                    y = y_tra[train_set[si:si_end]]

                    if dbg.sigint_capture == True:
                        # don't do any work this iteration, restart all computation with the next
                        break
                    n_applies = len(dbg.training_update())

                    if learneri == 0:  # bias
                        # ll_train_ops is a list of lists of 3-tuples (grads, apply_ops, child_tgts);
                        # each element of the 3-tuple is itself a list.
                        #
                        # Take the last node (the boost node, a.k.a. the root) and access its first
                        # gradient and first apply op, which are for the global bias.
                        # Note: when convert_y == weak_classification == False, ps_w and ps_b are not
                        # learned, so these lists are empty.
                        train_op = [
                            dbg.ll_train_ops[-1][0][0][0],
                            dbg.ll_train_ops[-1][0][1][0]
                        ]
                    else:
                        # For each learneri = 1, ..., n_nodes[0], access the associated
                        # leaf node to get its gradients and apply_ops.
                        train_op = (dbg.ll_train_ops[0][learneri - 1][0] +
                                    dbg.ll_train_ops[0][learneri - 1][1])
                    sess.run(train_op,
                             feed_dict=dbg.fill_feed_dict(
                                 x, y, lr_boost, lr_leaf, ps_ws_val,
                                 reg_lambda))

                    # Evaluate
                    t += si_end - si
                    if si_end - si < batch_size:
                        t = 0
                    lr_global_step += si_end - si
                    global_step += si_end - si
                    num_preds += learneri * (si_end - si)
                    if lr_global_step > lr_decay_step:
                        lr_global_step -= lr_decay_step
                        lr_boost *= lr_gamma
                        lr_leaf *= lr_gamma
                        print("----------------------")
                        print(
                            'Decayed step size: lr_boost={:.3g}, lr_leaf={:.3g}'
                            .format(lr_boost, lr_leaf))
                        print("----------------------")
                    if t % val_interval == 0:
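                        # Evaluate the boosted partial sum over the first `learneri` weak
                        # learners (psums[learneri]) and its corresponding loss.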
                        prediction_tensor = dbg.ll_nodes[-1][0].psums[learneri]
                        tgt_loss_tensor = dbg.ll_nodes[-1][0].losses[learneri]
                        preds_tra, avg_loss_tra, avg_tgt_loss_tra =\
                            sess.run([prediction_tensor, dbg.evaluation(False, prediction_tensor), tgt_loss_tensor],
                                     feed_dict=dbg.fill_feed_dict(x_tra[:5000], y_tra[:5000],
                                                                  lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                        preds, avg_loss, avg_tgt_loss = \
                            sess.run([prediction_tensor, dbg.evaluation(False, prediction_tensor), tgt_loss_tensor],
                                     feed_dict=dbg.fill_feed_dict(x_val, y_val,
                                                                  lr_boost, lr_leaf, ps_ws_val, reg_lambda))

                        tra_err.append((global_step, avg_loss_tra,
                                        avg_tgt_loss_tra, num_preds))
                        val_err.append(
                            (global_step, avg_loss, avg_tgt_loss, num_preds))

                        assert (not np.isnan(avg_loss))
                        # Plotting the fit.
                        if dataset == 'arun_1d':
                            # weak_predictions = sess.run(dbg.weak_learner_inference(),
                            #  feed_dict=dbg.fill_feed_dict(x_val, y_val,
                            #                               lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                            # tgts = sess.run(dbg.ll_nodes[-1][0].children_tgts[2:],
                            #  feed_dict=dbg.fill_feed_dict(x_val, y_val,
                            #                               lr_boost, lr_leaf, ps_ws_val, reg_lambda))
                            plt.figure(1)
                            plt.clf()
                            plt.plot(x_val,
                                     y_val,
                                     lw=3,
                                     color='green',
                                     label='Ground Truth')
                            # for wi, wpreds in enumerate(weak_predictions):
                            #  if wi==0:
                            #    # recall the first one learns y directly.
                            #    plt.plot(x_val, wpreds, label=str(wi))
                            #  else:
                            #    plt.plot(x_val, -wpreds, label=str(wi))
                            # for wi, tgt in enumerate(tgts):
                            #  plt.plot(x_val, tgt, label=str(wi))
                            # plt.legend(loc=4)
                            plt.plot(x_val,
                                     preds,
                                     lw=3,
                                     color='blue',
                                     label='Prediction')
                            plt.draw()
                            plt.show(block=False)
                        print 'learner={},epoch={},t={} \n avg_loss={} avg_tgt_loss={} \n loss_tra={} tgt_loss_tra={}'.format(
                            learneri, epoch, t, avg_loss, avg_tgt_loss,
                            avg_loss_tra, avg_tgt_loss_tra)

                # endfor
                save_fname = '../log/batch_err_vs_gstep_{:s}.npz'.format(
                    model_name_suffix)
                np.savez(save_fname,
                         tra_err=np.asarray(tra_err),
                         val_err=np.asarray(val_err),
                         learners=learneri)
                print('Saved error rates to {}'.format(save_fname))
                if dbg.sigint_capture == True:
                    print("----------------------")
                    print(
                        "Paused. Set parameters before loading the initial model again..."
                    )
                    print("----------------------")
                    # helper functions
                    save_model = lambda fname: dbg.saver.save(sess, fname)
                    save_best = partial(save_model, best_model_path)
                    save_init = partial(save_model, init_model_path)
                    pdb.set_trace()
                    dbg.saver.restore(sess, init_model_path)
                    epoch = -1
                    t = 0
                    dbg.sigint_capture = False
            # endfor
        # endfor
        np.savez(
            '../log/batch_err_vs_gstep_{:s}.npz'.format(model_name_suffix),
            tra_err=np.asarray(tra_err),
            val_err=np.asarray(val_err))
    # endif

    print("Program Finished")

    if online_boost:
        save_fname = '../log/err_vs_gstep_{:s}.npz'.format(model_name_suffix)
    else:
        save_fname = '../log/batch_err_vs_gstep_{:s}.npz'.format(
            model_name_suffix)
    np.savez(save_fname,
             tra_err=np.asarray(tra_err),
             val_err=np.asarray(val_err))
    print('Saved results to: {}'.format(save_fname))
    pdb.set_trace()
Example 5
def main():
    #import sklearn.linear_model as lm

    datasets = get_dataset.all_names()
    indx = textmenu(datasets)
    if indx is None:
        return
    dataset = datasets[indx]
    x_tra, y_tra, x_val, y_val = get_dataset.get_dataset(dataset)

    d = np.load('/data/data/mnist.npz')
    X = d['X']
    Y = d['Y'].ravel()
    Xtest = d['Xtest']
    Ytest = d['Ytest'].ravel()
    print 'data loaded'

    filter_size = [5, 5, 1, 200]
    stride = 2

    # sample patches to determine median of patch distance.

    # sample filters (W, B) to create RBF

    # apply filters

    # sample patches to determine patch mean.

    # PCA patches.

    #PCA first
    n_pca_dim = 50
    X_m = np.mean(X, axis=0)  # mean
    X_zm = X - X_m  # X with zero mean
    X_cov = X_zm.T.dot(X_zm)  # X covariance
    eigval, eigvec = la.eigh(X_cov)  # X_cov is symmetric, so use eigh
    order = np.argsort(eigval)[::-1]  # eigh returns eigenvalues in ascending order
    eigvec = eigvec[:, order[:n_pca_dim]]  # choose the 50 dominant dimensions
    Xp = X.dot(eigvec)  # projections of X,Xtest to these 50 dim.
    Xtestp = Xtest.dot(eigvec)

    # Compute kernel step size s (median of dist among points)
    n_trials = int(Xp.shape[0]**1.5)
    I = random.randint(0, Xp.shape[0], n_trials)
    deltI = random.randint(1, Xp.shape[0], n_trials)
    J = (I + deltI) % X.shape[0]
    dists = sorted(
        map(lambda i: la.norm(Xp[I[i], :] - Xp[J[i], :]), range(n_trials)))
    s = dists[n_trials / 2]

    # generate rbf params
    n_rbf = 4000
    W = random.randn(Xp.shape[1], n_rbf) / s / np.sqrt(2)
    B = random.uniform(0, 2 * np.pi, n_rbf)
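    # W and B define random Fourier features: cos(Xp.dot(W) + B) approximates an RBF
    # (Gaussian) kernel whose bandwidth is set from the median pairwise distance s
    # computed above (Rahimi & Recht, "Random Features for Large-Scale Kernel Machines").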

    #Xf = np.cos(Xp.dot(W)+ B)
    #Xtestf = np.cos(Xtestp.dot(W)+B)

    np.savez('mnist_pca_rbf_param.npz', P=eigvec, W=W, B=B)
    np.savez('hw2_mnist.npz',
             X=X,
             Y=Y,
             Xtest=Xtest,
             Ytest=Ytest,
             P=eigvec,
             W=W,
             B=B)
    d2 = dict(np.load('hw2_mnist.npz'))  # np.load (not loadz); plain dict for savemat
    scipy.io.savemat('hw2_mnist.mat', d2)
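The featurization itself is left commented out in the example (the Xf/Xtestf lines); a minimal sketch of applying the saved parameters afterwards, reusing the file names written above, might look like this:

# Sketch: reload the data and the saved PCA projection / RBF parameters, then
# featurize, mirroring the commented-out Xf / Xtestf lines in the example above.
import numpy as np

d = np.load('/data/data/mnist.npz')
params = np.load('mnist_pca_rbf_param.npz')
P, W, B = params['P'], params['W'], params['B']
Xf = np.cos(d['X'].dot(P).dot(W) + B)          # random Fourier features, training set
Xtestf = np.cos(d['Xtest'].dot(P).dot(W) + B)  # random Fourier features, test set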