Example #1
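The examples below are function bodies lifted from a larger module and rely on module-level imports that are not shown. A minimal sketch of what those imports presumably look like follows; the `nnet` model builders and the `minibatcher` iterator are project-specific helpers and are only named here as assumptions.

# Assumed module-level imports for the examples below (sketch, not taken from the source).
import time

import numpy as np
import tensorflow as tf
import horovod.tensorflow as hvd           # used by the distributed examples
import tensorflow_probability as tfp

tfd = tfp.distributions                    # Examples 3 and 4 instead bind tfd = tf.contrib.distributions

# Project-specific helpers (assumed names only):
#   nnet.convnet(...) / nnet.fullnet(...) build the model whose .losses holds KL / regularization terms,
#   minibatcher((X, Y), batch_size, shuffle) yields (x, y) mini-batches and
#   len(...) on it gives the number of batches per epoch.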
def bnn(args):

    # %% Model

    class Dummy():
        pass

    hvd.init()

    print("Rank is:", hvd.rank())

    tf.reset_default_graph()

    tf.set_random_seed(args.seed + hvd.rank())
    np.random.seed(args.seed + hvd.rank())

    iter_ds = minibatcher((args.X_train, args.Y_train),
                          batch_size=args.batch_size,
                          shuffle=True)
    num_batch = len(iter_ds)

    # Use num_iters if specified by the user; otherwise derive it from epochs.
    # These arguments are the same for each rank.
    if args.num_iters > 0:
        max_iter = args.num_iters
    else:
        max_iter = num_batch * args.training_epochs

    print("max iteration is", max_iter)

    N = args.X_train.shape[0]
    dim = list(args.X_train.shape[1:])
    K = args.Y_train.shape[1]  # num of class

    X = tf.placeholder(tf.float32, [None] + dim)
    y = tf.placeholder(tf.float32, [None, K])

    if args.data.lower() in ("cifar", "cifar10"):
        neural_net = nnet.convnet(activation=args.activation,
                                  numclass=K,
                                  inshape=args.inshape,
                                  isBay=True,
                                  priorstd=args.priorstd,
                                  poststd=args.poststd)
    else:
        neural_net = nnet.fullnet(activation=args.activation,
                                  numclass=K,
                                  layer_sizes=args.layer_sizes,
                                  isBay=True,
                                  priorstd=args.priorstd,
                                  poststd=args.poststd)

    logits = neural_net(X)

    labels_distribution = tfd.Categorical(logits=logits)

    # %% Loss

    neg_log_likelihood = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
    kl = sum(neural_net.losses) / N
    elbo_loss = neg_log_likelihood + args.KLscale * kl

    # %% Metrics

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # %% Posterior

    # names = []
    # qmeans = []
    # qstds = []
    # Wsample = []

    # for i, layer in enumerate(neural_net.layers):
    #     if hasattr(layer, "kernel_posterior"):
    #         q = layer.kernel_posterior
    #         names.append("Layer {}".format(i))
    #         qmeans.append(q.mean())
    #         qstds.append(q.stddev())
    #         Wsample.append(q.sample(args.num_monte_carlo))

    # Horovod training

    opt = tf.train.AdamOptimizer(args.learning_rate * hvd.size())
    opt = hvd.DistributedOptimizer(opt)
    global_step = tf.train.get_or_create_global_step()
    train_opt = opt.minimize(elbo_loss, global_step=global_step)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    # checkpoint_dir = './train_logs' if hvd.rank() == 0 else None

    hooks = [
        hvd.BroadcastGlobalVariablesHook(0)
        # tf.train.StopAtStepHook(last_step=max_iter // hvd.size())
    ]

    res_return = Dummy()
    res_return.plot = Dummy()
    res_return.plot.niter = []
    res_return.plot.runtime = []
    res_return.plot.loss = []
    res_return.plot.devAcc = []
    # res_return.plot.devAccMean = []
    # res_return.plot.devAccUp = []
    # res_return.plot.devAccDown = []

    # print("total iteration is", max_iter // hvd.size())

    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           config=config) as mon_sess:

        start_time, algstart = time.time(), time.time()
        if hvd.rank() == 0:
            print("=" * 21 + "Optimization Start" + "=" * 21)

        niter = 0

        while niter <= max_iter:

            batch_x, batch_y = next(iter_ds)  # mini-batch
            _, loss_val, acc_val, niter = mon_sess.run(
                [train_opt, elbo_loss, accuracy, global_step],
                feed_dict={
                    X: batch_x,
                    y: batch_y
                })

            # print(niter)

            # periodic evaluation on the dev set (num_monte_carlo forward passes per rank)
            if niter % args.viz_steps == 0:
                end_time = time.time()
                # eval on dev set
                acc_val_dev = np.asarray([
                    mon_sess.run(accuracy,
                                 feed_dict={
                                     X: args.X_test,
                                     y: args.Y_test
                                 }) for _ in range(args.num_monte_carlo)
                ])

                # save
                timediff = end_time - start_time
                AccMean = np.mean(acc_val_dev)
                AccStd = np.std(acc_val_dev)
                res_return.plot.niter.append(niter)
                res_return.plot.runtime.append(timediff)
                res_return.plot.loss.append(loss_val)
                res_return.plot.devAcc.append(acc_val_dev)
                # res_return.plot.devAccMean.append(AccMean)
                # res_return.plot.devAccUp.append(AccMean + AccStd)
                # res_return.plot.devAccDown.append(AccMean - AccStd)

                if hvd.rank() == 0:
                    print("Step: {:>3d} RunTime: {:.3f} Loss: {:.3f}"
                          "ACC: {:.3f} AccDevM: {:.3f} AccDevU: {:.3f}".format(
                              niter, timediff, loss_val, acc_val, AccMean,
                              AccMean + AccStd))
                start_time = time.time()

        eval_start = time.time()
        if hvd.rank() == 0:
            print("=" * 21 + "Optimization Finish" + "=" * 21)

        tmp = [
            mon_sess.run([accuracy, labels_distribution.probs],
                         feed_dict={
                             X: args.X_test,
                             y: args.Y_test
                         }) for _ in range(args.num_monte_carlo_test)
        ]
        acc_val_test, probs = zip(*tmp)
        acc_val_test = np.asarray(acc_val_test)

        eval_end = time.time()
        tot_time = eval_end - algstart
        eval_time = eval_end - eval_start

        if hvd.rank() == 0:
            print("Step: {:>3d} RunTime: {:.3f} TestAcc:{:.3f}".format(
                niter, end_time - algstart, np.mean(acc_val_test)))

        res_return.tot_time = tot_time
        res_return.eval_time = eval_time

    # Return result

    res_return.probs = np.asarray(probs)
    res_return.acc = np.asarray(acc_val_test)
    # res_return.posterior = Dummy()
    # res_return.posterior.mean = qm_vals
    # res_return.posterior.std = qs_vals
    # res_return.posterior.samples = W_postsam
    # res_return.names = names

    return res_return
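A hypothetical driver for the Horovod examples, assuming the `args` fields read by `bnn` above; the dataset arrays and the hyperparameter values are placeholders, not taken from the source:

# Hypothetical driver sketch (field names follow the bnn() function above).
from types import SimpleNamespace

args = SimpleNamespace(
    seed=0, data="cifar10", inshape=[32, 32, 3],
    batch_size=128, num_iters=0, training_epochs=10,
    activation="relu", layer_sizes=[400, 400],
    priorstd=1.0, poststd=0.1, KLscale=1.0,
    learning_rate=1e-3, viz_steps=100,
    num_monte_carlo=50, num_monte_carlo_test=50,
    X_train=X_train, Y_train=Y_train,   # features and one-hot labels, loaded elsewhere
    X_test=X_test, Y_test=Y_test,
)
result = bnn(args)
print("mean test accuracy:", result.acc.mean())

Because the function calls `hvd.init()` itself, such a script would be launched across ranks with something like `horovodrun -np 4 python train.py` (script name hypothetical); note that the base learning rate is scaled by `hvd.size()` inside the function.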
Example #2
def snn(args):

    hvd.init()
    tf.reset_default_graph()

    tf.set_random_seed(args.seed + hvd.rank())  # start from 0
    np.random.seed(args.seed + hvd.rank())  # start from 0

    # N = args.X_train.shape[0]
    dim = list(args.X_train.shape[1:])
    K = args.Y_train.shape[1]  # num of class

    # Dataset
    iter_ds = minibatcher((args.X_train, args.Y_train),
                          batch_size=args.batch_size,
                          shuffle=True)
    num_batch = len(iter_ds)

    # Use num_iters if specified by the user; otherwise derive it from epochs.
    # These arguments are the same for each rank.
    if args.num_iters > 0:
        max_iter = args.num_iters
    else:
        max_iter = num_batch * args.training_epochs

    X = tf.placeholder(tf.float32, [None] + dim)
    y = tf.placeholder(tf.float32, [None, K])

    if args.data.lower() in ("cifar", "cifar10"):
        neural_net = nnet.convnet(activation=args.activation,
                                  inshape=args.inshape,
                                  numclass=K,
                                  isBay=False,
                                  regularizer=args.regularizer)
    else:
        neural_net = nnet.fullnet(activation=args.activation,
                                  layer_sizes=args.layer_sizes,
                                  numclass=K,
                                  isBay=False,
                                  regularizer=args.regularizer)

    logits = neural_net(X)
    labels_distribution = tfd.Categorical(logits=logits)
    pred = tf.nn.softmax(logits, name="pred")

    lossreg = sum(neural_net.losses)  # / N
    losscost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
    cost = lossreg + losscost

    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # begin training

    # Horovod training
    opt = tf.train.AdamOptimizer(args.learning_rate * hvd.size())
    opt = hvd.DistributedOptimizer(opt)
    global_step = tf.train.get_or_create_global_step()
    train_opt = opt.minimize(cost, global_step=global_step)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    # checkpoint_dir = './train_logs' if hvd.rank() == 0 else None

    hooks = [hvd.BroadcastGlobalVariablesHook(0)]

    #     tf.train.StopAtStepHook(last_step=max_iter // hvd.size())]
    #     tf.train.LoggingTensorHook(
    #         tensors={'step': global_step, 'loss': cost},
    #         every_n_iter=100)
    # ]
    # print out every 100 iterations

    class Dummy():
        pass

    res_return = Dummy()
    res_return.plot = Dummy()
    res_return.plot.niter = []
    res_return.plot.runtime = []
    res_return.plot.loss = []
    res_return.plot.devAcc = []

    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           config=config) as mon_sess:

        start_time, algstart = time.time(), time.time()
        if hvd.rank() == 0:
            print("=" * 21 + "Optimization Start" + "=" * 21)

        niter = 0
        while niter <= max_iter:

            batch_x, batch_y = next(iter_ds)  # mini-batch
            _, cost_val, acc_val, niter = mon_sess.run(
                [train_opt, cost, accuracy, global_step],
                feed_dict={
                    X: batch_x,
                    y: batch_y
                })

            if niter % args.viz_steps == 0:
                end_time = time.time()
                # eval on dev set
                acc_val_dev = mon_sess.run(accuracy,
                                           feed_dict={
                                               X: args.X_test,
                                               y: args.Y_test
                                           })

                # save
                timediff = end_time - start_time
                res_return.plot.niter.append(niter)
                res_return.plot.runtime.append(timediff)
                res_return.plot.loss.append(cost_val)
                res_return.plot.devAcc.append(acc_val_dev)

                if hvd.rank() == 0:
                    print("Step: {:>3d} RunTime: {:.3f} "
                          "Loss: {:.3f} Acc: {:.3f} DevAcc: {:.3f}".format(
                              niter, timediff, cost_val, acc_val, acc_val_dev))
                start_time = time.time()

        eval_start = time.time()
        if hvd.rank() == 0:
            print("=" * 21 + "Optimization Finish" + "=" * 21)

        acc_val_test, probs = mon_sess.run(
            [accuracy, labels_distribution.probs],
            feed_dict={
                X: args.X_test,
                y: args.Y_test
            })
        eval_end = time.time()
        tot_time = eval_end - algstart
        eval_time = eval_end - eval_start
        if hvd.rank() == 0:
            print("Step: {:>3d} RunTime: {:.3f} TestAcc:{:.3f}".format(
                niter, tot_time, acc_val_test))
        res_return.tot_time = tot_time
        res_return.eval_time = eval_time


    # Return result
    res_return.probs = np.asarray(probs)
    res_return.acc = np.asarray(acc_val_test)

    return res_return
Example #3
def bnn(args):

    # %% Model

    class Dummy():
        pass

    tf.reset_default_graph()

    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    tfd = tf.contrib.distributions

    N = args.X_train.shape[0]
    dim = list(args.X_train.shape[1:])
    K = args.Y_train.shape[1]  # num of class

    X = tf.placeholder(tf.float32, [None] + dim)
    y = tf.placeholder(tf.float32, [None, K])

    neural_net = nnet.convnet(
        numclass=K, inshape=args.inshape, isBay=True,
        priorstd=args.priorstd, poststd=args.poststd
    )
    logits = neural_net(X)

    labels_distribution = tfd.Categorical(logits=logits)

    # %% Loss

    neg_log_likelihood = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
    )
    kl = sum(neural_net.losses) / N
    elbo_loss = neg_log_likelihood + args.KLscale * kl

    # %% Metrics

    correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # %% Posterior

    names = []
    qmeans = []
    qstds = []
    Wsample = []

    for i, layer in enumerate(neural_net.layers):
        if hasattr(layer, "kernel_posterior"):
            q = layer.kernel_posterior
            names.append("Layer {}".format(i))
            qmeans.append(q.mean())
            qstds.append(q.stddev())
            Wsample.append(q.sample(args.num_monte_carlo))

    # %% Train

    optimizer = tf.train.AdamOptimizer(
        args.learning_rate).minimize(elbo_loss)

    init_global = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()

    res_return = Dummy()
    res_return.plot = Dummy()
    res_return.plot.niter = []
    res_return.plot.runtime = []
    res_return.plot.loss = []
    res_return.plot.devAccMean = []
    res_return.plot.devAccUp = []
    res_return.plot.devAccDown = []

    with tf.Session() as sess:
        print("=" * 21 + "Optimization Start" + "=" * 21)
        start_time, algstart = time.time(), time.time()
        sess.run([init_global, init_local])
        niter = 0

        for epoch in range(args.training_epochs):

            perm = np.random.permutation(N)
            for i in range(0, N, args.batch_size):
                batch_x = args.X_train[perm[i:i + args.batch_size]]
                batch_y = args.Y_train[perm[i:i + args.batch_size]]
                _, loss_val, acc_val = sess.run(
                    [optimizer, elbo_loss, accuracy],
                    feed_dict={X: batch_x, y: batch_y}
                )
                niter += 1

                if niter % 100 == 0:
                    end_time = time.time()
                    # eval on dev set
                    acc_val_dev = np.asarray([
                        sess.run(accuracy,
                                 feed_dict={X: args.X_dev, y: args.Y_dev})
                        for _ in range(args.num_monte_carlo)
                    ])

                    # save
                    timediff = end_time - start_time
                    AccMean = np.mean(acc_val_dev)
                    AccStd = np.std(acc_val_dev)
                    res_return.plot.niter.append(niter)
                    res_return.plot.runtime.append(timediff)
                    res_return.plot.loss.append(loss_val)
                    res_return.plot.devAccMean.append(AccMean)
                    res_return.plot.devAccUp.append(AccMean + AccStd)
                    res_return.plot.devAccDown.append(AccMean - AccStd)

                    print(
                        "Step: {:>3d} RunTime: {:.3f} Loss: {:.3f} "
                        "AccDevM: {:.3f} AccDevU: {:.3f}".format(
                            niter, timediff,
                            loss_val, AccMean, AccMean + AccStd
                        )
                    )
                    start_time = time.time()

        end_time = time.time()
        print("=" * 21 + "Optimization Finish" + "=" * 21)

        tmp = [sess.run(
            [accuracy, labels_distribution.probs],
            feed_dict={X: args.X_test, y: args.Y_test}
        ) for _ in range(args.num_monte_carlo)]
        acc_val_test, probs = zip(*tmp)
        acc_val_test = np.asarray(acc_val_test)

        print("Step: {:>3d} RunTime: {:.3f} TestAcc:{:.3f}".format(
            niter, end_time - algstart, np.mean(acc_val_test)
        ))

        # evaluate the posterior distributions for kernel
        qm_vals, qs_vals, W_postsam = sess.run((qmeans, qstds, Wsample))

    # Return result

    res_return.probs = np.asarray(probs)
    res_return.acc = np.asarray(acc_val_test)
    res_return.posterior = Dummy()
    res_return.posterior.mean = qm_vals
    res_return.posterior.std = qs_vals
    res_return.posterior.samples = W_postsam
    res_return.names = names

    return res_return
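A short, illustrative way to inspect the posterior summaries this example returns (attribute names as assigned above; the summary statistics printed are only an example):

# Illustrative use of the posterior summaries returned by Example #3.
res = bnn(args)
for name, qm, qs in zip(res.names, res.posterior.mean, res.posterior.std):
    # qm / qs are numpy arrays with the shape of the corresponding layer's kernel.
    print("{}: mean |weight| = {:.4f}, mean posterior std = {:.4f}".format(
        name, np.abs(qm).mean(), qs.mean()))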
Example #4
def snn(args):

    tf.reset_default_graph()

    tf.set_random_seed(args.seed)
    np.random.seed(args.seed)

    N = args.X_train.shape[0]
    dim = list(args.X_train.shape[1:])
    K = args.Y_train.shape[1]  # num of class

    X = tf.placeholder(tf.float32, [None] + dim)
    y = tf.placeholder(tf.float32, [None, K])

    tfd = tf.contrib.distributions

    neural_net = nnet.convnet(activation=args.activation,
                              inshape=args.inshape, numclass=K, isBay=False)

    logits = neural_net(X)
    labels_distribution = tfd.Categorical(logits=logits)
    pred = tf.nn.softmax(logits, name="pred")

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
    )
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    optimizer = tf.train.AdamOptimizer(args.learning_rate).minimize(cost)

    # begin training

    init_global = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()

    class Dummy():
        pass

    res_return = Dummy()
    res_return.plot = Dummy()
    res_return.plot.niter = []
    res_return.plot.runtime = []
    res_return.plot.loss = []
    res_return.plot.devAcc = []

    with tf.Session() as sess:
        print("=" * 21 + "Optimization Start" + "=" * 21)
        start_time, algstart = time.time(), time.time()
        sess.run([init_global, init_local])
        niter = 0

        for epoch in range(args.training_epochs):

            # total_batch = int(N / args.batch_size)
            # Loop over all batches
            perm = np.random.permutation(N)

            for i in range(0, N, args.batch_size):
                batch_x = args.X_train[perm[i:i + args.batch_size]]
                batch_y = args.Y_train[perm[i:i + args.batch_size]]
                _, cost_val, acc_val = sess.run(
                    [optimizer, cost, accuracy],
                    feed_dict={X: batch_x, y: batch_y}
                )
                niter += 1

                if niter % 100 == 0:
                    end_time = time.time()
                    # eval on dev set
                    acc_val_dev = accuracy.eval(feed_dict={X: args.X_dev,
                                                           y: args.Y_dev})

                    # save
                    timediff = end_time - start_time
                    res_return.plot.niter.append(niter)
                    res_return.plot.runtime.append(timediff)
                    res_return.plot.loss.append(cost_val)
                    res_return.plot.devAcc.append(acc_val_dev)

                    print(
                        "Step: {:>3d} RunTime: {:.3f} "
                        "Loss: {:.3f} Acc: {:.3f} DevAcc: {:.3f}".format(
                            niter, timediff,
                            cost_val, acc_val, acc_val_dev
                        )
                    )
                    start_time = time.time()

        end_time = time.time()
        print("=" * 21 + "Optimization Finish" + "=" * 21)
        acc_val_test, probs = sess.run(
            [accuracy, labels_distribution.probs],
            feed_dict={X: args.X_test, y: args.Y_test}
        )
        print("Step: {:>3d} RunTime: {:.3f} TestAcc:{:.3f}".format(
            niter, end_time - algstart, acc_val_test
        ))

    # Return result
    res_return.probs = np.asarray(probs)
    res_return.acc = np.asarray(acc_val_test)

    return res_return
Example #5
def snn(args):

    hvd.init()
    tf.reset_default_graph()

    tf.set_random_seed(args.seed + hvd.rank())  # start from 0
    np.random.seed(args.seed + hvd.rank())  # start from 0

    N = args.X_train.shape[0]
    dim = list(args.X_train.shape[1:])
    K = args.Y_train.shape[1]  # num of class

    X = tf.placeholder(tf.float32, [None] + dim)
    y = tf.placeholder(tf.float32, [None, K])

    # tfd = tf.contrib.distributions

    neural_net = nnet.convnet(activation=args.activation,
                              regularizer=args.regularizer,
                              inshape=args.inshape,
                              numclass=K,
                              isBay=False,
                              repeatConv=args.repeatConv)

    logits = neural_net(X)
    labels_distribution = tfd.Categorical(logits=logits)
    pred = tf.nn.softmax(logits, name="pred")

    lossreg = sum(neural_net.losses)
    pre_cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits))
    cost = pre_cost + lossreg

    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # begin training

    # Horovod training
    opt = tf.train.AdamOptimizer(args.learning_rate * hvd.size())
    opt = hvd.DistributedOptimizer(opt)
    global_step = tf.train.get_or_create_global_step()
    train_opt = opt.minimize(cost, global_step=global_step)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    hooks = [hvd.BroadcastGlobalVariablesHook(0)]

    # broadcast initial variable values from rank 0

    class Dummy():
        pass

    res_return = Dummy()
    res_return.plot = Dummy()
    res_return.plot.niter = []
    res_return.plot.runtime = []
    res_return.plot.loss = []
    res_return.plot.devAcc = []

    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           config=config) as mon_sess:

        if hvd.rank() == 0:
            print("=" * 21 + "Optimization Start" + "=" * 21)

        start_time, algstart = time.time(), time.time()
        niter = 0

        for epoch in range(args.training_epochs // hvd.size()):

            perm = np.random.permutation(N)
            for i in range(0, N, args.batch_size):
                batch_x = args.X_train[perm[i:i + args.batch_size]]
                batch_y = args.Y_train[perm[i:i + args.batch_size]]
                _, cost_val, acc_val = mon_sess.run(
                    [train_opt, cost, accuracy],
                    feed_dict={
                        X: batch_x,
                        y: batch_y
                    })
                niter += 1

                if (niter * hvd.size()) % args.viz_steps == 0:
                    end_time = time.time()
                    # eval on dev set
                    acc_val_dev = mon_sess.run(accuracy,
                                               feed_dict={
                                                   X: args.X_test,
                                                   y: args.Y_test
                                               })

                    # save
                    timediff = end_time - start_time
                    res_return.plot.niter.append(niter)
                    res_return.plot.runtime.append(timediff)
                    res_return.plot.loss.append(cost_val)
                    res_return.plot.devAcc.append(acc_val_dev)

                    if hvd.rank() == 0:
                        print("Step: {:>3d} RunTime: {:.3f} "
                              "Loss: {:.3f} Acc: {:.3f} DevAcc: {:.3f}".format(
                                  niter, timediff, cost_val, acc_val,
                                  acc_val_dev))
                    start_time = time.time()

        eval_start = time.time()
        if hvd.rank() == 0:
            print("=" * 21 + "Optimization Finish" + "=" * 21)

        acc_val_test, probs = mon_sess.run(
            [accuracy, labels_distribution.probs],
            feed_dict={
                X: args.X_test,
                y: args.Y_test
            })
        eval_end = time.time()
        tot_time = eval_end - algstart
        eval_time = eval_end - eval_start
        if hvd.rank() == 0:
            print("Step: {:>3d} RunTime: {:.3f} TestAcc:{:.3f}".format(
                niter, tot_time, acc_val_test))
        res_return.tot_time = tot_time
        res_return.eval_time = eval_time


    # Return result
    res_return.probs = np.asarray(probs)
    res_return.acc = np.asarray(acc_val_test)

    return res_return