def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)

    # generate data
    args.num_docs = 1000
    args.batch_size = 32
    true_topic_weights, true_topic_words, data = generate_model(args=args)

    # setup svi
    pyro.clear_param_store()
    optim = Adam({'lr': args.learning_rate})
    elbo = TraceEnum_ELBO(max_plate_nesting=2)
    svi = SVI(model.main, guide.main, optim, elbo)

    # train
    times = [time.time()]
    logger.info('\nstep\t' + 'epoch\t' + 'elbo\t' + 'time(sec)')

    for i in range(1, args.num_steps + 1):
        loss = svi.step(data, args=args, batch_size=args.batch_size)

        if (args.eval_frequency > 0
                and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f'{i:06d}\t'
                        f'{(i * args.batch_size) / args.num_docs:.3f}\t'
                        f'{-loss:.4f}\t'
                        f'{times[-1]-times[-2]:.3f}')
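The model.main / guide.main pair driven by this loop is defined elsewhere in the original script (an LDA-style topic model, judging by the generated topic weights/words). For reference, here is a minimal sketch of a model/guide combination that TraceEnum_ELBO can train with parallel enumeration, following the pattern of Pyro's Gaussian mixture tutorial; every name below is illustrative and not taken from the script above:

import torch
import pyro
import pyro.distributions as dist
from pyro import poutine
from pyro.infer import SVI, TraceEnum_ELBO, config_enumerate
from pyro.infer.autoguide import AutoDelta
from pyro.optim import Adam

data = torch.cat([torch.randn(50) - 3.0, torch.randn(50) + 3.0])

@config_enumerate
def toy_model(data):
    # global mixture weights and per-component locations
    weights = pyro.sample('weights', dist.Dirichlet(torch.ones(2)))
    with pyro.plate('components', 2):
        locs = pyro.sample('locs', dist.Normal(0., 10.))
    with pyro.plate('data', len(data)):
        # the discrete assignment is enumerated out by TraceEnum_ELBO
        assignment = pyro.sample('assignment', dist.Categorical(weights))
        pyro.sample('obs', dist.Normal(locs[assignment], 1.0), obs=data)

# MAP point estimates for the continuous latents only
toy_guide = AutoDelta(poutine.block(toy_model, expose=['weights', 'locs']))

toy_svi = SVI(toy_model, toy_guide, Adam({'lr': 0.05}),
              loss=TraceEnum_ELBO(max_plate_nesting=1))
for step in range(100):
    toy_svi.step(data)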
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)

    # setup svi
    opt = pyro.optim.Adam({'lr': args.learning_rate})
    csis = pyro.infer.CSIS(model.main,
                           guide.main,
                           opt,
                           num_inference_samples=args.num_infer_samples)

    # train
    times = [time.time()]
    logger.info("\nstep\t" + "E_p(x,y)[log q(x,y)]\t" + "time(sec)")

    for i in range(1, args.num_steps + 1):
        loss = csis.step()

        if (args.eval_frequency > 0
                and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{-loss:.4f}  \t"
                        f"{times[-1]-times[-2]:.3f}")
Example #3
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)

    # load data
    X, true_counts = load_data()
    X_size = X.size(0)

    # setup svi
    def per_param_optim_args(module_name, param_name):
        # the neural-baseline parameters (prefixed 'bl_') get their own learning rate
        def is_baseline_param(module_name, param_name):
            return 'bl_' in module_name or 'bl_' in param_name
        lr = (args.baseline_learning_rate
              if is_baseline_param(module_name, param_name)
              else args.learning_rate)
        return {'lr': lr}

    opt = optim.Adam(per_param_optim_args)
    svi = SVI(model.main, guide.main, opt, loss=TraceGraph_ELBO())

    # train
    times = [time.time()]
    logger.info(f"\nstep\t" + "epoch\t" + "elbo\t" + "time(sec)")

    for i in range(1, args.num_steps + 1):
        loss = svi.step(X)

        if (args.eval_frequency > 0
                and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{(i * args.batch_size) / X_size:.3f}\t"
                        f"{-loss / X_size:.4f}\t"
                        f"{times[-1]-times[-2]:.3f}")
Example #4
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)
    #pyro.enable_validation(True)

    # load data
    DATA_URL = "https://d2hg8soec8ck9v.cloudfront.net/datasets/rugged_data.csv"
    rugged_data = pd.read_csv(DATA_URL, encoding="ISO-8859-1")
    df = rugged_data[["cont_africa", "rugged", "rgdppc_2000"]]
    df = df[np.isfinite(df.rgdppc_2000)]
    df["rgdppc_2000"] = np.log(df["rgdppc_2000"])
    train = torch.tensor(df.values, dtype=torch.float)
    is_cont_africa, ruggedness, log_gdp = train[:, 0], train[:, 1], train[:, 2]

    # setup svi
    pyro.clear_param_store()
    opt = optim.Adam({"lr": args.learning_rate})
    elbo_train = Trace_ELBO(num_particles=args.train_particles,
                            vectorize_particles=True)
    elbo_eval = Trace_ELBO(num_particles=args.eval_particles,
                           vectorize_particles=True)
    svi_train = SVI(model.main, guide.main, opt, loss=elbo_train)
    svi_eval = SVI(model.main, guide.main, opt, loss=elbo_eval)
    svi_arg_l = [is_cont_africa, ruggedness, log_gdp]

    # # train (init)
    # loss = svi.evaluate_loss(*svi_arg_l)
    # param_state = copy.deepcopy(pyro.get_param_store().get_state())
    # elbo_l = [-loss]
    # param_state_l = [param_state]

    # train
    times = [time.time()]
    logger.info("\nstep\t" + "elbo\t" + "time(sec)")

    for i in range(1, args.num_steps + 1):
        loss = svi_train.step(*svi_arg_l)
        # elbo_l.append(-loss)
        #
        # if (i+1) % args.param_freq == 0:
        #     param_state = copy.deepcopy(pyro.get_param_store().get_state())
        #     param_state_l.append(param_state)
        #     # Here copy.copy (= shallow copy) is insufficient,
        #     # due to dicts inside the ParamStoreDict object.
        #     # So copy.deepcopy (= deep copy) must be used.

        if (args.eval_frequency > 0
                and i % args.eval_frequency == 0) or (i == 1):
            loss = svi_eval.step(*svi_arg_l)
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{-loss:.4f}\t"
                        f"{times[-1]-times[-2]:.3f}")
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)

    # load data
    train_loader, test_loader = setup_data_loaders(batch_size=256)

    # setup svi
    pyro.clear_param_store()
    # # WL: edited to make SCORE produce no NaNs. =====
    # # lr value:
    # # - original value in vae: 1.0e-3 --- SCORE produces NaNs at some point.
    # # - default of Adam(..)  : 1.0e-3
    # # - current value        : 1.0e-4 --- SCORE produces no NaNs.
    # learning_rate = 1.0e-4
    # # ===============================================
    opt = optim.Adam({"lr": args.learning_rate})
    elbo = Trace_ELBO()
    svi = SVI(model.main, guide.main, opt, loss=elbo)

    # # train (init)
    # loss_avg = evaluate(svi, train_loader)
    # param_state = copy.deepcopy(pyro.get_param_store().get_state())
    # elbo_l = [-loss_avg]
    # param_state_l = [param_state]

    # train
    times = [time.time()]
    logger.info(f"\nepoch\t" + "elbo\t" + "time(sec)")

    for i in range(1, args.num_epochs + 1):
        loss_avg = train_epoch(svi, train_loader)
        # elbo_l.append(-loss_avg)
        #
        # if (i+1) % param_freq == 0:
        #     param_state = copy.deepcopy(pyro.get_param_store().get_state())
        #     param_state_l.append(param_state)

        if (args.eval_frequency > 0
                and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{-loss_avg:.4f}\t"
                        f"{times[-1]-times[-2]:.3f}")
Example #6
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)
    torch.set_default_tensor_type('torch.FloatTensor')
    #torch.set_default_tensor_type('torch.DoubleTensor')

    # load data
    dataset_directory = get_data_directory(__file__)
    dataset_path = os.path.join(dataset_directory, 'faces_training.csv')
    if not os.path.exists(dataset_path):
        try:
            os.makedirs(dataset_directory)
        except OSError as e:
            if e.errno != errno.EEXIST: raise
        wget.download('https://d2hg8soec8ck9v.cloudfront.net/datasets/faces_training.csv', dataset_path)
    data = torch.tensor(np.loadtxt(dataset_path, delimiter=',')).float()
        
    # setup svi
    pyro.clear_param_store()
    # # WL: edited to make SCORE behave well. =====
    # # (lr,mmt) values:
    # # - original values in sgdef    : (4.5, 0.1) --- SCORE decreases ELBO. 
    # # - default of AdaradRMSProp(..): (1.0, 0.1) --- SCORE decreases ELBO (from iter 200).
    # # - current values              : (0.1, 0.1) --- SCORE behaves well.
    # learning_rate = 0.1
    # momentum = 0.1
    # # ===========================================
    opt = optim.AdagradRMSProp({"eta": args.learning_rate, "t": args.momentum})
    # the original sgdef example uses TraceMeanField_ELBO during training to get
    # analytic KL divergences; the generic Trace_ELBO estimator is used here instead
    # elbo = TraceMeanField_ELBO()
    elbo = Trace_ELBO()
    svi = SVI(model.main, guide.main, opt, loss=elbo)
    svi_arg_l = [data]

    # # train (init)
    # loss = svi.evaluate_loss(*svi_arg_l)
    # param_state = copy.deepcopy(pyro.get_param_store().get_state())
    # elbo_l = [-loss]
    # param_state_l = [param_state]
    
    # train
    times = [time.time()]
    logger.info(f"\nepoch\t"+"elbo\t"+"time(sec)")

    for i in range(1, args.num_epochs+1):
        loss = svi.step(*svi_arg_l)
        # elbo_l.append(-loss)
        
        clip_params()

        # if (i+1) % param_freq == 0:
        #     param_state = copy.deepcopy(pyro.get_param_store().get_state())
        #     param_state_l.append(param_state)

        if (args.eval_frequency > 0 and i % args.eval_frequency == 0) or (i == 1):
            times.append(time.time())
            logger.info(f"{i:06d}\t"
                        f"{-loss:.4f}\t"
                        f"{times[-1]-times[-2]:.3f}")
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)

    # load data
    data = poly.load_data(poly.JSB_CHORALES)
    training_seq_lengths    = data['train']['sequence_lengths']
    training_data_sequences = data['train']['sequences']

    # truncate the training set so it splits into whole mini-batches
    d1_training = int(len(training_seq_lengths)/args.mini_batch_size)*args.mini_batch_size
    training_seq_lengths    = training_seq_lengths   [:d1_training]
    training_data_sequences = training_data_sequences[:d1_training]

    N_train_data = len(training_seq_lengths)
    N_train_time_slices = float(torch.sum(training_seq_lengths))
    N_mini_batches = int(N_train_data / args.mini_batch_size +
                         int(N_train_data % args.mini_batch_size > 0))

    logger.info(f"N_train_data: {N_train_data}\t"
                f"avg. training seq. length: {training_seq_lengths.float().mean():.2f}\t"
                f"N_mini_batches: {N_mini_batches}")
    
    # setup svi
    pyro.clear_param_store()
    opt = optim.ClippedAdam({"lr": args.learning_rate, "betas": (args.beta1, args.beta2),
                             "clip_norm": args.clip_norm, "lrd": args.lr_decay,
                             "weight_decay": args.weight_decay})
    svi = SVI(model.main, guide.main, opt,
              loss=Trace_ELBO())
    svi_eval = SVI(model.main, guide.main, opt,
                   loss=Trace_ELBO())

    # train minibatch
    def proc_minibatch(svi_proc, epoch, which_mini_batch, shuffled_indices):
        # compute the KL annealing factor appropriate for the current mini-batch in the current epoch
        if args.annealing_epochs > 0 and epoch < args.annealing_epochs:
            min_af = args.minimum_annealing_factor
            annealing_factor = min_af + (1.0 - min_af) * \
                (float(which_mini_batch + epoch * N_mini_batches + 1) /
                 float(args.annealing_epochs * N_mini_batches))
        else:
            annealing_factor = 1.0

        # compute which sequences in the training set we should grab
        mini_batch_start = (which_mini_batch * args.mini_batch_size)
        mini_batch_end = np.min([(which_mini_batch + 1) * args.mini_batch_size, N_train_data])
        mini_batch_indices = shuffled_indices[mini_batch_start:mini_batch_end]
        # grab a fully prepped mini-batch using the helper function in the data loader
        mini_batch, mini_batch_reversed, mini_batch_mask, mini_batch_seq_lengths \
            = poly.get_mini_batch(mini_batch_indices, training_data_sequences,
                                  training_seq_lengths, cuda=args.cuda)

        # do an actual gradient step (or eval)
        loss = svi_proc(mini_batch, mini_batch_reversed, mini_batch_mask,
                        mini_batch_seq_lengths, annealing_factor)
        return loss

    # train epoch
    def train_epoch(epoch):
        # take gradient
        loss, shuffled_indices = 0.0, torch.randperm(N_train_data)
        for which_mini_batch in range(N_mini_batches):
            loss += proc_minibatch(svi.step, epoch,
                                   which_mini_batch, shuffled_indices)
        loss /= N_train_time_slices
        return loss

    # eval loss of epoch
    def eval_epoch():
        # put the RNN into evaluation mode (i.e. turn off drop-out if applicable)
        guide.rnn.eval()

        # eval loss
        loss, shuffled_indices = 0.0, torch.randperm(N_train_data)
        for which_mini_batch in range(N_mini_batches):
            loss += proc_minibatch(svi_eval.evaluate_loss, 0,
                                   which_mini_batch, shuffled_indices)
        loss /= N_train_time_slices

        # put the RNN back into training mode (i.e. turn on drop-out if applicable)
        guide.rnn.train()
        return loss

    # train
    #elbo_l = []
    #param_state_l = []
    time_l = [time.time()]
    logger.info(f"\nepoch\t"+"elbo\t"+"time(sec)")

    for epoch in range(1, args.num_epochs+1):
        loss = train_epoch(epoch)
        #elbo_l.append(-loss)

        # param_state = copy.deepcopy(pyro.get_param_store().get_state())
        # param_state_l.append(param_state)

        time_l.append(time.time())
        logger.info(f"{epoch:04d}\t"
                    f"{-loss:.4f}\t"
                    f"{time_l[-1]-time_l[-2]:.3f}")

        if math.isnan(loss): break
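The annealing_factor computed in proc_minibatch is consumed inside model.main and guide.main; in the deep Markov model this is typically done by wrapping the latent sample sites in poutine.scale, so only the KL term of the ELBO is down-weighted early in training while the likelihood keeps full weight. A minimal sketch of that pattern with illustrative names:

import torch
import pyro
import pyro.distributions as dist
from pyro import poutine

def toy_model(x, annealing_factor=1.0):
    # scale the latent's prior term by the annealing factor
    with poutine.scale(scale=annealing_factor):
        z = pyro.sample("z", dist.Normal(0., 1.))
    pyro.sample("x", dist.Normal(z, 1.0), obs=x)

def toy_guide(x, annealing_factor=1.0):
    loc = pyro.param("z_loc", torch.tensor(0.))
    log_scale = pyro.param("z_log_scale", torch.tensor(0.))
    # scale the matching guide term by the same factor, annealing the KL
    with poutine.scale(scale=annealing_factor):
        pyro.sample("z", dist.Normal(loc, torch.exp(log_scale)))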
def main(model, guide, args):
    # init
    if args.seed is not None: pyro.set_rng_seed(args.seed)
    logger = get_logger(args.log, __name__)
    logger.info(args)

    # some assertions to make sure that batching math assumptions are met
    assert args.sup_num % args.batch_size == 0, "batch size should divide the number of supervised examples"
    assert MNISTCached.validation_size % args.batch_size == 0, \
        "batch size should divide the number of validation examples"
    assert MNISTCached.train_data_size % args.batch_size == 0, \
        "batch size should divide the total number of training examples"
    assert MNISTCached.test_size % args.batch_size == 0, "batch size should divide the number of test examples"

    # setup the optimizer
    adam_params = {"lr": args.learning_rate, "betas": (args.beta_1, 0.999)}
    optimizer = Adam(adam_params)

    # set up the loss(es) for inference. wrapping the guide in config_enumerate builds the loss as a sum
    # by enumerating each class label for the sampled discrete categorical distribution in the model
    guide_enum = config_enumerate(guide.main, args.enum_discrete, expand=True)
    elbo = TraceEnum_ELBO(max_plate_nesting=1)
    loss_basic = SVI(model.main, guide_enum, optimizer, loss=elbo)

    # build a list of all losses considered
    losses = [loss_basic]

    data_loaders = setup_data_loaders(MNISTCached,
                                      args.cuda,
                                      args.batch_size,
                                      sup_num=args.sup_num)

    # how often a supervised batch is encountered during inference,
    # e.g. if sup_num is 3000, every 16th (= int(50000/3000)) batch is supervised
    # until all the supervised batches have been traversed
    periodic_interval_batches = int(MNISTCached.train_data_size /
                                    (1.0 * args.sup_num))

    # number of unsupervised examples
    unsup_num = MNISTCached.train_data_size - args.sup_num

    # run inference for a certain number of epochs
    times = [time.time()]
    logger.info("\nepoch\t" + "elbo(sup)\t" + "elbo(unsup)\t" + "time(sec)")

    for i in range(1, args.num_epochs + 1):
        # get the losses for an epoch
        epoch_losses_sup, epoch_losses_unsup = \
            run_inference_for_epoch(data_loaders, losses, periodic_interval_batches)

        # compute average epoch losses i.e. losses per example
        avg_epoch_losses_sup = map(lambda v: v / args.sup_num,
                                   epoch_losses_sup)
        avg_epoch_losses_unsup = map(lambda v: v / unsup_num,
                                     epoch_losses_unsup)

        # print results
        times.append(time.time())
        str_elbo_sup = " ".join(
            map(lambda v: f"{-v:.4f}", avg_epoch_losses_sup))
        str_elbo_unsup = " ".join(
            map(lambda v: f"{-v:.4f}", avg_epoch_losses_unsup))
        str_print = f"{i:06d}\t"\
                    f"{str_elbo_sup}\t"\
                    f"{str_elbo_unsup}\t"\
                    f"{times[-1]-times[-2]:.3f}"
        logger.info(str_print)
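run_inference_for_epoch is a helper from the original semi-supervised VAE script. A hedged sketch of the batching pattern the comments above describe, taking one supervised batch every periodic_interval_batches batches; the loader keys and step signatures below are assumptions:

def run_inference_for_epoch(data_loaders, losses, periodic_interval_batches):
    # accumulate per-loss totals for supervised and unsupervised batches separately
    sup_iter, unsup_iter = iter(data_loaders["sup"]), iter(data_loaders["unsup"])
    sup_batches, unsup_batches = len(data_loaders["sup"]), len(data_loaders["unsup"])
    epoch_losses_sup = [0.0] * len(losses)
    epoch_losses_unsup = [0.0] * len(losses)

    ctr_sup = 0
    for i in range(sup_batches + unsup_batches):
        # take a supervised batch every `periodic_interval_batches` batches
        is_supervised = (i % periodic_interval_batches == 0) and ctr_sup < sup_batches
        if is_supervised:
            xs, ys = next(sup_iter)
            ctr_sup += 1
        else:
            xs, ys = next(unsup_iter)
        for loss_id, loss in enumerate(losses):
            if is_supervised:
                epoch_losses_sup[loss_id] += loss.step(xs, ys)
            else:
                epoch_losses_unsup[loss_id] += loss.step(xs)
    return epoch_losses_sup, epoch_losses_unsup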