def get_reinforce_ps_loss(phi, p0, reinforce=False):
    # returns a pseudo-loss: a surrogate whose gradient w.r.t. phi is an
    # unbiased estimate of the gradient of the true expected loss

    d = len(p0)
    e_b = sigmoid(phi)

    bn_rv = Bernoulli(probs = torch.ones(d) * e_b)
    binary_samples = bn_rv.sample().detach()
    # binary_samples = (torch.rand(d) < e_b).float().detach()

    if reinforce:
        # an independent second sample serves as a baseline (control variate)
        binary_samples_ = bn_rv.sample().detach()
        baseline = torch.sum((binary_samples_ - p0)**2)

    else:
        baseline = 0.0

    sampled_loss = torch.sum((binary_samples - p0)**2)

    # probs, draw_array = get_all_probs(e_b, d)
    # losses_array = get_losses_from_draw_array(draw_array, p0)
    #
    # cat_rv = Categorical(probs)
    # indx = cat_rv.sample()
    # binary_samples = draw_array[indx]
    # sampled_loss = losses_array[indx]
    #
    sampled_log_q = get_bernoulli_log_prob(e_b, binary_samples)

    ps_loss = (sampled_loss - baseline).detach() * sampled_log_q

    return ps_loss
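For context, this is how such a pseudo-loss is consumed: calling backward() on it yields a REINFORCE-style estimate of the gradient of the expected loss with respect to the logits. A minimal self-contained sketch (the sigmoid and get_bernoulli_log_prob helpers above are assumed to live in the surrounding module; plain torch equivalents are used here):

import torch
from torch.distributions import Bernoulli

phi = torch.zeros(5, requires_grad=True)  # logits of the sampling distribution
p0 = torch.full((5,), 0.4)                # fixed target

probs = torch.sigmoid(phi)
rv = Bernoulli(probs=probs)
b = rv.sample()                           # non-differentiable draw
loss = torch.sum((b - p0) ** 2)
ps_loss = loss.detach() * rv.log_prob(b).sum()

ps_loss.backward()
print(phi.grad)  # single-sample unbiased estimate of d E[loss] / d phi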
Example 2
    def forward(self, image):
        latent_means = self.encoder.forward(image)

        bernoulli_rv = Bernoulli(latent_means)
        bernoulli_samples = bernoulli_rv.sample().detach()

        image_mean = self.decoder.forward(bernoulli_samples)

        return image_mean, latent_means, bernoulli_samples
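A sketch of how this forward pass fits into training, with hypothetical stand-ins for self.encoder and self.decoder; the detached Bernoulli samples block gradients to the encoder, which is why an estimator like the pseudo-loss above is needed:

import torch
from torch import nn
from torch.distributions import Bernoulli

# hypothetical stand-ins for self.encoder and self.decoder
encoder = nn.Sequential(nn.Linear(784, 20), nn.Sigmoid())  # latent_means in (0, 1)
decoder = nn.Sequential(nn.Linear(20, 784), nn.Sigmoid())  # image_mean in (0, 1)

image = torch.rand(8, 784)
latent_means = encoder(image)
bernoulli_samples = Bernoulli(latent_means).sample().detach()
image_mean = decoder(bernoulli_samples)

# reconstruction term; the detach() means no gradient reaches the encoder,
# so the encoder needs a score-function (REINFORCE) term as in the first snippet
recon_loss = nn.functional.binary_cross_entropy(image_mean, image)
recon_loss.backward()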
Example 3
 def sample_from_prior(self, bs: int, **kwargs):
     pz = Bernoulli(logits=self.prior.expand(bs, *self.prior.shape[1:]))
     z = pz.sample()
     px = Bernoulli(logits=self.generative_model(z))
     return {'px': px, 'z': [z], 'pz': [pz]}
Example 4
# toy experiment: set rebar = 1 to use the REBAR estimator instead of RELAX
rebar = 0
if __name__ == "__main__":
    approx_net = RELAX_Net()
    parameters = toy_theta.repeat(batch_size)
    if rebar == 1:
        rebar_net = REBAR_Net(init_temperature, toy_func, scale_param)
        approx_net = rebar_net

    optimizer = torch.optim.SGD(approx_net.parameters(), lr=lr1)

    u = torch.arange(0.001, 0.999, 0.001)  # Variable is deprecated; a plain tensor suffices
    for i in range(iterations):
        parameters_grad = RELAX(toy_func, approx_net, Bernoulli(parameters),
                                parameters)

        ## Updating parameters
        #parameters.data += lr2* parameters_grad.data / batch_size
        for parameter in approx_net.parameters():
            if parameter.grad is not None:
                parameter.data -= lr1 * parameter.grad.data / batch_size
        approx_net.zero_grad()

        parameters.data += lr2 * torch.mean(parameters_grad.data) / batch_size

        relaxed_samples = relaxed_input(u, parameters[0])
        print(parameters[0])
        out = approx_net(relaxed_samples)
        #print(rebar_net.temp)
Example 5
from torch import nn, Tensor, zeros

# gradient estimator
from ovis.estimators.config import parse_estimator_id
Estimator, config = parse_estimator_id("ovis-gamma1")
estimator = Estimator(iw=16, **config)

# dataset: sample x ~ Bernoulli(0.5)
from torch.distributions import Bernoulli
dset = Bernoulli(logits=zeros((1000, 10))).sample()

# define a simple Bernoulli VAE
from ovis.models import TemplateModel
class SimpleModel(TemplateModel):
    def __init__(self, xdim, zdim):
        super().__init__()
        self.inference_network = nn.Linear(xdim, zdim)
        self.generative_model = nn.Linear(zdim, xdim)
        self.register_buffer('prior', zeros((1, zdim,)))

    def forward(self, x: Tensor, zgrads: bool = False, **kwargs):
        # q(z|x)
        qz = Bernoulli(logits=self.inference_network(x))
        # z ~ q(z | x); Bernoulli has no rsample(), so zgrads=True would raise
        z = qz.rsample() if zgrads else qz.sample()
        # p(z)
        pz = Bernoulli(logits=self.prior)
        # p(x|z)
        px = Bernoulli(logits=self.generative_model(z))
        # store z, pz, qz as list for hierarchical models
        return {'px': px, 'z': [z], 'qz': [qz], 'pz': [pz]}
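Continuing the snippet, a plausible training step. The call pattern estimator(model, x, backward=False, **config) returning (loss, diagnostics, output) is an assumption modeled on the library's README, not verified here:

import torch

model = SimpleModel(xdim=10, zdim=10)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for x in dset.split(32):  # mini-batches of the Bernoulli dataset above
    # assumed API: the estimator scores the model on a batch and returns a loss
    loss, diagnostics, output = estimator(model, x, backward=False, **config)
    optimizer.zero_grad()
    loss.mean().backward()
    optimizer.step()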
Example 6
 def call_rsample():
     return Bernoulli(r).rsample()
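This helper comes from a test suite: Bernoulli is discrete and does not implement reparameterized sampling, so rsample() raises NotImplementedError. A minimal check (pytest assumed):

import torch
import pytest
from torch.distributions import Bernoulli

r = torch.tensor(0.3, requires_grad=True)
assert not Bernoulli(r).has_rsample
with pytest.raises(NotImplementedError):
    Bernoulli(r).rsample()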
Example 7
 def test_bernoulli_3d(self):
     p = Variable(torch.Tensor(2, 3, 5).fill_(0.5), requires_grad=True)
     self.assertEqual(Bernoulli(p).sample().size(), (2, 3, 5))
     self.assertEqual(Bernoulli(p).sample_n(2).size(), (2, 2, 3, 5))
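sample_n has since been deprecated in favor of passing a sample shape to sample(); the equivalent modern assertions, as a sketch:

import torch
from torch.distributions import Bernoulli

p = torch.full((2, 3, 5), 0.5, requires_grad=True)
assert Bernoulli(p).sample().size() == (2, 3, 5)
assert Bernoulli(p).sample((2,)).size() == (2, 2, 3, 5)  # replaces sample_n(2)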
Example 8
 def proba_distribution(self, action_logits: th.Tensor) -> 'BernoulliDistribution':
     self.distribution = Bernoulli(logits=action_logits)
     return self
Example 9
 def call_sample_wshape_gt_2():
     return Bernoulli(r).sample((1, 2))
Example 10
 def observation_model(self, z:Tensor) -> Distribution:
     """return the distribution `p(x|z)`"""
     px_logits = self.decoder(z)
     px_logits = px_logits.view(-1, *self.input_shape)  # reshape logits to (batch, *input_shape)
     return Bernoulli(logits=px_logits)
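Downstream, such an observation model typically scores reconstructions via log p(x|z). A minimal sketch with a hypothetical decoder and input_shape:

import torch
from torch import nn
from torch.distributions import Bernoulli

input_shape = (1, 28, 28)              # hypothetical image shape
decoder = nn.Linear(16, 1 * 28 * 28)   # hypothetical stand-in for self.decoder

z = torch.randn(8, 16)
px_logits = decoder(z).view(-1, *input_shape)
px = Bernoulli(logits=px_logits)

x = torch.bernoulli(torch.full((8, *input_shape), 0.5))  # fake binary images
log_px = px.log_prob(x).sum(dim=(1, 2, 3))               # log p(x|z), one value per sample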
Example 11
 def predict_option_termination(self, state, current_option):
     termination = self.terminations(state)[:, current_option].sigmoid()
     option_termination = Bernoulli(termination).sample()
     Q = self.get_Q(state)
     next_option = Q.argmax(dim=-1)
     return bool(option_termination.item()), next_option.item()
Example 12
 def _select_variable_reinforce_multi(self, data):
     logit = self.policy(data)
     prob = torch.sigmoid(logit)
     dist = Bernoulli(prob.view(-1))
     vs = dist.sample()
     return vs.nonzero(), dist.log_prob(vs).sum()
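The returned log-probability sum is meant to be weighted by a task reward in a REINFORCE update. A self-contained sketch with a hypothetical policy and a placeholder reward:

import torch
from torch import nn
from torch.distributions import Bernoulli

policy = nn.Linear(4, 1)                  # hypothetical stand-in for self.policy
data = torch.randn(6, 4)

logit = policy(data)
dist = Bernoulli(torch.sigmoid(logit.view(-1)))
vs = dist.sample()
log_prob = dist.log_prob(vs).sum()

reward = 1.0                              # placeholder task reward
loss = -reward * log_prob                 # REINFORCE: maximize reward-weighted log-prob
loss.backward()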
Example 13
def learn(model,
          model_args,
          device,
          k=5,
          batch_size=32,
          seed=666,
          smt_epoch=100,
          rl_epoch=1000):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # The problem comes from the count vectorizer, which drops some words
    print("Load Dataset")
    dataset = Robust2004.torch_dataset()
    dataclasses = Robust2004.dataclasses()
    dataclasses = {qt._id: qt for qt in dataclasses}
    engine = get_engine()

    collate_fn = embedding_collate_decorator(sequence_collate_fn)

    indices = list(range(len(dataset)))
    random.shuffle(indices)
    for i, (trainindices, testindices) in enumerate(all_but_one(indices, k=k)):
        trainindices = chain(*trainindices)
        trainset = Subset(dataset, list(trainindices))
        testset = Subset(dataset, list(testindices))
        trainloader = DataLoader(trainset, 1, True, collate_fn=collate_fn)
        testloader = DataLoader(testset, 1, True, collate_fn=collate_fn)

        print("Build model")

        model = model(*model_args)
        try:
            model = model.to(device)
        except RuntimeError:
            # occasional cudnn error on the first transfer; retry once
            print("cudnn error")
            model = model.to(device)

        optimizer = optim.Adam(model.parameters())
        loss_function = nn.BCELoss()

        print("Train")
        best_model = 0
        delay = 0
        max_delay = 5
        print("Supervised Machine Translation")
        for epoch in range(smt_epoch):
            model.train()
            n, mean = 0, 0
            train_predictions = []
            train_ids = []
            for x, y, q_id, qrels, _ in trainloader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x)

                pred__ = pred > 0.5
                pred_ = pred__.detach().cpu().long().t().numpy().tolist()
                train_predictions.extend(pred_)
                train_ids.extend(map(lambda x: x.long().tolist(), q_id))

                loss = loss_function(pred, y.float())
                n += 1
                mean = ((n - 1) * mean + loss.item()) / n
                print(f"\rFold {i}, Epoch {epoch}\tTrain : {mean}", end="")

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            train_queries = {
                id_: dataclasses[str(id_)].get_text(pred)
                for id_, pred in zip(train_ids, train_predictions)
            }
            train_qrel = {
                id_: dataclasses[str(id_)].qrels
                for id_, pred in zip(train_ids, train_predictions)
            }
            train_map = eval_queries(train_queries, train_qrel, engine)
            print(
                f"\rFold {i}, Epoch {epoch}\tTrain Loss: {mean}, Train MAP {train_map}",
                end="")

            model.eval()
            train_mean = mean
            n, mean = 0, 0
            test_predictions = []
            test_ids = []
            for x, y, q_id, qrels, _ in testloader:
                x = x.to(device)
                y = y.to(device)

                pred = model(x)
                pred__ = pred > 0.5
                pred_ = pred__.detach().cpu().long().t().numpy().tolist()
                test_predictions.extend(pred_)
                test_ids.extend(map(lambda x: x.long().tolist(), q_id))

                loss = loss_function(pred, y.float())

                n += 1
                mean = ((n - 1) * mean + loss.item()) / n
                print(
                    f"\rFold {i}, Epoch {epoch}\tTrain Loss: {train_mean}\tTest : {mean}",
                    end="")

            test_queries = {
                id_: dataclasses[str(id_)].get_text(pred)
                for id_, pred in zip(test_ids, test_predictions)
            }
            test_qrel = {
                id_: dataclasses[str(id_)].qrels
                for id_, pred in zip(test_ids, test_predictions)
            }
            test_map = eval_queries(test_queries, test_qrel, engine)

            dataset_queries = {**train_queries, **test_queries}
            dataset_qrel = {**train_qrel, **test_qrel}
            dataset_map = eval_queries(dataset_queries, dataset_qrel, engine)

            print(
                "\b" * 500 +
                f"\nFold {i}, Epoch {epoch}\tTrain MAP {train_map}\tTest MAP : {test_map}\tDataset MAP : {dataset_map}"
            )

            if test_map > best_model:
                best_model = test_map
                delay = 0
            elif test_map < best_model:
                delay += 1
                if delay > max_delay:
                    print(best_model)
                    break

        print("Reinforcement Learning")
        mean_maps = {id_: [] for id_ in dataclasses.keys()}
        for epoch in range(rl_epoch):
            model.train()
            n, mean = 0, 0
            train_predictions = []
            train_ids = []
            for x, y, q_id, qrels, seq_lens in trainloader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x)

                sampler = Bernoulli(pred)

                batch_pred = sampler.sample()
                log_probs = sampler.log_prob(batch_pred)
                loss = log_probs.sum()

                batch_ids = list(map(lambda x: x.long().tolist(), q_id))

                batch_queries = {
                    id_: dataclasses[str(id_)].get_text(pred)
                    for id_, pred in zip(batch_ids, batch_pred)
                }
                batch_qrel = {
                    id_: dataclasses[str(id_)].qrels
                    for id_, pred in zip(batch_ids, batch_pred)
                }

                batch_map = eval_queries(batch_queries, batch_qrel, engine)

                n += 1
                mean = ((n - 1) * mean + batch_map) / n
                print(f"\rTrain Map : {mean: .3f}", end="")
                loss = -batch_map * loss

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            train_mean = mean
            n, mean = 0, 0
            test_predictions = []
            test_ids = []
            print()
            for x, y, q_id, qrels, seq_lens in testloader:
                x = x.to(device)
                y = y.to(device)

                pred = model(x)

                sampler = Bernoulli(pred)
                batch_pred = sampler.sample()
                log_probs = sampler.log_prob(batch_pred)
                loss = log_probs.sum()

                batch_ids = list(map(lambda x: x.long().tolist(), q_id))

                batch_queries = {
                    id_: dataclasses[str(id_)].get_text(pred)
                    for id_, pred in zip(batch_ids, batch_pred)
                }
                batch_qrel = {
                    id_: dataclasses[str(id_)].qrels
                    for id_, pred in zip(batch_ids, batch_pred)
                }

                batch_map = eval_queries(batch_queries, batch_qrel, engine)
                n += 1
                mean = ((n - 1) * mean + batch_map) / n
                print(
                    f"\rTrain MAP : {train_mean: .3f}\tTest MAP : {mean: .3f}",
                    end="")
            print()
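The RL phase above weights the summed log-probabilities by the raw batch MAP. A common refinement (not in the original) is subtracting a running-mean baseline to reduce gradient variance; a minimal sketch with hypothetical stand-ins for the model and reward:

import torch
from torch import nn
from torch.distributions import Bernoulli

model = nn.Sequential(nn.Linear(8, 1), nn.Sigmoid())  # hypothetical scorer
optimizer = torch.optim.Adam(model.parameters())
baseline, beta = 0.0, 0.9                             # running-mean reward baseline

for step in range(100):
    x = torch.randn(16, 8)
    pred = model(x).view(-1)
    sampler = Bernoulli(pred)
    batch_pred = sampler.sample()
    log_probs = sampler.log_prob(batch_pred).sum()

    reward = batch_pred.mean().item()                 # placeholder for eval_queries MAP
    loss = -(reward - baseline) * log_probs           # centered REINFORCE
    baseline = beta * baseline + (1 - beta) * reward

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()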
Example 14
 def forward(self, inputs, targets):
     return Bernoulli(0.5 * torch.ones_like(targets))
Example 15
 def forward(self, x):
     logits = self.logit_layer(x)
     return Bernoulli(logits=logits)
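Such a head is consumed by sampling actions and keeping their log-probabilities for the policy-gradient loss. A sketch with a hypothetical logit layer:

import torch
from torch import nn
from torch.distributions import Bernoulli

logit_layer = nn.Linear(4, 2)        # hypothetical stand-in for self.logit_layer
x = torch.randn(3, 4)

dist = Bernoulli(logits=logit_layer(x))
action = dist.sample()               # binary actions, shape (3, 2)
log_prob = dist.log_prob(action)     # kept for the policy-gradient update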
Example 16
def main():

    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")

    print("Initialize dataset {}".format(args.dataset))
    if args.dataset is None:
        datasets = [
            'datasets/eccv16_dataset_summe_google_pool5.h5',
            'datasets/eccv16_dataset_tvsum_google_pool5.h5',
            'datasets/eccv16_dataset_ovp_google_pool5.h5',
            'datasets/eccv16_dataset_youtube_google_pool5.h5'
        ]

        dataset = {}
        for name in datasets:
            _, base_filename = os.path.split(name)
            base_filename, _ = os.path.splitext(base_filename)
            dataset[base_filename] = h5py.File(name, 'r')
        # Load split file
        splits = read_json(args.split)
        assert args.split_id < len(
            splits), "split_id (got {}) exceeds {}".format(
                args.split_id, len(splits))
        split = splits[args.split_id]
        train_keys = split['train_keys']
        test_keys = split['test_keys']
        print("# train videos {}. # test videos {}".format(
            len(train_keys), len(test_keys)))

    else:
        dataset = h5py.File(args.dataset, 'r')
        num_videos = len(dataset.keys())
        splits = read_json(args.split)
        assert args.split_id < len(
            splits), "split_id (got {}) exceeds {}".format(
                args.split_id, len(splits))
        split = splits[args.split_id]
        train_keys = split['train_keys']
        test_keys = split['test_keys']
        print("# total videos {}. # train videos {}. # test videos {}".format(
            num_videos, len(train_keys), len(test_keys)))

    #### Set User Score Dataset ####
    userscoreset = h5py.File(args.userscore, 'r')

    print("Initialize model")
    model = DSRRL(in_dim=args.input_dim,
                  hid_dim=args.hidden_dim,
                  num_layers=args.num_layers,
                  cell=args.rnn_cell)

    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.5, 0.999),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)

    start_epoch = 0
    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint)

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        evaluate(model, dataset, userscoreset, test_keys, use_gpu)
        return

    if args.dataset is None:
        print("==> Start training")
        start_time = time.time()
        model.train()
        baselines = {key: 0.
                     for key in train_keys}  # baseline rewards for videos
        reward_writers = {key: []
                          for key in train_keys
                          }  # record reward changes for each video

        for epoch in range(start_epoch, args.max_epoch):
            idxs = np.arange(len(train_keys))
            np.random.shuffle(idxs)  # shuffle indices

            for idx in idxs:
                key_parts = train_keys[idx].split('/')
                name, key = key_parts
                seq = dataset[name][key]['features'][
                    ...]  # sequence of features, (seq_len, dim)
                seq = torch.from_numpy(seq).unsqueeze(
                    0)  # input shape (1, seq_len, dim)
                if use_gpu: seq = seq.cuda()
                probs, out_feats, att_score = model(
                    seq)  # output shape (1, seq_len, 1)

                cost = args.beta * (
                    probs.mean() -
                    0.5)**2  # minimize summary length penalty term
                m = Bernoulli(probs)
                epis_rewards = []
                for _ in range(args.num_episode):
                    actions = m.sample()
                    log_probs = m.log_prob(actions)
                    reward = compute_reward(seq, actions, use_gpu=use_gpu)
                    expected_reward = log_probs.mean() * (
                        reward - baselines[train_keys[idx]])
                    cost -= expected_reward
                    epis_rewards.append(reward.item())

                recon_loss = reconstruction_loss(seq, out_feats)
                spar_loss = sparsity_loss(att_score)

                total_loss = cost + recon_loss + spar_loss

                optimizer.zero_grad()
                total_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)  # clip before stepping
                optimizer.step()
                baselines[train_keys[
                    idx]] = 0.9 * baselines[train_keys[idx]] + 0.1 * np.mean(
                        epis_rewards
                    )  # update baseline reward via moving average
                reward_writers[train_keys[idx]].append(np.mean(epis_rewards))

            epoch_reward = np.mean(
                [reward_writers[key][epoch] for key in train_keys])
            #print("epoch {}/{}\t reward {}\t loss {}".format(epoch+1, args.max_epoch, epoch_reward, total_loss))
    else:
        print("==> Start training")
        start_time = time.time()
        model.train()
        baselines = {key: 0.
                     for key in train_keys}  # baseline rewards for videos
        reward_writers = {key: []
                          for key in train_keys
                          }  # record reward changes for each video

        for epoch in range(start_epoch, args.max_epoch):
            idxs = np.arange(len(train_keys))
            np.random.shuffle(idxs)  # shuffle indices

            for idx in idxs:
                key = train_keys[idx]
                seq = dataset[key]['features'][
                    ...]  # sequence of features, (seq_len, dim)
                seq = torch.from_numpy(seq).unsqueeze(
                    0)  # input shape (1, seq_len, dim)
                if use_gpu: seq = seq.cuda()
                probs, out_feats, att_score = model(
                    seq)  # output shape (1, seq_len, 1)

                cost = args.beta * (
                    probs.mean() -
                    0.5)**2  # minimize summary length penalty term
                m = Bernoulli(probs)
                epis_rewards = []
                for _ in range(args.num_episode):
                    actions = m.sample()
                    log_probs = m.log_prob(actions)
                    reward = compute_reward(seq, actions, use_gpu=use_gpu)
                    expected_reward = log_probs.mean() * (reward -
                                                          baselines[key])
                    cost -= expected_reward
                    epis_rewards.append(reward.item())

                recon_loss = reconstruction_loss(seq, out_feats)
                spar_loss = sparsity_loss(att_score)

                total_loss = cost + recon_loss + spar_loss

                #print(cost.item(), recon_loss.item(), spar_loss.item())

                optimizer.zero_grad()
                total_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)  # clip before stepping
                optimizer.step()
                baselines[key] = 0.9 * baselines[key] + 0.1 * np.mean(
                    epis_rewards)  # update baseline reward via moving average
                reward_writers[key].append(np.mean(epis_rewards))

            epoch_reward = np.mean(
                [reward_writers[key][epoch] for key in train_keys])
            #print("epoch {}/{}\t reward {}\t loss {}".format(epoch+1, args.max_epoch, epoch_reward, total_loss))

    write_json(reward_writers, osp.join(args.save_dir, 'rewards.json'))
    evaluate(model, dataset, userscoreset, test_keys, use_gpu)

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))

    model_state_dict = model.module.state_dict() if use_gpu else model.state_dict()
    model_save_path = osp.join(
        args.save_dir,
        args.metric + '_model_epoch_' + str(args.max_epoch) + '_split_id_' +
        str(args.split_id) + '-' + str(args.rnn_cell) + '.pth.tar')
    save_checkpoint(model_state_dict, model_save_path)
    print("Model saved to {}".format(model_save_path))