def get_reinforce_ps_loss(phi, p0, reinforce=False):
    # returns pseudoloss: loss whose gradient is unbiased for the
    # true gradient
    d = len(p0)
    e_b = sigmoid(phi)

    bn_rv = Bernoulli(probs=torch.ones(d) * e_b)
    binary_samples = bn_rv.sample().detach()
    # binary_samples = (torch.rand(d) > e_b).float().detach()

    if reinforce:
        binary_samples_ = bn_rv.sample().detach()
        baseline = torch.sum((binary_samples_ - p0)**2)
    else:
        baseline = 0.0

    sampled_loss = torch.sum((binary_samples - p0)**2)

    # probs, draw_array = get_all_probs(e_b, d)
    # losses_array = get_losses_from_draw_array(draw_array, p0)
    #
    # cat_rv = Categorical(probs)
    # indx = cat_rv.sample()
    # binary_samples = draw_array[indx]
    # sampled_loss = losses_array[indx]

    sampled_log_q = get_bernoulli_log_prob(e_b, binary_samples)

    ps_loss = (sampled_loss - baseline).detach() * sampled_log_q

    return ps_loss
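A minimal usage sketch (assumed, not from the source): `phi` is a leaf tensor requiring gradients, `sigmoid` is `torch.sigmoid`, and `get_bernoulli_log_prob(e_b, b)` returns the Bernoulli log-mass of `b` under probabilities `e_b`; backpropagating through the pseudo-loss then gives a single-sample REINFORCE estimate of the true gradient.

# Hypothetical usage: estimate d/d phi of E_{b ~ Bern(sigmoid(phi))}[ sum((b - p0)^2) ].
phi = torch.zeros(5, requires_grad=True)
p0 = torch.full((5,), 0.6)
ps_loss = get_reinforce_ps_loss(phi, p0, reinforce=True)
ps_loss.backward()
print(phi.grad)  # single-sample (high-variance) REINFORCE gradient estimate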
def forward(self, image):
    latent_means = self.encoder.forward(image)
    bernoulli_rv = Bernoulli(latent_means)
    bernoulli_samples = bernoulli_rv.sample().detach()
    image_mean = self.decoder.forward(bernoulli_samples)
    return image_mean, latent_means, bernoulli_samples
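A hedged single-step training sketch for this forward pass (assumptions not in the source: `model` is an instance of the class above, the encoder emits Bernoulli probabilities, the decoder emits pixel means in (0, 1), `image` is binarized, and the KL/prior term is omitted for brevity).

# Hypothetical update: pathwise reconstruction term for the decoder plus a
# score-function (REINFORCE) term for the encoder, since the latent is discrete.
image_mean, latent_means, z = model(image)
recon = torch.nn.functional.binary_cross_entropy(image_mean, image, reduction='sum')  # -log p(x|z)
log_q = Bernoulli(latent_means).log_prob(z).sum()  # log q(z|x) at the sampled latent
loss = recon + recon.detach() * log_q              # surrogate whose gradients match REINFORCE
optimizer.zero_grad()
loss.backward()
optimizer.step()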
def sample_from_prior(self, bs: int, **kwargs):
    pz = Bernoulli(logits=self.prior.expand(bs, *self.prior.shape[1:]))
    z = pz.sample()
    px = Bernoulli(logits=self.generative_model(z))
    return {'px': px, 'z': [z], 'pz': [pz]}
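A minimal usage sketch (the `model` variable is assumed, not from the source): draw latents from the prior and decode them into observations.

# Hypothetical: unconditional generation with the method above.
out = model.sample_from_prior(bs=16)
x_generated = out['px'].sample()   # binary observations drawn from p(x|z)
x_mean = out['px'].probs           # per-pixel Bernoulli means (useful for visualization)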
#########
# check
rebar = 0

if __name__ == "__main__":
    approx_net = RELAX_Net()
    parameters = toy_theta.repeat(batch_size)
    if rebar == 1:
        rebar_net = REBAR_Net(init_temperature, toy_func, scale_param)
        approx_net = rebar_net
    optimizer = torch.optim.SGD(approx_net.parameters(), lr=lr1)
    u = Variable(torch.arange(0.001, 0.999, 0.001))

    for i in range(iterations):
        parameters_grad = RELAX(toy_func, approx_net, Bernoulli(parameters), parameters)

        ## Updating parameters
        # parameters.data += lr2 * parameters_grad.data / batch_size
        for parameter in approx_net.parameters():
            if parameter.grad is not None:
                parameter.data -= lr1 * parameter.grad.data / batch_size
        approx_net.zero_grad()
        parameters.data += lr2 * torch.mean(parameters_grad.data) / batch_size

        relaxed_samples = relaxed_input(u, parameters[0])
        print(parameters[0])
        out = approx_net(relaxed_samples)
        # print(rebar_net.temp)
from torch import nn, Tensor, zeros

# gradient estimator
from ovis.estimators.config import parse_estimator_id
Estimator, config = parse_estimator_id("ovis-gamma1")
estimator = Estimator(iw=16, **config)

# dataset: sample x ~ Bernoulli(0.5)
from torch.distributions import Bernoulli
dset = Bernoulli(logits=zeros((1000, 10))).sample()

# define a simple Bernoulli VAE
from ovis.models import TemplateModel

class SimpleModel(TemplateModel):
    def __init__(self, xdim, zdim):
        super().__init__()
        self.inference_network = nn.Linear(xdim, zdim)
        self.generative_model = nn.Linear(zdim, xdim)
        self.register_buffer('prior', zeros((1, zdim,)))

    def forward(self, x: Tensor, zgrads: bool = False, **kwargs):
        # q(z|x)
        qz = Bernoulli(logits=self.inference_network(x))
        # z ~ q(z|x)
        z = qz.rsample() if zgrads else qz.sample()
        # p(z)
        pz = Bernoulli(logits=self.prior)
        # p(x|z)
        px = Bernoulli(logits=self.generative_model(z))
        # store z, pz, qz as lists for compatibility with hierarchical models
        return {'px': px, 'z': [z], 'qz': [qz], 'pz': [pz]}
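As a sanity check, a single-sample ELBO can be computed directly from the dictionary returned by `SimpleModel.forward`; this sketch is an illustration only and bypasses the `estimator` object, whose exact call signature is not shown here.

# Hypothetical: evaluate a one-sample ELBO from the model output.
model = SimpleModel(xdim=10, zdim=10)
x = dset[:32]
out = model(x)
log_px_z = out['px'].log_prob(x).sum(-1)                 # log p(x|z)
kl_mc = (out['qz'][0].log_prob(out['z'][0])
         - out['pz'][0].log_prob(out['z'][0])).sum(-1)   # Monte Carlo estimate of KL(q||p)
elbo = (log_px_z - kl_mc).mean()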
def call_rsample(): return Bernoulli(r).rsample()
def test_bernoulli_3d(self):
    p = Variable(torch.Tensor(2, 3, 5).fill_(0.5), requires_grad=True)
    self.assertEqual(Bernoulli(p).sample().size(), (2, 3, 5))
    self.assertEqual(Bernoulli(p).sample_n(2).size(), (2, 2, 3, 5))
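On recent PyTorch releases `sample_n` is deprecated; the same shape check can be written with an explicit sample shape (a sketch, not part of the original test).

# Equivalent shape check using Distribution.sample(sample_shape):
p = torch.full((2, 3, 5), 0.5, requires_grad=True)
assert Bernoulli(p).sample((2,)).size() == (2, 2, 3, 5)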
def proba_distribution(self, action_logits: th.Tensor) -> 'BernoulliDistribution':
    self.distribution = Bernoulli(logits=action_logits)
    return self
def call_sample_wshape_gt_2(): return Bernoulli(r).sample((1, 2))
def observation_model(self, z: Tensor) -> Distribution:
    """return the distribution `p(x|z)`"""
    px_logits = self.decoder(z)
    # reshape the output to (-1, *input_shape); the leading batch dimension is inferred
    px_logits = px_logits.view(-1, *self.input_shape)
    return Bernoulli(logits=px_logits)
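A short consumption sketch (assumed names `model`, `z`, and `x`, with `x` shaped like `input_shape`): the returned distribution is typically used to score observations.

# Hypothetical: per-example log-likelihood under the observation model above.
px = model.observation_model(z)
log_px_z = px.log_prob(x).flatten(start_dim=1).sum(-1)  # one scalar per batch element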
def predict_option_termination(self, state, current_option):
    termination = self.terminations(state)[:, current_option].sigmoid()
    option_termination = Bernoulli(termination).sample()
    Q = self.get_Q(state)
    next_option = Q.argmax(dim=-1)
    return bool(option_termination.item()), next_option.item()
def _select_variable_reinforce_multi(self, data):
    logit = self.policy(data)
    prob = torch.sigmoid(logit)
    dist = Bernoulli(prob.view(-1))
    vs = dist.sample()
    return vs.nonzero(), dist.log_prob(vs).sum()
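A hedged sketch of the policy-gradient step this helper is meant to feed (`selector`, `data`, `reward`, and `optimizer` are assumed names, not from the source).

# Hypothetical REINFORCE update driven by the summed log-probability returned above.
selected, log_prob = selector._select_variable_reinforce_multi(data)
loss = -reward * log_prob   # score-function surrogate; gradients flow only through log_prob
optimizer.zero_grad()
loss.backward()
optimizer.step()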
def learn(model, model_args, device, k=5, batch_size=32, seed=666, smt_epoch=100, rl_epoch=1000):
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # The problem comes from the count vectorizer, which drops some words
    print("Load Dataset")
    dataset = Robust2004.torch_dataset()
    dataclasses = Robust2004.dataclasses()
    dataclasses = {qt._id: qt for qt in dataclasses}
    engine = get_engine()
    collate_fn = embedding_collate_decorator(sequence_collate_fn)
    indices = list(range(len(dataset)))
    random.shuffle(indices)

    for i, (trainindices, testindices) in enumerate(all_but_one(indices, k=k)):
        trainindices = chain(*trainindices)
        trainset = Subset(dataset, list(trainindices))
        testset = Subset(dataset, list(testindices))
        trainloader = DataLoader(trainset, 1, True, collate_fn=collate_fn)
        testloader = DataLoader(testset, 1, True, collate_fn=collate_fn)

        print("Build model")
        model = model(*model_args)
        try:
            model = model.to(device)
        except RuntimeError:
            print("cudnn error")
            model = model.to(device)
        optimizer = optim.Adam(model.parameters())
        loss_function = nn.BCELoss()

        print("Train")
        best_model = 0
        delay = 0
        max_delay = 5

        print("Supervised Machine Translation")
        for epoch in range(smt_epoch):
            model.train()
            n, mean = 0, 0
            train_predictions = []
            train_ids = []
            for x, y, q_id, qrels, _ in trainloader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x)
                pred__ = pred > 0.5
                pred_ = pred__.detach().cpu().long().t().numpy().tolist()
                train_predictions.extend(pred_)
                train_ids.extend(map(lambda x: x.long().tolist(), q_id))
                loss = loss_function(pred, y.float())
                n += 1
                mean = ((n - 1) * mean + loss.item()) / n
                print(f"\rFold {i}, Epoch {epoch}\tTrain : {mean}", end="")
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            train_queries = {
                id_: dataclasses[str(id_)].get_text(pred)
                for id_, pred in zip(train_ids, train_predictions)
            }
            train_qrel = {
                id_: dataclasses[str(id_)].qrels
                for id_, pred in zip(train_ids, train_predictions)
            }
            train_map = eval_queries(train_queries, train_qrel, engine)
            print(f"\rFold {i}, Epoch {epoch}\tTrain Loss: {mean}, Train MAP {train_map}", end="")

            model.eval()
            train_mean = mean
            n, mean = 0, 0
            test_predictions = []
            test_ids = []
            for x, y, q_id, qrels, _ in testloader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x)
                pred__ = pred > 0.5
                pred_ = pred__.detach().cpu().long().t().numpy().tolist()
                test_predictions.extend(pred_)
                test_ids.extend(map(lambda x: x.long().tolist(), q_id))
                loss = loss_function(pred, y.float())
                n += 1
                mean = ((n - 1) * mean + loss.item()) / n
                print(f"\rFold {i}, Epoch {epoch}\tTrain Loss: {train_mean}\tTest : {mean}", end="")

            test_queries = {
                id_: dataclasses[str(id_)].get_text(pred)
                for id_, pred in zip(test_ids, test_predictions)
            }
            test_qrel = {
                id_: dataclasses[str(id_)].qrels
                for id_, pred in zip(test_ids, test_predictions)
            }
            test_map = eval_queries(test_queries, test_qrel, engine)

            dataset_queries = {**train_queries, **test_queries}
            dataset_qrel = {**train_qrel, **test_qrel}
            dataset_map = eval_queries(dataset_queries, dataset_qrel, engine)
            print("\b" * 500 +
                  f"\nFold {i}, Epoch {epoch}\tTrain MAP {train_map}\tTest MAP : {test_map}\tDataset MAP : {dataset_map}")

            if test_map > best_model:
                best_model = test_map
                delay = 0
            elif test_map < best_model:
                delay += 1
                if delay > max_delay:
                    print(best_model)
                    break

        print("Reinforcement Learning")
        mean_maps = {id_: [] for id_ in dataclasses.keys()}
        for epoch in range(rl_epoch):
            model.train()
            n, mean = 0, 0
            train_predictions = []
            train_ids = []
            for x, y, q_id, qrels, seq_lens in trainloader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x)
                sampler = Bernoulli(pred)
                batch_pred = sampler.sample()
                log_probs = sampler.log_prob(batch_pred)
                loss = log_probs.sum()
                batch_ids = list(map(lambda x: x.long().tolist(), q_id))
                batch_queries = {
                    id_: dataclasses[str(id_)].get_text(pred)
                    for id_, pred in zip(batch_ids, batch_pred)
                }
                batch_qrel = {
                    id_: dataclasses[str(id_)].qrels
                    for id_, pred in zip(batch_ids, batch_pred)
                }
                batch_map = eval_queries(batch_queries, batch_qrel, engine)
                n += 1
                mean = ((n - 1) * mean + batch_map) / n
                print(f"\rTrain Map : {mean: .3f}", end="")
                loss = -batch_map * loss
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            train_mean = mean
            n, mean = 0, 0
            test_predictions = []
            test_ids = []
            print()
            for x, y, q_id, qrels, seq_lens in testloader:
                x = x.to(device)
                y = y.to(device)
                pred = model(x)
                sampler = Bernoulli(pred)
                batch_pred = sampler.sample()
                log_probs = sampler.log_prob(batch_pred)
                loss = log_probs.sum()
                batch_ids = list(map(lambda x: x.long().tolist(), q_id))
                batch_queries = {
                    id_: dataclasses[str(id_)].get_text(pred)
                    for id_, pred in zip(batch_ids, batch_pred)
                }
                batch_qrel = {
                    id_: dataclasses[str(id_)].qrels
                    for id_, pred in zip(batch_ids, batch_pred)
                }
                batch_map = eval_queries(batch_queries, batch_qrel, engine)
                n += 1
                mean = ((n - 1) * mean + batch_map) / n
                print(f"\rTrain MAP : {train_mean: .3f}\tTest Map : {mean: .3f}", end="")
            print()
def forward(self, inputs, targets): return Bernoulli(0.5 * torch.ones_like(targets))
def forward(self, x):
    logits = self.logit_layer(x)
    return Bernoulli(logits=logits)
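A brief usage sketch (assumed names `policy` and `x`): sample binary actions from the returned distribution and keep their log-probabilities for a policy-gradient objective.

# Hypothetical: stochastic binary decisions from the Bernoulli head above.
dist = policy(x)                            # `policy` is an instance of the module above
actions = dist.sample()
log_probs = dist.log_prob(actions).sum(-1)  # summed log-probability per example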
def main():
    if not args.evaluate:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_train.txt'))
    else:
        sys.stdout = Logger(osp.join(args.save_dir, 'log_test.txt'))
    print("==========\nArgs:{}\n==========".format(args))

    if use_gpu:
        print("Currently using GPU {}".format(args.gpu))
        cudnn.benchmark = True
        torch.cuda.manual_seed_all(args.seed)
    else:
        print("Currently using CPU")

    print("Initialize dataset {}".format(args.dataset))
    if args.dataset is None:
        datasets = [
            'datasets/eccv16_dataset_summe_google_pool5.h5',
            'datasets/eccv16_dataset_tvsum_google_pool5.h5',
            'datasets/eccv16_dataset_ovp_google_pool5.h5',
            'datasets/eccv16_dataset_youtube_google_pool5.h5'
        ]
        dataset = {}
        for name in datasets:
            _, base_filename = os.path.split(name)
            base_filename = os.path.splitext(base_filename)
            dataset[base_filename[0]] = h5py.File(name, 'r')
        # Load split file
        splits = read_json(args.split)
        assert args.split_id < len(splits), "split_id (got {}) exceeds {}".format(
            args.split_id, len(splits))
        split = splits[args.split_id]
        train_keys = split['train_keys']
        test_keys = split['test_keys']
        print("# train videos {}. # test videos {}".format(
            len(train_keys), len(test_keys)))
    else:
        dataset = h5py.File(args.dataset, 'r')
        num_videos = len(dataset.keys())
        splits = read_json(args.split)
        assert args.split_id < len(splits), "split_id (got {}) exceeds {}".format(
            args.split_id, len(splits))
        split = splits[args.split_id]
        train_keys = split['train_keys']
        test_keys = split['test_keys']
        print("# total videos {}. # train videos {}. # test videos {}".format(
            num_videos, len(train_keys), len(test_keys)))

    #### Set User Score Dataset ####
    userscoreset = h5py.File(args.userscore, 'r')

    print("Initialize model")
    model = DSRRL(in_dim=args.input_dim,
                  hid_dim=args.hidden_dim,
                  num_layers=args.num_layers,
                  cell=args.rnn_cell)

    optimizer = torch.optim.Adam(model.parameters(),
                                 betas=(0.5, 0.999),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    if args.stepsize > 0:
        scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=args.stepsize,
                                        gamma=args.gamma)

    if args.resume:
        print("Loading checkpoint from '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        model.load_state_dict(checkpoint)
    else:
        start_epoch = 0

    if use_gpu:
        model = nn.DataParallel(model).cuda()

    if args.evaluate:
        print("Evaluate only")
        evaluate(model, dataset, test_keys, use_gpu)
        return

    if args.dataset is None:
        print("==> Start training")
        start_time = time.time()
        model.train()
        baselines = {key: 0. for key in train_keys}  # baseline rewards for videos
        reward_writers = {key: [] for key in train_keys}  # record reward changes for each video

        for epoch in range(start_epoch, args.max_epoch):
            idxs = np.arange(len(train_keys))
            np.random.shuffle(idxs)  # shuffle indices
            for idx in idxs:
                key_parts = train_keys[idx].split('/')
                name, key = key_parts
                seq = dataset[name][key]['features'][...]
                # sequence of features, (seq_len, dim)
                seq = torch.from_numpy(seq).unsqueeze(0)  # input shape (1, seq_len, dim)
                if use_gpu:
                    seq = seq.cuda()
                probs, out_feats, att_score = model(seq)  # output shape (1, seq_len, 1)

                cost = args.beta * (probs.mean() - 0.5)**2  # minimize summary length penalty term
                m = Bernoulli(probs)
                epis_rewards = []
                for _ in range(args.num_episode):
                    actions = m.sample()
                    log_probs = m.log_prob(actions)
                    reward = compute_reward(seq, actions, use_gpu=use_gpu)
                    expected_reward = log_probs.mean() * (reward - baselines[train_keys[idx]])
                    cost -= expected_reward
                    epis_rewards.append(reward.item())

                recon_loss = reconstruction_loss(seq, out_feats)
                spar_loss = sparsity_loss(att_score)
                total_loss = cost + recon_loss + spar_loss

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
                baselines[train_keys[idx]] = 0.9 * baselines[train_keys[idx]] + 0.1 * np.mean(
                    epis_rewards)  # update baseline reward via moving average
                reward_writers[train_keys[idx]].append(np.mean(epis_rewards))

            epoch_reward = np.mean([reward_writers[key][epoch] for key in train_keys])
            #print("epoch {}/{}\t reward {}\t loss {}".format(epoch+1, args.max_epoch, epoch_reward, total_loss))
    else:
        print("==> Start training")
        start_time = time.time()
        model.train()
        baselines = {key: 0. for key in train_keys}  # baseline rewards for videos
        reward_writers = {key: [] for key in train_keys}  # record reward changes for each video

        for epoch in range(start_epoch, args.max_epoch):
            idxs = np.arange(len(train_keys))
            np.random.shuffle(idxs)  # shuffle indices
            for idx in idxs:
                key = train_keys[idx]
                seq = dataset[key]['features'][...]  # sequence of features, (seq_len, dim)
                seq = torch.from_numpy(seq).unsqueeze(0)  # input shape (1, seq_len, dim)
                if use_gpu:
                    seq = seq.cuda()
                probs, out_feats, att_score = model(seq)  # output shape (1, seq_len, 1)

                cost = args.beta * (probs.mean() - 0.5)**2  # minimize summary length penalty term
                m = Bernoulli(probs)
                epis_rewards = []
                for _ in range(args.num_episode):
                    actions = m.sample()
                    log_probs = m.log_prob(actions)
                    reward = compute_reward(seq, actions, use_gpu=use_gpu)
                    expected_reward = log_probs.mean() * (reward - baselines[key])
                    cost -= expected_reward
                    epis_rewards.append(reward.item())

                recon_loss = reconstruction_loss(seq, out_feats)
                spar_loss = sparsity_loss(att_score)
                total_loss = cost + recon_loss + spar_loss
                #print(cost.item(), recon_loss.item(), spar_loss.item())

                optimizer.zero_grad()
                total_loss.backward()
                optimizer.step()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
                baselines[key] = 0.9 * baselines[key] + 0.1 * np.mean(
                    epis_rewards)  # update baseline reward via moving average
                reward_writers[key].append(np.mean(epis_rewards))

            epoch_reward = np.mean([reward_writers[key][epoch] for key in train_keys])
            #print("epoch {}/{}\t reward {}\t loss {}".format(epoch+1, args.max_epoch, epoch_reward, total_loss))

    write_json(reward_writers, osp.join(args.save_dir, 'rewards.json'))
    evaluate(model, dataset, userscoreset, test_keys, use_gpu)

    elapsed = round(time.time() - start_time)
    elapsed = str(datetime.timedelta(seconds=elapsed))
    print("Finished. Total elapsed time (h:m:s): {}".format(elapsed))

    model_state_dict = model.module.state_dict() if use_gpu else model.state_dict()
    model_save_path = osp.join(
        args.save_dir, args.metric + '_model_epoch_' + str(args.max_epoch) +
        '_split_id_' + str(args.split_id) + '-' + str(args.rnn_cell) + '.pth.tar')
    save_checkpoint(model_state_dict, model_save_path)
    print("Model saved to {}".format(model_save_path))