def probabilty_s_given_y(theta, s, y, l, k, ratio_agreement=0.95, model=1):
    if model == 1:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        r = ratio_agreement * eq.squeeze() + (1 - ratio_agreement) * (1 - eq.squeeze())
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            m = Beta(r[i] * params[i] / (r[i] + 1), params[i] / (r[i] + 1))
            probability *= torch.exp(m.log_prob(s[:, i].double())) * l[:, i].double() \
                + (1 - l[:, i]).double()
    elif model == 2:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            m = HalfNormal(params[i])
            probability *= ((1 - torch.exp(m.log_prob(s[:, i].double()))) * eq[i, 0]
                            + torch.exp(m.log_prob(s[:, i].double())) * (1 - eq[i, 0])) \
                * l[:, i].double() + (1 - l[:, i]).double()
    return probability
def act(self, state_tensor):  # state is a batch of tensors rather than a joint state
    # value, mu, cov = self.value_action_predictor(state_tensor)
    # dist = MultivariateNormal(mu, cov)
    # actions = dist.sample()
    # action_log_probs = dist.log_prob(actions)
    # action_to_take = [ActionXY(action[0], action[1]) for action in actions.cpu().numpy()]
    value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(state_tensor)
    vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
    vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
    actions = torch.cat(
        [vx_dist.sample().unsqueeze(1), vy_dist.sample().unsqueeze(1)], dim=1)
    action_log_probs = vx_dist.log_prob(actions[:, 0]).unsqueeze(1) + \
        vy_dist.log_prob(actions[:, 1]).unsqueeze(1)
    # Beta samples lie in [0, 1]; rescale them to the [-1, 1] action range
    action_to_take = [
        ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
        for action in actions.cpu().numpy()
    ]
    return value, actions, action_log_probs, action_to_take
def optimize_epoch(self, num_epochs):
    if self.optimizer is None:
        raise ValueError('Learning rate is not set!')
    if self.data_loader is None:
        # convert action into indices
        self.data_loader = DataLoader(self.memory, self.batch_size, shuffle=True)
    average_value_loss = 0
    average_policy_loss = 0
    for epoch in range(num_epochs):
        value_loss = 0
        policy_loss = 0
        logging.debug('{}-th epoch starts'.format(epoch))
        for data in self.data_loader:
            inputs, values, _, actions = data
            self.optimizer.zero_grad()
            # # outputs_val, outputs_mu, outputs_cov = self.model(inputs)
            # action_log_probs = MultivariateNormal(outputs_mu, outputs_cov).log_prob(actions)
            outputs_val, alpha_beta_1, alpha_beta_2 = self.model(inputs)
            vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
            vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
            # shrink actions slightly into the open interval (0, 1) so log_prob stays finite
            p = torch.Tensor([1 + 1e-6]).to(self.device)
            q = torch.Tensor([1e-8]).to(self.device)
            action_log_probs = (vx_dist.log_prob(actions[:, 0] / p + q)).unsqueeze(1) + \
                (vy_dist.log_prob(actions[:, 1] / p + q)).unsqueeze(1)
            values = values.to(self.device)
            dist_entropy = vx_dist.entropy().mean() + vy_dist.entropy().mean()
            loss1 = self.criterion_val(outputs_val, values)
            loss2 = -action_log_probs.mean()
            loss = loss1 + loss2 - dist_entropy * self.entropy_coef
            # loss = loss1 + loss2
            loss.backward()
            self.optimizer.step()
            value_loss += loss1.data.item()
            policy_loss += loss2.data.item()
        logging.debug('{}-th epoch ends'.format(epoch))
        average_value_loss = value_loss / len(self.memory)
        average_policy_loss = policy_loss / len(self.memory)
        self.writer.add_scalar('IL/average_value_loss', average_value_loss, epoch)
        self.writer.add_scalar('IL/average_policy_loss', average_policy_loss, epoch)
        logging.info('Average value, policy loss in epoch %d: %.2E, %.2E',
                     epoch, average_value_loss, average_policy_loss)
    return average_value_loss
def forward(self, x, a=None):
    alpha = self.alpha(x)
    beta = self.beta(x)
    policy = Beta(alpha, beta)
    # print(alpha, beta)
    # print(alpha.squeeze(), beta.squeeze())
    pi = policy.sample()
    pi = pi.squeeze()
    # print(pi)
    logp_pi = policy.log_prob(pi).sum(dim=1)
    if a is not None:
        logp = policy.log_prob(a).sum(dim=1)
    else:
        logp = None
    return pi, logp, logp_pi
def update(self, a_tnsr, b_tnsr, action_tensor, reward_tensor):
    self.optimizer.zero_grad()
    m = Beta(a_tnsr, b_tnsr)
    log_probs = m.log_prob(action_tensor)
    # REINFORCE-style objective: minimize the negative reward-weighted log-likelihood
    log_probs = -1 * torch.matmul(reward_tensor, log_probs)
    loss = log_probs.mean()
    # print(loss)
    loss.backward()
    self.optimizer.step()
    self.scheduler.step()
def calc_unnormalized_beta_cdf(self, b, alpha, beta, npts=100):
    bt = Beta(alpha.float(), beta.float())
    x = torch.linspace(0 + self.epsilon, b - self.epsilon,
                       int(npts * b.cpu().numpy()),
                       device=self.device).float()
    pdf = bt.log_prob(x).exp()
    dx = torch.tensor([1. / (npts * self.num_classes)], device=self.device).float()
    P = pdf.sum(dim=1) * dx
    return P
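# A minimal, self-contained sketch (the helper name beta_cdf_trapz and its defaults
# are illustrative, not part of the snippet above) of approximating a Beta CDF on
# [eps, b] with the trapezoidal rule instead of a hand-tuned dx normalisation:
import torch
from torch.distributions import Beta


def beta_cdf_trapz(b, alpha, beta, npts=1000, eps=1e-6):
    """Approximate P(X <= b) for X ~ Beta(alpha, beta) via the trapezoidal rule."""
    b = float(b)
    x = torch.linspace(eps, b - eps, npts)
    pdf = Beta(alpha, beta).log_prob(x).exp()
    return torch.trapz(pdf, x)


# Beta(2, 2) is symmetric about 0.5, so the CDF at 0.5 should come out close to 0.5.
print(beta_cdf_trapz(0.5, 2.0, 2.0))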
def forward(self, x, a=None):
    # according to the paper, add 1 to both alpha and beta
    # TODO: check if this is the right way to add bias to alpha and beta.
    b = self.maxlikely(x)
    # print(b)
    # gamma - exploration factor
    alpha = 1 + self.gamma * b
    beta = 1 + self.gamma * (1 - b)
    policy = Beta(alpha, beta)
    # if a is None:
    #     print('[%f,%f]' % (alpha.data, beta.data))
    pi = policy.sample()
    logp_pi = policy.log_prob(pi).sum(dim=1)
    if a is not None:
        logp = policy.log_prob(a).sum(dim=1)
    else:
        logp = None
    return pi, logp, logp_pi
def log_probs(self, batch_states, batch_actions):
    # Get action means from policy
    act = self.forward(batch_states)
    # Calculate probabilities: the first half of the outputs parameterizes
    # concentration1, the second half concentration0, both squashed into (0, 5)
    c1 = F.sigmoid(act[:, :, :self.act_dim]) * 5
    c2 = F.sigmoid(act[:, :, self.act_dim:]) * 5
    beta_dist = Beta(c1, c2)
    log_probs = beta_dist.log_prob(batch_actions)
    return log_probs.sum(1, keepdim=True)
class MixtureCDFFlow(nn.Module):
    def __init__(self, base_dist='uniform', mixture_dist='gaussian', n_components=4):
        super().__init__()
        self.composition = False
        if base_dist == 'uniform':
            self.base_dist = Uniform(ptu.tensor(0.0), ptu.tensor(1.0))
        elif base_dist == 'beta':
            self.base_dist = Beta(ptu.tensor(5.0), ptu.tensor(5.0))
        else:
            raise NotImplementedError

        self.loc = nn.Parameter(torch.randn(n_components), requires_grad=True)
        self.log_scale = nn.Parameter(torch.zeros(n_components), requires_grad=True)
        self.weight_logits = nn.Parameter(torch.zeros(n_components), requires_grad=True)

        if mixture_dist == 'gaussian':
            self.mixture_dist = Normal  # (self.loc, self.log_scale.exp())
        elif mixture_dist == 'logistic':
            raise NotImplementedError
        self.n_components = n_components

    def flow(self, x):
        # z = cdf of x
        weights = F.softmax(self.weight_logits, dim=0).unsqueeze(0).repeat(x.shape[0], 1)
        z = (self.mixture_dist(self.loc, self.log_scale.exp()).cdf(
            x.unsqueeze(1).repeat(1, self.n_components)) * weights).sum(dim=1)

        # log_det = log dz/dx = log pdf(x)
        log_det = (self.mixture_dist(self.loc, self.log_scale.exp()).log_prob(
            x.unsqueeze(1).repeat(1, self.n_components)).exp() * weights).sum(dim=1).log()

        return z, log_det

    def log_prob(self, x):
        z, log_det = self.flow(x)
        return self.base_dist.log_prob(z) + log_det

    # Compute loss as negative log-likelihood
    def nll(self, x):
        return -self.log_prob(x).mean()

    def get_density(self):
        x = np.linspace(-3, 3, 1000)
        with torch.no_grad():
            y = self.log_prob(torch.tensor(x)).exp().numpy()
        return x, y
def step(self, input, target, teams):
    """Do one training step and return the loss."""
    self.train()
    self.zero_grad()

    event_scores, time_scores = self.forward(input, teams)
    event_proba = F.softmax(event_scores, 2)
    time_proba = F.softmax(time_scores, 2)

    # Only get events during the games
    events_during_game, target_events_during_game, time_during_game, \
        target_time_during_game, end_game_indices = get_during_game_tensors(
            event_scores, time_scores, target, return_end_game_idx=True)

    # Only get goals during the games
    goals_home_tensor, goals_home_target_tensor, goals_away_tensor, \
        goals_away_target_tensor = get_during_game_goals(event_proba, target)
    goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
    goals_target_tensor = torch.stack(
        [goals_home_target_tensor, goals_away_target_tensor], 1)

    accuracy = torch.tensor(0)
    loss_result_game = torch.tensor(0)

    # Events and time loss functions
    loss_events_during_game = self.loss_function_events(
        events_during_game, target_events_during_game)
    loss_time_during_game = self.loss_function_time(
        time_during_game, target_time_during_game)

    # Loss term that discourages too many events in the same minute
    time_proba_during_game = F.softmax(time_during_game, 1)
    beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
    log_prob = beta_distr.log_prob(time_proba_during_game[:, SAME_TIME_THAN_PREV])
    same_minute_event_loss = -torch.mean(log_prob)
    # same_minute_event_loss = Variable(torch.tensor(0))

    total_loss = (loss_events_during_game + loss_time_during_game +
                  BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

    total_loss.backward()
    self.optimizer.step()

    return event_proba, time_proba, total_loss.data.item(), \
        loss_events_during_game.data.item(), loss_time_during_game.data.item(), \
        same_minute_event_loss.item(), loss_result_game.data.item(), accuracy.item()
def predict_proba_and_get_loss(self, input, target, teams):
    event_scores, time_scores = self.forward(input, teams)

    # Get probabilities
    event_proba = F.softmax(event_scores, 2)
    time_proba = F.softmax(time_scores, 2)

    # Separate events from time
    target_events = target[:, :, 0]
    target_time = target[:, :, 1]

    # Only get events during the games
    events_during_game, target_events_during_game, time_during_game, \
        target_time_during_game = get_during_game_tensors(
            event_scores, time_scores, target)

    # Only get goals during the games
    goals_home_tensor, goals_home_target_tensor, goals_away_tensor, \
        goals_away_target_tensor = get_during_game_goals(event_proba, target)
    goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
    goals_target_tensor = torch.stack(
        [goals_home_target_tensor, goals_away_target_tensor], 1)
    games_proba = get_games_proba_from_goals_proba(goals_tensor)
    games_results = get_games_results_from_goals(goals_target_tensor)

    # Cross-entropy loss for the result, but don't use it in the backward pass
    loss_result_game = self.loss_function_result(games_proba, games_results)

    # Loss term that discourages too many events in the same minute
    time_proba_during_game = F.softmax(time_during_game, 1)
    beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
    log_prob = beta_distr.log_prob(time_proba_during_game[:, SAME_TIME_THAN_PREV])
    same_minute_event_loss = -torch.mean(log_prob)

    # Events and time loss functions
    loss_time_during_game = self.loss_function_time(
        time_during_game, target_time_during_game)
    loss_events_during_game = self.loss_function_events(
        events_during_game, target_events_during_game)

    total_loss = (loss_events_during_game + loss_time_during_game +
                  BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

    return event_proba, time_proba, total_loss.data.item(), \
        loss_events_during_game.data.item(), loss_time_during_game.data.item(), \
        same_minute_event_loss.data.item(), loss_result_game.data.item()
def get_log_qzpi(zmu, zstd, zsamp, pi_alpha, pi_beta, pi_samp):
    qz_R_obj = LogNormal(zmu[:, 0], zstd[:, 0])
    qz_C_obj = LogNormal(zmu[:, 1], zstd[:, 1])
    qz_Ts_obj = LogNormal(zmu[:, 2], zstd[:, 2])
    qz_Td_obj = LogNormal(zmu[:, 3], zstd[:, 3])
    qz_CO_obj = LogNormal(zmu[:, 4], zstd[:, 4])
    qz_pi_obj = Beta(pi_alpha, pi_beta)

    return torch.sum(qz_R_obj.log_prob(zsamp[:, 0])) + \
        torch.sum(qz_C_obj.log_prob(zsamp[:, 1])) + \
        torch.sum(qz_Ts_obj.log_prob(zsamp[:, 2])) + \
        torch.sum(qz_Td_obj.log_prob(zsamp[:, 3])) + \
        torch.sum(qz_CO_obj.log_prob(zsamp[:, 4])) + \
        torch.sum(qz_pi_obj.log_prob(torch.clamp(pi_samp, 0.1, 0.9)))
def evaluate_actions(pi, actions, dist_type, env_type):
    if env_type == 'atari':
        cate_dist = Categorical(pi)
        log_prob = cate_dist.log_prob(actions).unsqueeze(-1)
        entropy = cate_dist.entropy().mean()
    else:
        if dist_type == 'gauss':
            mean, std = pi
            normal_dist = Normal(mean, std)
            log_prob = normal_dist.log_prob(actions).sum(dim=1, keepdim=True)
            entropy = normal_dist.entropy().mean()
        elif dist_type == 'beta':
            alpha, beta = pi
            beta_dist = Beta(alpha, beta)
            log_prob = beta_dist.log_prob(actions).sum(dim=1, keepdim=True)
            entropy = beta_dist.entropy().mean()
    return log_prob, entropy
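# For context on where an (alpha, beta) pair like the one unpacked above often comes
# from: a hedged sketch (class and layer names are illustrative, not taken from any
# snippet here) of a Beta policy head that keeps both concentrations above 1 via
# softplus, samples in (0, 1), and rescales the sample to the [-1, 1] action range:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.distributions import Beta


class BetaPolicyHead(nn.Module):
    """Illustrative Beta policy head; softplus(.) + 1 keeps the density unimodal."""

    def __init__(self, in_dim, act_dim):
        super().__init__()
        self.alpha_layer = nn.Linear(in_dim, act_dim)
        self.beta_layer = nn.Linear(in_dim, act_dim)

    def forward(self, features):
        alpha = F.softplus(self.alpha_layer(features)) + 1.0
        beta = F.softplus(self.beta_layer(features)) + 1.0
        return Beta(alpha, beta)


head = BetaPolicyHead(in_dim=8, act_dim=2)
dist = head(torch.randn(4, 8))
raw_action = dist.sample()            # each component lies in (0, 1)
env_action = raw_action * 2.0 - 1.0   # rescaled to [-1, 1] for the environment
log_prob = dist.log_prob(raw_action).sum(dim=1, keepdim=True)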
def train_on_batch(self, batch):
    """Perform one optimization step.

    Args:
        batch (tuple): tuple of batches of environment observations,
            calling programs, lstm's hidden and cell states

    Returns:
        policy loss, value loss, total loss combining policy and value losses
    """
    e_t = torch.FloatTensor(np.stack(batch[0]))
    i_t = batch[1]
    lstm_states = batch[2]
    h_t, c_t = zip(*lstm_states)
    h_t, c_t = torch.squeeze(torch.stack(list(h_t))), torch.squeeze(torch.stack(list(c_t)))

    policy_labels = torch.squeeze(torch.stack(batch[3]))
    value_labels = torch.stack(batch[4]).view(-1, 1)

    self.optimizer.zero_grad()
    policy_predictions, value_predictions, _, _ = self.predict_on_batch(e_t, i_t, h_t, c_t)

    # policy_loss = -torch.mean(policy_labels * torch.log(policy_predictions), dim=-1).mean()
    beta = Beta(policy_predictions[0], policy_predictions[1])
    policy_action = beta.sample()
    prob_action = beta.log_prob(policy_action)
    log_mcts = self.temperature * torch.log(policy_labels)
    with torch.no_grad():
        modified_kl = prob_action - log_mcts
    policy_loss = -modified_kl * (torch.log(modified_kl) + prob_action)
    entropy_loss = self.entropy_lambda * beta.entropy()
    policy_network_loss = policy_loss + entropy_loss

    value_network_loss = torch.pow(value_predictions - value_labels, 2).mean()

    total_loss = (policy_network_loss + value_network_loss) / 2
    total_loss.backward()
    self.optimizer.step()

    return policy_network_loss, value_network_loss, total_loss
class MLLGP():
    def __init__(self, model_gp, likelihood_gp, hyperpriors: dict) -> None:
        self.model_gp = model_gp
        self.likelihood_gp = likelihood_gp
        self.hyperpriors = hyperpriors

        a_beta = self.hyperpriors["lengthscales"].kwds["a"]
        b_beta = self.hyperpriors["lengthscales"].kwds["b"]
        self.Beta_tmp = Beta(concentration1=a_beta, concentration0=b_beta)

        a_gg = self.hyperpriors["outputscale"].kwds["a"]
        b_gg = self.hyperpriors["outputscale"].kwds["scale"]
        self.Gamma_tmp = Gamma(concentration=a_gg, rate=1. / b_gg)

    def log_marginal(self, lengthscales, outputscale) -> float:
        """ """
        # print("lengthscales.shape:", lengthscales.shape)
        # print("outputscale.shape:", outputscale.shape)
        if lengthscales.dim() == 3 or outputscale.dim() == 3:
            Nels = lengthscales.shape[0]
            loss_vec = torch.zeros(Nels)
            for k in range(Nels):
                loss_vec[k] = self.log_marginal(lengthscales[k, 0, :], outputscale[k, 0, :])
            return loss_vec

        assert lengthscales.dim() <= 1 and outputscale.dim() <= 1
        assert not torch.any(torch.isnan(lengthscales)) and not torch.any(
            torch.isinf(lengthscales)), "lengthscales is inf or NaN"
        assert not torch.isnan(outputscale) and not torch.isinf(
            outputscale), "outputscale is inf or NaN"

        # Update hyperparameters:
        self.model_gp.covar_module.outputscale = outputscale
        self.model_gp.covar_module.base_kernel.lengthscale = lengthscales
        # self.model_gp.display_hyperparameters()

        # Get the log prob of the marginal distribution:
        function_dist = self.model_gp(self.model_gp.train_inputs[0])
        output = self.likelihood_gp(function_dist)
        loss_val = output.log_prob(self.model_gp.train_targets).view(1)

        # if self.debug == True:
        #     pdb.set_trace()

        loss_lengthscales_hyperprior = torch.sum(self.Beta_tmp.log_prob(lengthscales)).view(1)
        loss_outputscale_hyperprior = self.Gamma_tmp.log_prob(outputscale)
        # loss_lengthscales_hyperprior = sum(self.hyperpriors["lengthscales"].logpdf(lengthscales))
        # loss_outputscale_hyperprior = self.hyperpriors["outputscale"].logpdf(outputscale).item()

        loss_val += loss_lengthscales_hyperprior + loss_outputscale_hyperprior

        try:
            assert not torch.any(torch.isnan(loss_val)) and not torch.any(
                torch.isinf(loss_val)), "loss_val is Inf or NaN"
        except AssertionError:  # debug TODO DEBUG
            logger.info("loss_val: {0:s}".format(str(loss_val)))
            logger.info("loss_lengthscales_hyperprior: {0:s}".format(
                str(loss_lengthscales_hyperprior)))
            logger.info("loss_outputscale_hyperprior: {0:s}".format(
                str(loss_outputscale_hyperprior)))

        return loss_val

    def __call__(self, pars_in):
        # Slice only the last dimension:
        # https://pytorch.org/docs/stable/tensors.html#torch.Tensor.narrow
        lengthscales = pars_in.narrow(
            dim=-1,
            start=self.model_gp.idx_hyperpars["lengthscales"][0],
            length=len(self.model_gp.idx_hyperpars["lengthscales"]))
        outputscale = pars_in.narrow(
            dim=-1,
            start=self.model_gp.idx_hyperpars["outputscale"][0],
            length=len(self.model_gp.idx_hyperpars["outputscale"]))
        # Use minus (-) when minimizing the marginal likelihood
        return -self.log_marginal(lengthscales, outputscale)
def get_log_prob(self, state, action):
    bsize = state.size(0)
    alpha, beta = self.forward(state)
    dist = Beta(concentration1=alpha, concentration0=beta)
    log_prob = dist.log_prob(action).view(bsize, 1)  # (bsize, 1)
    return log_prob
def get_log_ppi(pi):
    conc1 = torch.tensor([1.0]).to(device)
    conc2 = torch.tensor([1.0]).to(device)
    m = Beta(conc1, conc2)
    return torch.sum(m.log_prob(torch.clamp(pi, 0.05, 0.95)))
def optimize_batch(self, num_batches, episode=None):
    if self.optimizer is None:
        raise ValueError('Learning rate is not set!')
    if self.data_loader is None:
        self.data_loader = DataLoader(self.memory, self.batch_size, shuffle=True)

    value_losses = 0
    policy_losses = 0
    entropy = 0
    l2_losses = 0
    batch_count = 0
    for data in self.data_loader:
        inputs, values, rewards, actions, returns, old_action_log_probs, adv_targ = data
        self.optimizer.zero_grad()

        # outputs_vals, outputs_mu, outputs_cov = self.model(inputs)
        # dist = MultivariateNormal(outputs_mu, outputs_cov)
        # action_log_probs = dist.log_prob(actions)
        outputs_vals, alpha_beta_1, alpha_beta_2 = self.model(inputs)
        vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
        vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
        action_log_probs = vx_dist.log_prob(actions[:, 0]).unsqueeze(1) + \
            vy_dist.log_prob(actions[:, 1]).unsqueeze(1)
        # TODO: check why entropy is negative
        dist_entropy = vx_dist.entropy().mean() + vy_dist.entropy().mean()

        # PPO clipped surrogate objective
        ratio = torch.exp(action_log_probs - old_action_log_probs)
        assert ratio.shape[1] == 1
        surr1 = ratio * adv_targ
        surr2 = torch.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * adv_targ
        loss1 = -torch.min(surr1, surr2).mean()
        loss2 = self.criterion_val(outputs_vals, values) * 0.5 * self.value_loss_coef
        loss3 = -dist_entropy * self.entropy_coef
        # speed_square_diff = torch.sum(torch.pow(outputs_mu, 2), dim=1) - torch.Tensor([1]).to(self.device).double()
        # loss4 = torch.pow(torch.max(speed_square_diff, torch.Tensor([0]).to(self.device).double()), 2).mean() * 1
        loss = loss1 + loss2 + loss3
        loss.backward()
        self.optimizer.step()

        policy_losses += loss1.data.item()
        value_losses += loss2.data.item()
        entropy += float(dist_entropy.cpu())
        # l2_losses += loss4.data.item()
        batch_count += 1
        if batch_count > num_batches:
            break

    average_value_loss = value_losses / num_batches
    average_policy_loss = policy_losses / num_batches
    average_entropy = entropy / num_batches
    average_l2_loss = l2_losses / num_batches
    logging.info('Average value, policy loss : %.2E, %.2E',
                 average_value_loss, average_policy_loss)
    self.writer.add_scalar('train/average_value_loss', average_value_loss, episode)
    self.writer.add_scalar('train/average_policy_loss', average_policy_loss, episode)
    self.writer.add_scalar('train/average_entropy', average_entropy, episode)
    # self.writer.add_scalar('train/average_l2_loss', average_l2_loss, episode)
    return average_value_loss
import torch as T
from torch.distributions.beta import Beta

x = T.tensor([2., 2.], requires_grad=True)
m = Beta(x[0], x[1])
s = m.sample()
p = m.log_prob(s)
p.backward()
print(f"Sample s: {s}, log_prob: {p}, grad_x: {x.grad}")
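# Note that .sample() above is not reparameterized, so the gradient reaching x flows
# only through log_prob. A small sketch, assuming a PyTorch version where
# Beta.has_rsample is True, of taking a pathwise gradient through the sample instead:
import torch
from torch.distributions.beta import Beta

x = torch.tensor([2., 2.], requires_grad=True)
m = Beta(x[0], x[1])
s = m.rsample()       # reparameterized sample, differentiable w.r.t. x
(s ** 2).backward()   # gradient flows through the sample itself
print(s, x.grad)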
def sample_and_get_loss(self, target, teams, return_proba=False):
    total_event_loss = Variable(torch.zeros(1))
    total_time_loss = Variable(torch.zeros(1))
    total_result_loss = Variable(torch.zeros(1))
    total_same_minute_event_loss = Variable(torch.zeros(1))
    total_same_minute_proba_game = Variable(torch.zeros(1))
    total_accuracy = 0
    total_goals_home_loss = Variable(torch.zeros(1))
    total_goals_away_loss = Variable(torch.zeros(1))
    total_goals_diff_loss = Variable(torch.zeros(1))

    sampled_events = []
    sampled_times = []
    target_events = []
    target_times = []
    all_proba = []

    for batch_idx in range(target.size(0)):
        end_of_game_idx = get_end_of_game_idx(target[batch_idx, :, 0])
        accuracies = []
        results_losses = []
        results = torch.FloatTensor([0, 0, 0])

        # Sample multiple times
        for _ in range(NB_GAMES_TO_SAMPLE):
            current_input = Variable(torch.zeros(1, 1, NB_ALL_EVENTS + NB_ALL_TIMES))
            current_input[0, 0, SOG_TOKEN] = 1
            current_input[0, 0, NB_ALL_EVENTS + GAME_NOT_RUNNING_TIME] = 1

            self.hidden = self.init_hidden([teams[batch_idx]])
            teams_tensor = get_teams_caracteristics([teams[batch_idx]])
            teams_input = teams_tensor.squeeze(0).unsqueeze(1)

            sampled_events_in_game = []
            sampled_times_in_game = []
            target_events_in_game = []
            target_times_in_game = []
            proba = []

            game_event_proba = Variable(torch.zeros((end_of_game_idx, NB_ALL_EVENTS)))
            event_loss_game = Variable(torch.zeros(1))
            time_loss_game = Variable(torch.zeros(1))
            same_minute_event_loss_game = Variable(torch.zeros(1))
            same_minute_proba_game = Variable(torch.zeros(1))

            for event_idx in range(end_of_game_idx):
                input_with_prior = torch.cat([current_input, teams_input], 2)
                output, self.hidden = self.lstm(input_with_prior, self.hidden)
                event_scores = self.hidden2event(output)
                time_scores = self.hidden2time(output)

                event_loss = self.loss_function_events(
                    event_scores.view(1, -1), target[batch_idx, event_idx, 0].view(1))
                time_loss = self.loss_function_time(
                    time_scores.view(1, -1), target[batch_idx, event_idx, 1].view(1))
                event_loss_game += event_loss
                time_loss_game += time_loss

                event_proba = F.softmax(event_scores, 2)
                time_proba = F.softmax(time_scores, 2)

                # Increase total proba
                # same_minute_proba_game += time_proba[0, 0, SAME_TIME_THAN_PREV]
                alphas = 4.0
                betas = 6.53242321
                beta_distr = Beta(alphas, betas)
                log_prob = beta_distr.log_prob(time_proba[0, 0, SAME_TIME_THAN_PREV])
                same_minute_event_loss_game += -log_prob

                game_event_proba[event_idx, :] = event_proba

                generated_event = int(torch.multinomial(event_proba[0, 0], 1)[0])
                generated_time = int(torch.multinomial(time_proba[0, 0], 1)[0])

                # Force different time if generating NO_EVENT
                if generated_event == NO_EVENT:
                    generated_time = DIFF_TIME_THAN_PREV

                sampled_events_in_game.append(generated_event)
                sampled_times_in_game.append(generated_time)
                target_events_in_game.append(target[batch_idx, event_idx, 0].data.item())
                target_times_in_game.append(target[batch_idx, event_idx, 1].data.item())

                # Store probabilities of event to happen
                proba.append([])
                for event_nb in range(NB_ALL_EVENTS):
                    proba[-1].append(event_proba[0, 0, event_nb])

                current_input = Variable(torch.zeros(1, 1, NB_ALL_EVENTS + NB_ALL_TIMES))
                current_input[0, 0, generated_event] = 1
                current_input[0, 0, NB_ALL_EVENTS + generated_time] = 1

            goals_home_tensor, goals_home_target_tensor, goals_away_tensor, \
                goals_away_target_tensor = get_during_game_goals(
                    game_event_proba.unsqueeze(0), target[batch_idx, :].unsqueeze(0))
            goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
            goals_target_tensor = torch.stack(
                [goals_home_target_tensor, goals_away_target_tensor], 1)
            predicted_results = get_games_results_from_goals(goals_tensor)
            games_results = get_games_results_from_goals(goals_target_tensor)

            # Count sampled goals for both teams
            goal_home = sampled_events_in_game.count(GOAL_HOME)
            goal_away = sampled_events_in_game.count(GOAL_AWAY)
            if goal_home > goal_away:
                sampled_res = 0
            elif goal_home < goal_away:
                sampled_res = 1
            else:
                sampled_res = 2
            results[sampled_res] += 1

            # loss_result_game = self.loss_function_result(games_proba, games_results)
            # accuracy = games_proba[0][games_results.item()]
            # results_losses.append(loss_result_game.item())
            # accuracies.append(accuracy.item())

        total_event_loss += event_loss_game.item() / end_of_game_idx
        total_time_loss += time_loss_game.item() / end_of_game_idx
        # total_same_minute_event_loss += same_minute_event_loss_game / end_of_game_idx
        # total_result_loss += np.mean(results_losses)
        # total_accuracy += np.mean(accuracies)

        results /= NB_GAMES_TO_SAMPLE
        total_accuracy += results[games_results.item()]
        total_result_loss += self.loss_function_result(results.unsqueeze(0), games_results)
        same_minute_proba_game /= end_of_game_idx

        # Compute same minute event loss
        '''
        beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
        log_prob = beta_distr.log_prob(same_minute_proba_game)
        same_minute_loss_game = -log_prob
        total_same_minute_event_loss += same_minute_loss_game
        '''
        total_same_minute_event_loss += same_minute_event_loss_game / end_of_game_idx
        # total_same_minute_proba_game += same_minute_proba_game

        '''
        total_goals_home_loss += loss_goals_home
        total_goals_away_loss += loss_goals_away
        total_goals_diff_loss += loss_goals_diff
        '''

        sampled_events.append(sampled_events_in_game)
        sampled_times.append(sampled_times_in_game)
        target_events.append(target_events_in_game)
        target_times.append(target_times_in_game)
        all_proba.append(proba)

    '''
    total_same_minute_proba_game /= target.size(0)
    beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
    log_prob = beta_distr.log_prob(total_same_minute_proba_game)
    '''
    total_result_loss /= target.size(0)
    total_event_loss /= target.size(0)
    total_time_loss /= target.size(0)
    total_same_minute_event_loss /= target.size(0)  # = Variable(torch.tensor(0))
    # total_same_minute_event_loss = -log_prob
    total_accuracy /= target.size(0)
    total_goals_home_loss /= target.size(0)
    total_goals_away_loss /= target.size(0)
    total_goals_diff_loss /= target.size(0)

    loss = (total_event_loss + total_time_loss +
            BETA_WEIGHT * total_same_minute_event_loss) / (2 + BETA_WEIGHT)

    if return_proba:
        return sampled_events, sampled_times, target_events, target_times, all_proba, \
            loss.data[0], total_event_loss.data[0], total_time_loss.data[0], \
            total_same_minute_event_loss.item(), total_result_loss.data[0], \
            total_accuracy.item()
    else:
        return sampled_events, sampled_times, target_events, target_times, \
            loss.data[0], total_event_loss.data[0], total_time_loss.data[0], \
            total_same_minute_event_loss.item(), total_result_loss.data[0], \
            total_accuracy.item()