import torch
from torch.distributions import Beta, HalfNormal


def probabilty_s_given_y(theta, s, y, l, k, ratio_agreement=0.95, model=1):
    if model == 1:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        r = (ratio_agreement * eq.squeeze()
             + (1 - ratio_agreement) * (1 - eq.squeeze()))
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            # Beta with mean r[i]/(r[i] + 1) and total concentration params[i]
            m = Beta(r[i] * params[i] / (r[i] + 1), params[i] / (r[i] + 1))
            probability *= (torch.exp(m.log_prob(s[:, i].double())) * l[:, i].double()
                            + (1 - l[:, i]).double())
    elif model == 2:
        eq = torch.eq(k.view(-1, 1).long(), y.long()).double().t()
        eq = torch.stack([eq, 1 - eq]).squeeze().t()
        params = (theta * eq).sum(1)
        probability = 1
        for i in range(k.shape[0]):
            m = HalfNormal(params[i])
            probability *= (
                (1 - torch.exp(m.log_prob(s[:, i].double()))) * eq[i, 0]
                + torch.exp(m.log_prob(s[:, i].double())) * (1 - eq[i, 0])
            ) * l[:, i].double() + (1 - l[:, i]).double()
    return probability
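A note on the Beta parameterisation in the model == 1 branch above: the two concentrations are r * params / (r + 1) and params / (r + 1), so they sum to params and the mean is r / (r + 1); params acts as a total-concentration (sharpness) term while r sets the location. A standalone check of that identity, with illustrative values that are not taken from the snippet:

import torch
from torch.distributions.beta import Beta

r, total = 0.95, torch.tensor(10.0)                 # illustrative values
dist = Beta(r * total / (r + 1), total / (r + 1))
print(dist.concentration1 + dist.concentration0)    # == total
print(dist.mean)                                    # == r / (r + 1), about 0.487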
Example 2
    def act(self, state_tensor):
        # state is a batch of tensors rather than a joint state
        # value, mu, cov = self.value_action_predictor(state_tensor)
        # dist = MultivariateNormal(mu, cov)
        # actions = dist.sample()
        # action_log_probs = dist.log_prob(actions)
        # action_to_take = [ActionXY(action[0], action[1]) for action in actions.cpu().numpy()]

        value, alpha_beta_1, alpha_beta_2 = self.value_action_predictor(
            state_tensor)
        vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
        vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
        actions = torch.cat(
            [vx_dist.sample().unsqueeze(1),
             vy_dist.sample().unsqueeze(1)],
            dim=1)
        action_log_probs = (vx_dist.log_prob(actions[:, 0]).unsqueeze(1)
                            + vy_dist.log_prob(actions[:, 1]).unsqueeze(1))
        # rescale the Beta samples from (0, 1) to the (-1, 1) action range
        action_to_take = [
            ActionXY(action[0] * 2 - 1, action[1] * 2 - 1)
            for action in actions.cpu().numpy()
        ]

        return value, actions, action_log_probs, action_to_take
Example 3
    def optimize_epoch(self, num_epochs):
        if self.optimizer is None:
            raise ValueError('Learning rate is not set!')
        if self.data_loader is None:
            # convert action into indices
            self.data_loader = DataLoader(self.memory,
                                          self.batch_size,
                                          shuffle=True)
        average_value_loss = 0
        average_policy_loss = 0
        for epoch in range(num_epochs):
            value_loss = 0
            policy_loss = 0
            logging.debug('{}-th epoch starts'.format(epoch))
            for data in self.data_loader:
                inputs, values, _, actions = data
                self.optimizer.zero_grad()
                # # outputs_val, outputs_mu, outputs_cov = self.model(inputs)
                # action_log_probs = MultivariateNormal(outputs_mu, outputs_cov).log_prob(actions)
                outputs_val, alpha_beta_1, alpha_beta_2 = self.model(inputs)
                vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
                vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
                # shrink and shift stored actions so they lie strictly inside (0, 1)
                p = torch.Tensor([1 + 1e-6]).to(self.device)
                q = torch.Tensor([1e-8]).to(self.device)
                action_log_probs = (vx_dist.log_prob(actions[:, 0] / p + q).unsqueeze(1)
                                    + vy_dist.log_prob(actions[:, 1] / p + q).unsqueeze(1))

                values = values.to(self.device)
                dist_entropy = vx_dist.entropy().mean() + vy_dist.entropy().mean()

                loss1 = self.criterion_val(outputs_val, values)
                loss2 = -action_log_probs.mean()
                loss = loss1 + loss2 - dist_entropy * self.entropy_coef
                # loss = loss1 + loss2
                loss.backward()
                self.optimizer.step()
                value_loss += loss1.data.item()
                policy_loss += loss2.data.item()
            logging.debug('{}-th epoch ends'.format(epoch))
            average_value_loss = value_loss / len(self.memory)
            average_policy_loss = policy_loss / len(self.memory)
            self.writer.add_scalar('IL/average_value_loss', average_value_loss,
                                   epoch)
            self.writer.add_scalar('IL/average_policy_loss',
                                   average_policy_loss, epoch)
            logging.info('Average value, policy loss in epoch %d: %.2E, %.2E',
                         epoch, average_value_loss, average_policy_loss)

        return average_value_loss
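The p = 1 + 1e-6 and q = 1e-8 constants above shrink and shift the stored actions so they sit strictly inside (0, 1), where Beta.log_prob is finite; at the boundaries the log-density is not. A standalone illustration of the same trick (the concentrations 2 and 3 here are illustrative, not the model's outputs):

import torch
from torch.distributions.beta import Beta

dist = Beta(torch.tensor(2.0), torch.tensor(3.0))
a = torch.tensor([0.0, 0.5, 1.0])      # includes boundary values
print(dist.log_prob(a))                # not finite at 0 and 1
p, q = 1 + 1e-6, 1e-8
print(dist.log_prob(a / p + q))        # finite everywhere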
Example 4
    def forward(self, x, a=None):
        alpha = self.alpha(x)
        beta = self.beta(x)
        policy = Beta(alpha, beta)
        # print(alpha,beta)
        # print(alpha.squeeze(),beta.squeeze())
        pi = policy.sample()
        pi = pi.squeeze()

        # print(pi)

        logp_pi = policy.log_prob(pi).sum(dim=1)

        if a is not None:
            logp = policy.log_prob(a).sum(dim=1)
        else:
            logp = None

        return pi, logp, logp_pi
Example 5
    def update(self, a_tnsr, b_tnsr, action_tensor, reward_tensor):
        self.optimizer.zero_grad()
        m = Beta(a_tnsr, b_tnsr)
        log_probs = m.log_prob(action_tensor)
        log_probs = -1 * torch.matmul(reward_tensor, log_probs)
        loss = log_probs.mean()
        # print(loss)
        loss.backward()

        self.optimizer.step()
        self.scheduler.step()
Example 6
    def calc_unnormalized_beta_cdf(self, b, alpha, beta, npts=100):
        bt = Beta(alpha.float(), beta.float())
        x = torch.linspace(0 + self.epsilon,
                           b - self.epsilon,
                           int(npts * b.cpu().numpy()),
                           device=self.device).float()
        pdf = bt.log_prob(x).exp()
        dx = torch.tensor([1. / (npts * self.num_classes)],
                          device=self.device).float()
        P = pdf.sum(dim=1) * dx
        return P
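The method above approximates the Beta CDF by a Riemann sum over the exponentiated log-density. A minimal standalone check of that idea against a case with a known value, Beta(2, 2), whose CDF at 0.5 is exactly 0.5 by symmetry (the grid size here is an arbitrary choice):

import torch
from torch.distributions.beta import Beta

dist = Beta(torch.tensor(2.0), torch.tensor(2.0))
npts = 10000
x = torch.linspace(1e-6, 0.5, npts)
dx = 0.5 / npts
cdf_approx = (dist.log_prob(x).exp() * dx).sum()
print(cdf_approx)    # close to the exact value 0.5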
Example 7
    def forward(self, x, a=None):
        # according to the paper, add 1 to both alpha and beta
        # TODO : check if this is the right way to add bias to alpha and beta.
        b = self.maxlikely(x)
        # print(b)
        # gamma - exploration factor
        alpha = 1 + self.gamma * b
        beta = 1 + self.gamma * (1 - b)
        policy = Beta(alpha, beta)
        # if a is None:
        #     print('[%f,%f]'%(alpha.data,beta.data))
        pi = policy.sample()

        logp_pi = policy.log_prob(pi).sum(dim=1)
        if a is not None:
            logp = policy.log_prob(a).sum(dim=1)
        else:
            logp = None

        return pi, logp, logp_pi
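The alpha = 1 + gamma * b, beta = 1 + gamma * (1 - b) construction above keeps both concentrations at least 1, so the density never diverges at the boundaries, and its mean (1 + gamma * b) / (2 + gamma) moves from 0.5 towards the network output b as the exploration factor gamma grows. A standalone sketch of that behaviour; the values of b and gamma below are illustrative, not the repository's:

import torch
from torch.distributions.beta import Beta

b = torch.tensor(0.8)                  # stand-in for the network output
for gamma in (0.0, 2.0, 20.0):         # illustrative exploration factors
    dist = Beta(1 + gamma * b, 1 + gamma * (1 - b))
    print(f"gamma={gamma}: mean={dist.mean.item():.3f}, "
          f"variance={dist.variance.item():.4f}")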
Example 8
    def log_probs(self, batch_states, batch_actions):
        # Get raw policy outputs: first act_dim entries for c1, the rest for c2
        act = self.forward(batch_states)

        # Map outputs to Beta concentrations in (0, 5)
        c1 = F.sigmoid(act[:, :, :self.act_dim]) * 5
        c2 = F.sigmoid(act[:, :, self.act_dim:]) * 5

        beta_dist = Beta(c1, c2)
        log_probs = beta_dist.log_prob(batch_actions)
        return log_probs.sum(1, keepdim=True)
Example 9
class MixtureCDFFlow(nn.Module):
    def __init__(self,
                 base_dist='uniform',
                 mixture_dist='gaussian',
                 n_components=4):
        super().__init__()
        self.composition = False
        if base_dist == 'uniform':
            self.base_dist = Uniform(ptu.tensor(0.0), ptu.tensor(1.0))
        elif base_dist == 'beta':
            self.base_dist = Beta(ptu.tensor(5.0), ptu.tensor(5.0))
        else:
            raise NotImplementedError

        self.loc = nn.Parameter(torch.randn(n_components), requires_grad=True)
        self.log_scale = nn.Parameter(torch.zeros(n_components),
                                      requires_grad=True)
        self.weight_logits = nn.Parameter(torch.zeros(n_components),
                                          requires_grad=True)
        if mixture_dist == 'gaussian':
            self.mixture_dist = Normal  # (self.loc, self.log_scale.exp())
        elif mixture_dist == 'logistic':
            raise NotImplementedError
        self.n_components = n_components

    def flow(self, x):
        # z = mixture CDF of x
        weights = F.softmax(self.weight_logits,
                            dim=0).unsqueeze(0).repeat(x.shape[0], 1)
        x_rep = x.unsqueeze(1).repeat(1, self.n_components)
        mixture = self.mixture_dist(self.loc, self.log_scale.exp())
        z = (mixture.cdf(x_rep) * weights).sum(dim=1)

        # log_det = log dz/dx = log pdf(x) under the mixture
        log_det = (mixture.log_prob(x_rep).exp() * weights).sum(dim=1).log()

        return z, log_det

    def log_prob(self, x):
        z, log_det = self.flow(x)
        return self.base_dist.log_prob(z) + log_det

    # Compute loss as negative log-likelihood
    def nll(self, x):
        return -self.log_prob(x).mean()

    def get_density(self):
        x = np.linspace(-3, 3, 1000)
        with torch.no_grad():
            y = self.log_prob(torch.tensor(x)).exp().numpy()
        return x, y
Example 10
    def step(self, input, target, teams):
        """Do one training step and return the loss."""

        self.train()
        self.zero_grad()
        event_scores, time_scores = self.forward(input, teams)

        event_proba = F.softmax(event_scores, 2)
        time_proba = F.softmax(time_scores, 2)

        # Only get events during the games
        events_during_game, target_events_during_game, time_during_game, target_time_during_game, end_game_indices = get_during_game_tensors(
            event_scores, time_scores, target, return_end_game_idx=True)

        # Only get goals during the games
        goals_home_tensor, goals_home_target_tensor, goals_away_tensor, goals_away_target_tensor = get_during_game_goals(
            event_proba, target)

        goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
        goals_target_tensor = torch.stack(
            [goals_home_target_tensor, goals_away_target_tensor], 1)

        accuracy = torch.tensor(0)
        loss_result_game = torch.tensor(0)

        # Events and time loss functions
        loss_events_during_game = self.loss_function_events(
            events_during_game, target_events_during_game)
        loss_time_during_game = self.loss_function_time(
            time_during_game, target_time_during_game)

        # Loss that discourages predicting too many events in the same minute
        time_proba_during_game = F.softmax(time_during_game, 1)
        beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
        log_prob = beta_distr.log_prob(
            time_proba_during_game[:, SAME_TIME_THAN_PREV])
        same_minute_event_loss = -torch.mean(log_prob)

        #same_minute_event_loss = Variable(torch.tensor(0))

        total_loss = (loss_events_during_game + loss_time_during_game +
                      BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

        total_loss.backward()

        self.optimizer.step()

        return (event_proba, time_proba, total_loss.data.item(),
                loss_events_during_game.data.item(),
                loss_time_during_game.data.item(),
                same_minute_event_loss.item(),
                loss_result_game.data.item(), accuracy.item())
Example 11
    def predict_proba_and_get_loss(self, input, target, teams):
        event_scores, time_scores = self.forward(input, teams)

        # Get probabilities
        event_proba = F.softmax(event_scores, 2)
        time_proba = F.softmax(time_scores, 2)

        # Separate events from time
        target_events = target[:, :, 0]
        target_time = target[:, :, 1]

        # Only get events during the games
        events_during_game, target_events_during_game, time_during_game, target_time_during_game = get_during_game_tensors(
            event_scores, time_scores, target)

        # Only get goals during the games
        goals_home_tensor, goals_home_target_tensor, goals_away_tensor, goals_away_target_tensor = get_during_game_goals(
            event_proba, target)

        goals_tensor = torch.stack([goals_home_tensor, goals_away_tensor], 1)
        goals_target_tensor = torch.stack(
            [goals_home_target_tensor, goals_away_target_tensor], 1)

        games_proba = get_games_proba_from_goals_proba(goals_tensor)
        games_results = get_games_results_from_goals(goals_target_tensor)

        # Cross entropy loss for result, but don't use it in backwards
        loss_result_game = self.loss_function_result(games_proba,
                                                     games_results)

        # Loss that discourages predicting too many events in the same minute
        time_proba_during_game = F.softmax(time_during_game, 1)
        beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
        log_prob = beta_distr.log_prob(
            time_proba_during_game[:, SAME_TIME_THAN_PREV])
        same_minute_event_loss = -torch.mean(log_prob)

        # Events and time loss functions
        loss_time_during_game = self.loss_function_time(
            time_during_game, target_time_during_game)
        loss_events_during_game = self.loss_function_events(
            events_during_game, target_events_during_game)

        total_loss = (loss_events_during_game + loss_time_during_game +
                      BETA_WEIGHT * same_minute_event_loss) / (2 + BETA_WEIGHT)

        return (event_proba, time_proba, total_loss.data.item(),
                loss_events_during_game.data.item(),
                loss_time_during_game.data.item(),
                same_minute_event_loss.data.item(),
                loss_result_game.data.item())
Example 12
def get_log_qzpi(zmu, zstd, zsamp, pi_alpha, pi_beta, pi_samp):
    qz_R_obj = LogNormal(zmu[:, 0], zstd[:, 0])
    qz_C_obj = LogNormal(zmu[:, 1], zstd[:, 1])
    qz_Ts_obj = LogNormal(zmu[:, 2], zstd[:, 2])
    qz_Td_obj = LogNormal(zmu[:, 3], zstd[:, 3])
    qz_CO_obj = LogNormal(zmu[:, 4], zstd[:, 4])

    qz_pi_obj = Beta(pi_alpha, pi_beta)

    return torch.sum(qz_R_obj.log_prob(zsamp[:, 0])) + \
        torch.sum(qz_C_obj.log_prob(zsamp[:, 1])) + \
        torch.sum(qz_Ts_obj.log_prob(zsamp[:, 2])) + \
        torch.sum(qz_Td_obj.log_prob(zsamp[:, 3])) + \
        torch.sum(qz_CO_obj.log_prob(zsamp[:, 4])) + \
        torch.sum(qz_pi_obj.log_prob(torch.clamp(pi_samp, 0.1, 0.9)))
Example 13
def evaluate_actions(pi, actions, dist_type, env_type):
    if env_type == 'atari':
        cate_dist = Categorical(pi)
        log_prob = cate_dist.log_prob(actions).unsqueeze(-1)
        entropy = cate_dist.entropy().mean()
    else:
        if dist_type == 'gauss':
            mean, std = pi
            normal_dist = Normal(mean, std)
            log_prob = normal_dist.log_prob(actions).sum(dim=1, keepdim=True)
            entropy = normal_dist.entropy().mean()
        elif dist_type == 'beta':
            alpha, beta = pi
            beta_dist = Beta(alpha, beta)
            log_prob = beta_dist.log_prob(actions).sum(dim=1, keepdim=True)
            entropy = beta_dist.entropy().mean()
    return log_prob, entropy
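A hypothetical call of the 'beta' branch above, assuming the function and its distribution imports are in scope; the shapes, concentrations, and env_type string are illustrative assumptions, not values from the repository:

import torch

alpha = torch.full((8, 2), 2.0)                   # batch of 8, 2-D action space
beta = torch.full((8, 2), 3.0)
actions = torch.rand(8, 2).clamp(1e-6, 1 - 1e-6)  # keep strictly inside (0, 1)
log_prob, entropy = evaluate_actions((alpha, beta), actions,
                                     dist_type='beta', env_type='mujoco')
print(log_prob.shape, entropy)                    # torch.Size([8, 1]) and a scalar tensor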
Example 14
    def train_on_batch(self, batch):
        """perform optimization step.

        Args:
          batch (tuple): tuple of batches of environment observations, calling programs, lstm's hidden and cell states

        Returns:
          policy loss, value loss, total loss combining policy and value losses
        """
        e_t = torch.FloatTensor(np.stack(batch[0]))
        i_t = batch[1]
        lstm_states = batch[2]
        h_t, c_t = zip(*lstm_states)
        h_t = torch.squeeze(torch.stack(list(h_t)))
        c_t = torch.squeeze(torch.stack(list(c_t)))

        policy_labels = torch.squeeze(torch.stack(batch[3]))
        value_labels = torch.stack(batch[4]).view(-1, 1)

        self.optimizer.zero_grad()
        policy_predictions, value_predictions, _, _ = self.predict_on_batch(
            e_t, i_t, h_t, c_t)

        # policy_loss = -torch.mean(policy_labels * torch.log(policy_predictions), dim=-1).mean()

        beta = Beta(policy_predictions[0], policy_predictions[1])
        policy_action = beta.sample()
        prob_action = beta.log_prob(policy_action)

        log_mcts = self.temperature * torch.log(policy_labels)
        with torch.no_grad():
            modified_kl = prob_action - log_mcts

        policy_loss = -modified_kl * (torch.log(modified_kl) + prob_action)
        entropy_loss = self.entropy_lambda * beta.entropy()

        # reduce both losses to scalars so total_loss.backward() is well-defined
        policy_network_loss = (policy_loss + entropy_loss).mean()
        value_network_loss = torch.pow(value_predictions - value_labels,
                                       2).mean()

        total_loss = (policy_network_loss + value_network_loss) / 2
        total_loss.backward()
        self.optimizer.step()

        return policy_network_loss, value_network_loss, total_loss
Example 15
class MLLGP():
    def __init__(self, model_gp, likelihood_gp, hyperpriors: dict) -> None:
        self.model_gp = model_gp
        self.likelihood_gp = likelihood_gp
        self.hyperpriors = hyperpriors

        a_beta = self.hyperpriors["lengthscales"].kwds["a"]
        b_beta = self.hyperpriors["lengthscales"].kwds["b"]

        self.Beta_tmp = Beta(concentration1=a_beta, concentration0=b_beta)

        a_gg = self.hyperpriors["outputscale"].kwds["a"]
        b_gg = self.hyperpriors["outputscale"].kwds["scale"]

        self.Gamma_tmp = Gamma(concentration=a_gg, rate=1. / b_gg)

    def log_marginal(self, lengthscales, outputscale) -> float:
        """Log marginal likelihood of the GP at the given hyperparameters,
        plus the hyperprior log-densities."""

        # print("lengthscales.shape:",lengthscales.shape)
        # print("outputscale.shape:",outputscale.shape)
        if lengthscales.dim() == 3 or outputscale.dim() == 3:
            Nels = lengthscales.shape[0]
            loss_vec = torch.zeros(Nels)
            for k in range(Nels):
                loss_vec[k] = self.log_marginal(lengthscales[k, 0, :],
                                                outputscale[k, 0, :])
            return loss_vec

        assert lengthscales.dim() <= 1 and outputscale.dim() <= 1

        assert not torch.any(torch.isnan(lengthscales)) and not torch.any(
            torch.isinf(lengthscales)), "lengthscales is inf or NaN"
        assert not torch.isnan(outputscale) and not torch.isinf(
            outputscale), "outputscale is inf or NaN"

        # Update hyperparameters:
        self.model_gp.covar_module.outputscale = outputscale
        self.model_gp.covar_module.base_kernel.lengthscale = lengthscales

        # self.model_gp.display_hyperparameters()

        # Get the log prob of the marginal distribution:
        function_dist = self.model_gp(self.model_gp.train_inputs[0])
        output = self.likelihood_gp(function_dist)
        loss_val = output.log_prob(self.model_gp.train_targets).view(1)

        # if self.debug == True:
        #     pdb.set_trace()

        loss_lengthscales_hyperprior = torch.sum(
            self.Beta_tmp.log_prob(lengthscales)).view(1)
        loss_outputscale_hyperprior = self.Gamma_tmp.log_prob(outputscale)

        # loss_lengthscales_hyperprior = sum(self.hyperpriors["lengthscales"].logpdf(lengthscales))
        # loss_outputscale_hyperprior = self.hyperpriors["outputscale"].logpdf(outputscale).item()

        loss_val += loss_lengthscales_hyperprior + loss_outputscale_hyperprior

        try:
            assert not torch.any(torch.isnan(loss_val)) and not torch.any(
                torch.isinf(loss_val)), "loss_val is Inf or NaN"
        except AssertionError:  # TODO: debug
            logger.info("loss_val: {0:s}".format(str(loss_val)))
            logger.info("loss_lengthscales_hyperprior: {0:s}".format(
                str(loss_lengthscales_hyperprior)))
            logger.info("loss_outputscale_hyperprior: {0:s}".format(
                str(loss_outputscale_hyperprior)))

        return loss_val

    def __call__(self, pars_in):
        # Slice only last dimension: https://pytorch.org/docs/stable/tensors.html#torch.Tensor.narrow
        lengthscales = pars_in.narrow(
            dim=-1,
            start=self.model_gp.idx_hyperpars["lengthscales"][0],
            length=len(self.model_gp.idx_hyperpars["lengthscales"]))

        outputscale = pars_in.narrow(
            dim=-1,
            start=self.model_gp.idx_hyperpars["outputscale"][0],
            length=len(self.model_gp.idx_hyperpars["outputscale"]))

        # Negated so that minimizing the returned value maximizes the marginal likelihood
        return -self.log_marginal(lengthscales, outputscale)
Example 16
    def get_log_prob(self, state, action):
        bsize = state.size(0)
        alpha, beta = self.forward(state)
        dist = Beta(concentration1=alpha, concentration0=beta)
        log_prob = dist.log_prob(action).view(bsize, 1)  # (bsize, 1)
        return log_prob
Example 17
def get_log_ppi(pi):
    # Beta(1, 1) is the uniform distribution on (0, 1), so this prior adds a
    # constant log-density of 0 for every clamped value of pi
    conc1 = torch.tensor([1.0]).to(device)
    conc2 = torch.tensor([1.0]).to(device)
    m = Beta(conc1, conc2)
    return torch.sum(m.log_prob(torch.clamp(pi, 0.05, 0.95)))
Example 18
    def optimize_batch(self, num_batches, episode=None):
        if self.optimizer is None:
            raise ValueError('Learning rate is not set!')
        if self.data_loader is None:
            self.data_loader = DataLoader(self.memory,
                                          self.batch_size,
                                          shuffle=True)
        value_losses = 0
        policy_losses = 0
        entropy = 0
        l2_losses = 0
        batch_count = 0
        for data in self.data_loader:
            inputs, values, rewards, actions, returns, old_action_log_probs, adv_targ = data
            self.optimizer.zero_grad()
            # outputs_vals, outputs_mu, outputs_cov = self.model(inputs)
            # dist = MultivariateNormal(outputs_mu, outputs_cov)
            # action_log_probs = dist.log_prob(actions)
            outputs_vals, alpha_beta_1, alpha_beta_2 = self.model(inputs)
            vx_dist = Beta(alpha_beta_1[:, 0], alpha_beta_1[:, 1])
            vy_dist = Beta(alpha_beta_2[:, 0], alpha_beta_2[:, 1])
            action_log_probs = (vx_dist.log_prob(actions[:, 0]).unsqueeze(1)
                                + vy_dist.log_prob(actions[:, 1]).unsqueeze(1))

            # TODO: check why entropy is negative
            dist_entropy = vx_dist.entropy().mean() + vy_dist.entropy().mean()

            ratio = torch.exp(action_log_probs - old_action_log_probs)
            assert ratio.shape[1] == 1
            surr1 = ratio * adv_targ
            surr2 = torch.clamp(ratio, 1.0 - self.clip_param,
                                1.0 + self.clip_param) * adv_targ
            loss1 = -torch.min(surr1, surr2).mean()
            loss2 = self.criterion_val(outputs_vals,
                                       values) * 0.5 * self.value_loss_coef
            loss3 = -dist_entropy * self.entropy_coef

            # speed_square_diff = torch.sum(torch.pow(outputs_mu, 2), dim=1) - torch.Tensor([1]).to(self.device).double()
            # loss4 = torch.pow(torch.max(speed_square_diff, torch.Tensor([0]).to(self.device).double()), 2).mean() * 1

            loss = loss1 + loss2 + loss3
            loss.backward()
            self.optimizer.step()

            policy_losses += loss1.data.item()
            value_losses += loss2.data.item()
            entropy += float(dist_entropy.cpu())
            # l2_losses += loss4.data.item()
            batch_count += 1
            if batch_count >= num_batches:
                break

        average_value_loss = value_losses / num_batches
        average_policy_loss = policy_losses / num_batches
        average_entropy = entropy / num_batches
        average_l2_loss = l2_losses / num_batches
        logging.info('Average value, policy loss : %.2E, %.2E',
                     average_value_loss, average_policy_loss)
        self.writer.add_scalar('train/average_value_loss', average_value_loss,
                               episode)
        self.writer.add_scalar('train/average_policy_loss',
                               average_policy_loss, episode)
        self.writer.add_scalar('train/average_entropy', average_entropy,
                               episode)
        # self.writer.add_scalar('train/average_l2_loss', average_l2_loss, episode)

        return average_value_loss
Example 19
import torch as T
from torch.distributions.beta import Beta

x = T.tensor([2., 2.], requires_grad=True)
m = Beta(x[0], x[1])
s = m.sample()
p = m.log_prob(s)
p.backward()

print(f"Sample s: {s}, log_prob: {p}, grad_x: {x.grad}")
Example 20
    def sample_and_get_loss(self, target, teams, return_proba=False):
        total_event_loss = Variable(torch.zeros(1))
        total_time_loss = Variable(torch.zeros(1))
        total_result_loss = Variable(torch.zeros(1))
        total_same_minute_event_loss = Variable(torch.zeros(1))
        total_same_minute_proba_game = Variable(torch.zeros(1))
        total_accuracy = 0

        total_goals_home_loss = Variable(torch.zeros(1))
        total_goals_away_loss = Variable(torch.zeros(1))
        total_goals_diff_loss = Variable(torch.zeros(1))

        sampled_events = []
        sampled_times = []
        target_events = []
        target_times = []

        all_proba = []

        for batch_idx in range(target.size(0)):

            end_of_game_idx = get_end_of_game_idx(target[batch_idx, :, 0])

            accuracies = []
            results_losses = []
            results = torch.FloatTensor([0, 0, 0])
            # Sample multiple times
            for _ in range(NB_GAMES_TO_SAMPLE):
                current_input = Variable(
                    torch.zeros(1, 1, NB_ALL_EVENTS + NB_ALL_TIMES))
                current_input[0, 0, SOG_TOKEN] = 1
                current_input[0, 0, NB_ALL_EVENTS + GAME_NOT_RUNNING_TIME] = 1

                self.hidden = self.init_hidden([teams[batch_idx]])

                teams_tensor = get_teams_caracteristics([teams[batch_idx]])
                teams_input = teams_tensor.squeeze(0).unsqueeze(1)

                sampled_events_in_game = []
                sampled_times_in_game = []
                target_events_in_game = []
                target_times_in_game = []

                proba = []

                game_event_proba = Variable(
                    torch.zeros((end_of_game_idx, NB_ALL_EVENTS)))

                event_loss_game = Variable(torch.zeros(1))
                time_loss_game = Variable(torch.zeros(1))
                same_minute_event_loss_game = Variable(torch.zeros(1))
                same_minute_proba_game = Variable(torch.zeros(1))
                for event_idx in range(end_of_game_idx):
                    input_with_prior = torch.cat([current_input, teams_input],
                                                 2)
                    output, self.hidden = self.lstm(input_with_prior,
                                                    self.hidden)

                    event_scores = self.hidden2event(output)
                    time_scores = self.hidden2time(output)

                    event_loss = self.loss_function_events(
                        event_scores.view(1, -1),
                        target[batch_idx, event_idx, 0].view(1))
                    time_loss = self.loss_function_time(
                        time_scores.view(1, -1),
                        target[batch_idx, event_idx, 1].view(1))

                    event_loss_game += event_loss
                    time_loss_game += time_loss

                    event_proba = F.softmax(event_scores, 2)
                    time_proba = F.softmax(time_scores, 2)

                    # Increase total proba
                    #same_minute_proba_game += time_proba[0, 0, SAME_TIME_THAN_PREV]

                    alphas = 4.0
                    betas = 6.53242321
                    beta_distr = Beta(alphas, betas)
                    log_prob = beta_distr.log_prob(
                        time_proba[0, 0, SAME_TIME_THAN_PREV])
                    same_minute_event_loss_game += -log_prob

                    game_event_proba[event_idx, :] = event_proba

                    generated_event = int(
                        torch.multinomial(event_proba[0, 0], 1)[0])
                    generated_time = int(
                        torch.multinomial(time_proba[0, 0], 1)[0])

                    # Force different time if generating NO_EVENT
                    if generated_event == NO_EVENT:
                        generated_time = DIFF_TIME_THAN_PREV

                    sampled_events_in_game.append(generated_event)
                    sampled_times_in_game.append(generated_time)
                    target_events_in_game.append(
                        target[batch_idx, event_idx, 0].data.item())
                    target_times_in_game.append(
                        target[batch_idx, event_idx, 1].data.item())

                    # Store probabilities of event to happen
                    proba.append([])
                    for event_nb in range(NB_ALL_EVENTS):
                        proba[-1].append(event_proba[0, 0, event_nb])

                    current_input = Variable(
                        torch.zeros(1, 1, NB_ALL_EVENTS + NB_ALL_TIMES))
                    current_input[0, 0, generated_event] = 1
                    current_input[0, 0, NB_ALL_EVENTS + generated_time] = 1

                goals_home_tensor, goals_home_target_tensor, goals_away_tensor, goals_away_target_tensor = get_during_game_goals(
                    game_event_proba.unsqueeze(0),
                    target[batch_idx, :].unsqueeze(0))

                goals_tensor = torch.stack(
                    [goals_home_tensor, goals_away_tensor], 1)
                goals_target_tensor = torch.stack(
                    [goals_home_target_tensor, goals_away_target_tensor], 1)

                predicted_results = get_games_results_from_goals(goals_tensor)
                games_results = get_games_results_from_goals(
                    goals_target_tensor)

                # Count sampled goals for both teams
                goal_home = sampled_events_in_game.count(GOAL_HOME)
                goal_away = sampled_events_in_game.count(GOAL_AWAY)
                if goal_home > goal_away:
                    sampled_res = 0
                elif goal_home < goal_away:
                    sampled_res = 1
                else:
                    sampled_res = 2

                results[sampled_res] += 1

                #loss_result_game = self.loss_function_result(games_proba, games_results)
                #accuracy = games_proba[0][games_results.item()]

                #results_losses.append(loss_result_game.item())
                #accuracies.append(accuracy.item())

            total_event_loss += event_loss_game.item() / end_of_game_idx
            total_time_loss += time_loss_game.item() / end_of_game_idx
            #total_same_minute_event_loss += same_minute_event_loss_game / end_of_game_idx
            #total_result_loss += np.mean(results_losses)
            #total_accuracy += np.mean(accuracies)

            results /= NB_GAMES_TO_SAMPLE
            total_accuracy += results[games_results.item()]
            total_result_loss += self.loss_function_result(
                results.unsqueeze(0), games_results)

            same_minute_proba_game /= end_of_game_idx

            # Compute same minute event loss
            '''
            beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
            log_prob = beta_distr.log_prob(same_minute_proba_game)
            same_minute_loss_game = -log_prob
            total_same_minute_event_loss += same_minute_loss_game
            '''

            total_same_minute_event_loss += same_minute_event_loss_game / end_of_game_idx

            #total_same_minute_proba_game += same_minute_proba_game
            '''
            total_goals_home_loss += loss_goals_home
            total_goals_away_loss += loss_goals_away
            total_goals_diff_loss += loss_goals_diff
            '''

            sampled_events.append(sampled_events_in_game)
            sampled_times.append(sampled_times_in_game)
            target_events.append(target_events_in_game)
            target_times.append(target_times_in_game)

            all_proba.append(proba)
        '''
        total_same_minute_proba_game /= target.size(0)
        beta_distr = Beta(ALPHA_FOR_BETA_DISTR, BETA_FOR_BETA_DISTR)
        log_prob = beta_distr.log_prob(total_same_minute_proba_game)
        '''

        total_result_loss /= target.size(0)
        total_event_loss /= target.size(0)
        total_time_loss /= target.size(0)
        total_same_minute_event_loss /= target.size(0)
        # total_same_minute_event_loss = Variable(torch.tensor(0))
        #total_same_minute_event_loss = -log_prob
        total_accuracy /= target.size(0)
        total_goals_home_loss /= target.size(0)
        total_goals_away_loss /= target.size(0)
        total_goals_diff_loss /= target.size(0)

        loss = (total_event_loss + total_time_loss +
                BETA_WEIGHT * total_same_minute_event_loss) / (2 + BETA_WEIGHT)

        if return_proba:
            return (sampled_events, sampled_times, target_events, target_times,
                    all_proba, loss.data[0], total_event_loss.data[0],
                    total_time_loss.data[0],
                    total_same_minute_event_loss.item(),
                    total_result_loss.data[0], total_accuracy.item())
        else:
            return (sampled_events, sampled_times, target_events, target_times,
                    loss.data[0], total_event_loss.data[0],
                    total_time_loss.data[0],
                    total_same_minute_event_loss.item(),
                    total_result_loss.data[0], total_accuracy.item())