Example #1
import numpy as np
from scipy.special import logsumexp


def rings_log_pdf_grad(X, sigma=0.1, radia=np.array([1, 3])):
    """Gradient of the log-pdf of a mixture of Gaussian rings with radii
    `radia` and width `sigma` in the first two dimensions of X."""
    # mixture weights proportional to each ring's circumference
    weights = 2 * np.pi * radia
    weights /= np.sum(weights)

    # distance of each point from the origin in the ring plane
    norms = np.linalg.norm(X[:, :2], axis=1)

    result = np.zeros(np.shape(X))

    grads = []
    for i in range(len(X)):
        log_pdf_components = -0.5 * (norms[i] - radia)**2 / (sigma**2)
        log_pdf = logsumexp(log_pdf_components + np.log(weights))
        # log responsibility of each ring for this point
        neg_log_neg_ratios = log_pdf_components - log_pdf

        # radial gradient contributed by each ring
        gs_inner = -(norms[i] - radia) / (sigma**2)

        # responsibility-weighted radial gradient
        grad_1d = np.dot(gs_inner,
                         np.exp(neg_log_neg_ratios + np.log(weights)))
        # rotate the radial gradient into Cartesian coordinates
        angle = np.arctan2(X[i, 1], X[i, 0])
        grad_2d = np.array([np.cos(angle), np.sin(angle)]) * grad_1d
        grads += [grad_2d]

    result[:, :2] = np.array(grads)
    if X.shape[1] > 2:
        # Gaussian (std sigma) log-pdf gradient in the remaining dimensions
        result[:, 2:] = -X[:, 2:] / (sigma**2)

    return result
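
A minimal sanity check of the gradient above, assuming the imports added at the top of the example: on a ring the radial gradient vanishes, while a point between the rings is pulled back towards the nearest one.

X = np.array([[1.0, 0.0],    # exactly on the inner ring
              [1.5, 0.0]])   # between the two rings
grad = rings_log_pdf_grad(X)
print(grad[0])  # ~[0, 0]: the log-density is stationary on the ring
print(grad[1])  # negative x-component: pulled back towards radius 1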
Example #2
    def calculate_marginal_and_conditional(self, confidence_mass):
        assert len(self.received_msgs) == len(self.neighbours), "a message from at least one factor is missing"
        if self.clamped_value is not None:
            marginal = np.zeros(1)
            conditional = np.zeros(1)
        else:
            marginal = np.zeros(self.n_states)
            conditional = np.zeros(self.n_states)
        for fac in self.factors:
            fac_msg_sp, fac_msg_ms = self.received_msgs[fac]
            marginal += fac_msg_sp
            conditional += fac_msg_ms

        # renormalize in log-space
        self.log_marginal = marginal - logsumexp(marginal)
        self.log_conditional = conditional - logsumexp(conditional)

        self.marginal = np.exp(self.log_marginal)
        self.conditional = np.exp(self.log_conditional)

        # calculate means and variances
        vals = np.arange(self.n_states)
        self.marginal_mean = np.sum(self.marginal * vals)
        self.marginal_variance = np.sum(self.marginal * vals**2) - self.marginal_mean**2
        self.conditional_mean = np.sum(self.conditional * vals)
        self.conditional_variance = np.sum(self.conditional * vals**2) - self.conditional_mean**2

        # calculate confidence interval
        if not np.all(np.isnan(self.conditional)):
            center = np.nanargmax(self.conditional)
            conditional_cdf = np.cumsum(self.conditional)

            left, right = self.cumulate_mass_around(conditional_cdf, confidence_mass, center)
            self.conditional_conf_lower = center - left
            self.conditional_conf_upper = right - center
        else:
            self.conditional_conf_upper = 0
            self.conditional_conf_lower = 0
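
The moment computation above, standalone: for a discrete distribution over states 0..n-1, the variance is read off as E[x^2] - E[x]^2.

import numpy as np

p = np.array([0.1, 0.2, 0.4, 0.3])         # a normalized marginal over 4 states
vals = np.arange(len(p))
mean = np.sum(p * vals)                    # E[x] = 1.9
variance = np.sum(p * vals**2) - mean**2   # E[x^2] - E[x]^2 = 0.89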
Example #3
    def update(self, Z, num_new=1, log_weights=None):
        assert len(Z) >= num_new

        # don't do anything if no data observed
        if num_new == 0:
            return

        if log_weights is not None:
            assert len(log_weights) == len(Z)
        else:
            log_weights = np.zeros(len(Z))

        Z_new = Z[-num_new:]
        log_weights_new = log_weights[-num_new:]

        # first update: consume the first of Z_new and log_weights_new, then discard it
        if self.log_sum_weights is None:
            # pretend we have already observed fake terms, which makes the system well-posed;
            # L_C encodes that those fake terms had covariance gamma2 * I, acting as a regulariser
            self.L_C = np.eye(self.D) * np.sqrt(self.gamma2)
            self.log_sum_weights = log_weights_new[0]
            self.mu = Z_new[0]

            Z_new = Z_new[1:]
            log_weights_new = log_weights_new[1:]
            num_new -= 1

        # don't do anything if nothing is left after the first-update step
        if len(Z_new) == 0:
            return

        # convert log weights into step sizes (lmbdas) of the weighted running average
        lmbdas = log_weights_to_lmbdas(self.log_sum_weights, log_weights_new)

        # low-rank update of the Cholesky factor, costs only O(D^2)
        old_L_C = np.array(self.L_C, copy=True)
        self.mu, self.L_C = update_mean_cov_L_lmbda(Z_new, self.mu, self.L_C,
                                                    lmbdas)

        if np.any(np.isnan(self.L_C)) or np.any(np.isinf(self.L_C)):
            logger.warning(
                "Numerical error while updating Cholesky factor of C.\n"
                "Before update:\n%s\n"
                "After update:\n%s\n"
                "Updating data:\n%s\n"
                "Updating log weights:\n%s\n"
                "Updating lmbdas:\n%s\n",
                old_L_C, self.L_C, Z_new, log_weights_new, lmbdas)
            raise RuntimeError(
                "Numerical error while updating Cholesky factor of C.")

        # update the running log-sum of weights; only the new weights are added,
        # since all earlier ones (including the consumed first term) are already in it
        self.log_sum_weights = logsumexp(
            list(log_weights_new) + [self.log_sum_weights])
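
The first-update initialization above in isolation: a Cholesky factor of sqrt(gamma2) * I corresponds to the regularizing covariance C = L_C @ L_C.T = gamma2 * I (here D and gamma2 are hypothetical stand-ins for the instance attributes).

import numpy as np

D, gamma2 = 3, 0.1
L_C = np.eye(D) * np.sqrt(gamma2)
assert np.allclose(L_C @ L_C.T, gamma2 * np.eye(D))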
Example #4
import numpy as np
from scipy.special import logsumexp


def log_weights_to_lmbdas(log_sum_old_weights, log_new_weights, boundary_check_min_number=1e-5):
    """Converts log weights of new observations into the step sizes (lmbdas) of a
    weighted running average, given the log-sum of all previously seen weights."""
    N = len(log_new_weights)
    lmbdas = np.zeros(N)
    
    for i, log_new_weight in enumerate(log_new_weights):
        # accumulate the new weight into the running log-sum
        log_sum_old_weights = logsumexp([log_sum_old_weights, log_new_weight])
        # lmbda_i = w_i / (sum of all weights seen so far)
        log_lmbda = log_new_weight - log_sum_old_weights
        lmbdas[i] = np.exp(log_lmbda)
    
    # clamp lmbdas to the open interval (0, 1) for numerical stability
    lmbdas[lmbdas < boundary_check_min_number] = boundary_check_min_number
    lmbdas[(1 - lmbdas) < boundary_check_min_number] = 1 - boundary_check_min_number
    
    return lmbdas
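
A small check of what the lmbdas mean, assuming the standard weighted running-average recursion mu <- (1 - lmbda) * mu + lmbda * z (the form that update_mean_cov_L_lmbda in the earlier example presumably applies): with unit weights, lmbda_i = 1/(i+1) and the recursion reproduces the arithmetic mean.

import numpy as np

z = np.array([4.0, 1.0, 7.0, 2.0])
# one previous observation with weight 1 (log-sum 0), three new unit weights
lmbdas = log_weights_to_lmbdas(0.0, np.zeros(3))
print(lmbdas)  # [1/2, 1/3, 1/4]

mu = z[0]
for lmbda, z_i in zip(lmbdas, z[1:]):
    mu = (1 - lmbda) * mu + lmbda * z_i
print(mu, z.mean())  # both 3.5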
Example #5
    def predict_log_proba(self, X):
        if isinstance(X, ndarray):
            logps = []
            for clazz in self.classes_:
                # score X as if every row belonged to this class
                data_object = self._convert_to_data_object_in_scoring(
                    X, y=array([clazz] * len(X)))

                logps += [self._anomaly_detector._LogProbabilityOfData(
                    data_object, len(X))]

            # normalize each row of class log-likelihoods in log-space
            LogPs = [x - logsumexp(x) for x in array(logps).T]

            return array(LogPs)
Example #6
    def _logsum(self, x, dim):
        """Computes the log-sum-exp of x over all axes except `dim`.
        Returns a vector of size x.shape[dim]."""

        # handle case when there is no summation to be done
        if x.ndim == 1 and dim == 0:
            return x

        # move the kept axis to the front (np.moveaxis is preferred over np.rollaxis)
        x = np.moveaxis(x, dim, 0)

        # flatten other axes
        xflat = np.reshape(x, (x.shape[0], -1))

        # log-sum-exp over the flattened remaining axes
        x_sum = logsumexp(xflat, axis=1)

        return x_sum
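
For reference, the same reduction expressed directly: _logsum(x, dim) is the log-sum-exp of x over every axis except dim, which scipy's logsumexp can also compute with a tuple of axes.

import numpy as np
from scipy.special import logsumexp

x = np.random.randn(3, 4, 5)
ref = logsumexp(x, axis=(0, 2))  # keep axis 1, shape (4,)
flat = logsumexp(np.moveaxis(x, 1, 0).reshape(4, -1), axis=1)
assert np.allclose(ref, flat)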
Example #7
    def predict_log_proba(self, X):
        assert self.class_column > -1

        X1 = None
        if isinstance(X, pyisc.DataObject):
            assert X.class_column == self.class_column
            X1 = X.as_2d_array()
        elif isinstance(X, ndarray):
            X1 = X.copy()

        if X1 is not None:
            logps = self.compute_logp(X1)

            # normalize each row of class log-likelihoods in log-space
            LogPs = [x - logsumexp(x) for x in array(logps).T]

            return array(LogPs)
        else:
            raise ValueError("Unknown type of data to score: %s" % type(X))
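
The normalization step on a toy two-class example: if the rows hold per-class log-likelihoods, subtracting each row's logsumexp gives log-posteriors under a uniform class prior, so every exponentiated row sums to 1.

import numpy as np
from scipy.special import logsumexp

logps = np.array([[-3.0, -1.0],   # one row per sample, one column per class
                  [-0.5, -4.0]])
log_posterior = logps - logsumexp(logps, axis=1, keepdims=True)
print(np.exp(log_posterior).sum(axis=1))  # [1. 1.]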