Example #1
def logNegProbWordsGivenClusterAbnorm(self,
                                      doc,
                                      cluster,
                                      particles=16,
                                      cap=-1):
    """Uses Wallach's 'left to right' method to calculate the negative log
    probability of the words in the document given the rest of the model.
    Both the cluster (provided as an index) and the document's abnormality
    vector are fixed for this calculation. Returns the average of the results
    for each sample contained within the model. particles is the number of
    particles to use in the left-to-right estimation algorithm. This is
    implemented using scipy.weave."""
    return solvers.leftRightNegLogProbWord(self, doc, cluster, particles,
                                           cap)
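
For reference, here is a minimal pure-Python sketch of the left-to-right estimator of Wallach et al. that solvers.leftRightNegLogProbWord implements via scipy.weave. The phi (per-topic word distributions) and alpha (symmetric Dirichlet concentration) parameters, and both function names, are assumptions made for this illustration only; they are not part of the model's actual interface.

import math
import random

def sample_topic(w, phi, counts, alpha):
    # Hypothetical helper: draw a topic proportional to
    # phi[t][w] * (counts[t] + alpha)...
    weights = [phi[t][w] * (counts[t] + alpha) for t in range(len(phi))]
    u = random.random() * sum(weights)
    acc = 0.0
    for t, wt in enumerate(weights):
        acc += wt
        if acc >= u:
            return t
    return len(phi) - 1

def left_to_right_neg_log_prob(doc, phi, alpha, particles=16):
    # Sketch only: doc is a list of word indices; phi[t][w] gives fixed
    # per-topic word probabilities; alpha is a symmetric Dirichlet prior
    # on topic use within the document.
    T = len(phi)
    z = [[] for _ in range(particles)]             # topic assignment chains
    counts = [[0] * T for _ in range(particles)]   # per-particle topic counts
    total = 0.0
    for n, w in enumerate(doc):
        prob = 0.0
        for r in range(particles):
            # Resample the assignments of all earlier words (the 'left'
            # sweep that gives the method its name)...
            for m in range(n):
                counts[r][z[r][m]] -= 1
                z[r][m] = sample_topic(doc[m], phi, counts[r], alpha)
                counts[r][z[r][m]] += 1
            # Predictive probability of word n under this particle...
            norm = n + T * alpha
            prob += sum(phi[t][w] * (counts[r][t] + alpha) / norm
                        for t in range(T))
            # Extend the particle with an assignment for word n...
            t_new = sample_topic(w, phi, counts[r], alpha)
            z[r].append(t_new)
            counts[r][t_new] += 1
        # The particle average estimates p(w_n | w_{<n}); accumulate -log...
        total -= math.log(prob / particles)
    return total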
Example #2
def logNegProbWordsGivenAbnorm(self, doc, particles=16, cap=-1):
    """Uses logNegProbWordsGivenClusterAbnorm and simply sums out the
    cluster variable."""

    # Get the negative log probability of the words for each possible cluster assignment...
    cluScores = [solvers.leftRightNegLogProbWord(self, doc, c, particles, cap)
                 for c in range(self.getClusterCount())]

    # Weight each by the probability of that cluster so the cluster variable can be summed out - a subtraction, as these are negative log values...
    cluNorm = float(self.clusterUse.sum()) + self.clusterConc
    cluScores = [s - math.log(float(self.clusterUse[c]) / cluNorm)
                 for c, s in enumerate(cluScores)]

    # Also include the probability of a new cluster, even though it is likely to make a negligible contribution...
    newVal = solvers.leftRightNegLogProbWord(self, doc, -1, particles, cap)
    newVal -= math.log(self.clusterConc / cluNorm)
    cluScores.append(newVal)

    # Sum out the cluster variable in a numerically stable way, given that we are dealing with negative log likelihood values that map to extremely low probabilities...
    minScore = min(cluScores)
    cluPropProb = [math.exp(minScore - s) for s in cluScores]
    return minScore - math.log(sum(cluPropProb))
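
The last three lines are a log-sum-exp in disguise: factoring out the largest probability (the smallest negative log score) before exponentiating keeps every exp argument at or below zero, so nothing underflows before the sum. Isolated as a self-contained helper (the function name is made up for this sketch):

import math

def neg_log_sum_exp(neg_log_values):
    # Combine scores s_i = -log(p_i) into -log(sum_i p_i). Subtracting
    # the smallest score m first means each exp(m - s) lies in (0, 1],
    # avoiding underflow to zero for very improbable terms.
    m = min(neg_log_values)
    return m - math.log(sum(math.exp(m - s) for s in neg_log_values))

Note also how the cluster weights are formed: an existing cluster c gets clusterUse[c] / cluNorm and a brand-new cluster gets clusterConc / cluNorm, which matches the Chinese-restaurant-process style prior suggested by the clusterConc concentration parameter; that is why the -1 cluster index is scored and appended before the sum.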