Esempio n. 1
0
class OnlinePMI:
    """Keep, for each item ``x``, a bounded heap of contexts ``y`` ranked by PMI.

    Counting is delegated to a ``Sketch`` object, which is used only through
    its ``update(key)`` and ``estimate(key)`` methods; keys are single items
    and ``(x, y)`` tuples.
    """

    def __init__(self, d, depth, width):
        """Create an empty model.

        d     -- maximum number of ``(pmi, y)`` entries kept per ``x``
        depth -- passed to ``Sketch`` as its first argument
        width -- passed to ``Sketch`` as its second argument
        """
        self.d = d
        self.sketch = Sketch(depth, width)
        # Every (z, y) pair handed to update() so far.
        self.S = set()
        # x -> min-heap (heapq layout) of (pmi, y) tuples.
        self.V = defaultdict(list)

    def pmi(self, x, y):
        """Return ``log2(c((x, y)) / (c(x) * c(y)))`` using sketch estimates.

        Returns 0.0 when the score is undefined: the ratio is not positive
        (``math.log`` raises ``ValueError``) or the product of the two
        marginal estimates is zero (``ZeroDivisionError``).
        """
        c = self.sketch.estimate
        joint = c((x, y))
        marginals = c(x) * c(y)
        try:
            # float() keeps this a true division even if the estimates are
            # ints and the interpreter truncates int / int.
            return math.log(float(joint) / marginals, 2)
        except (ValueError, ZeroDivisionError):
            return 0.0

    def update(self, B):
        """Feed a batch of ``(z, y)`` pairs and rebuild the affected heaps.

        B -- iterable of ``(z, y)`` pairs.

        Each pair is recorded in ``S`` and counted in the sketch (once for
        ``z``, once for ``y``, once for the pair).  Afterwards, for every
        ``x`` that occurs as first element in ``S``, ``V[x]`` is rebuilt from
        the contexts paired with ``x`` in ``S`` plus those already in
        ``V[x]``, keeping the ``d`` entries with the highest current PMI.
        """
        for z, y in B:
            self.S.add((z, y))
            self.sketch.update(z)
            self.sketch.update(y)
            self.sketch.update((z, y))

        # NOTE(review): S is never cleared, so it grows with every batch;
        # confirm whether it was meant to be reset per batch.

        # Group contexts by x in a single pass instead of rescanning S for
        # every x.
        contexts = defaultdict(set)
        for x, y in self.S:
            contexts[x].add(y)

        for x, ys in contexts.items():
            # Contexts already in the heap compete again with fresh scores.
            ys.update(y for _, y in self.V[x])
            # Rebuild from scratch: pushing onto the old heap would leave
            # duplicate contexts with stale scores behind.
            heap = []
            for y in ys:
                entry = (self.pmi(x, y), y)
                if len(heap) < self.d:
                    heappush(heap, entry)
                else:
                    heappushpop(heap, entry)
            self.V[x] = heap
Esempio n. 2
0
class OnlinePMI:
    """Track, per item ``x``, the highest-PMI contexts ``y`` seen so far.

    All counts come from a ``Sketch`` instance, accessed only via
    ``update(key)`` and ``estimate(key)``; keys are either a single item or
    an ``(x, y)`` tuple.
    """

    def __init__(self, d, depth, width):
        """Create an empty model.

        d     -- cap on the number of ``(pmi, y)`` entries stored per ``x``
        depth -- first argument given to ``Sketch``
        width -- second argument given to ``Sketch``
        """
        self.d = d
        self.sketch = Sketch(depth, width)
        # All (z, y) pairs received by update() so far.
        self.S = set()
        # x -> list kept in heapq (min-heap) order, holding (pmi, y) tuples.
        self.V = defaultdict(list)

    def pmi(self, x, y):
        """Return ``log2(c((x, y)) / (c(x) * c(y)))`` from sketch estimates.

        Falls back to 0.0 when the value cannot be computed: ``math.log``
        rejects a non-positive ratio (``ValueError``), or the marginal
        product is zero (``ZeroDivisionError``).
        """
        c = self.sketch.estimate
        pair_count = c((x, y))
        marginal_product = c(x) * c(y)
        try:
            # Coerce to float so integer estimates are never floor-divided.
            return math.log(float(pair_count) / marginal_product, 2)
        except (ValueError, ZeroDivisionError):
            return 0.0

    def update(self, B):
        """Consume a batch of ``(z, y)`` pairs, then refresh the heaps.

        B -- iterable of ``(z, y)`` pairs.

        Every pair is added to ``S`` and counted in the sketch three ways
        (``z``, ``y`` and the pair itself).  Then, for each ``x`` appearing
        as a first element in ``S``, ``V[x]`` is recomputed from the union of
        the contexts paired with ``x`` in ``S`` and the contexts currently in
        ``V[x]``; only the ``d`` best-scoring entries are retained.
        """
        for z, y in B:
            self.S.add((z, y))
            self.sketch.update(z)
            self.sketch.update(y)
            self.sketch.update((z, y))

        # NOTE(review): S only ever grows; verify whether it should be
        # emptied at the start of each batch.

        # One pass over S to bucket contexts by x (avoids a full scan of S
        # per distinct x).
        by_x = defaultdict(set)
        for x, y in self.S:
            by_x[x].add(y)

        for x, candidates in by_x.items():
            # Re-score the contexts that are already stored for x.
            candidates.update(y for _, y in self.V[x])
            # Start from an empty heap so no context is stored twice and no
            # outdated score survives.
            fresh = []
            for y in candidates:
                scored = (self.pmi(x, y), y)
                if len(fresh) < self.d:
                    heappush(fresh, scored)
                else:
                    heappushpop(fresh, scored)
            self.V[x] = fresh
Esempio n. 3
0
 def __init__(self, d, depth, width):
     """Initialise an empty instance.

     ``d`` is stored unchanged; ``depth`` and ``width`` are passed, in that
     order, to ``Sketch``.  ``S`` starts as an empty set and ``V`` as an
     empty mapping whose missing keys default to a new list.
     """
     self.d = d
     counter = Sketch(depth, width)
     self.sketch = counter
     self.S, self.V = set(), defaultdict(list)
Esempio n. 4
0
 def __init__(self, d, depth, width):
     """Set up the initial state of a new instance.

     Keeps ``d`` as given, constructs a ``Sketch`` from ``depth`` and
     ``width``, and creates the empty containers ``S`` (a set) and ``V``
     (a ``defaultdict`` of lists).
     """
     self.d = d
     self.sketch = Sketch(depth, width)
     empty_pairs = set()
     heaps_by_key = defaultdict(list)
     self.S = empty_pairs
     self.V = heaps_by_key