class OnlinePMI:
    """Track, per item ``x``, the ``d`` contexts ``y`` with the highest PMI.

    Counts for items and (item, context) pairs are kept in ``self.sketch``,
    which must provide ``update(key)`` and ``estimate(key)``.

    Attributes:
        d: Maximum number of ``(pmi, y)`` entries kept per item.
        sketch: Counter built as ``Sketch(depth, width)``.
        S: Set of every ``(x, y)`` pair seen by ``update`` so far.
        V: Maps ``x`` to a heap (``heapq`` ordering, smallest PMI first) of
            ``(pmi, y)`` tuples.
    """

    def __init__(self, d, depth, width):
        """Create an empty tracker.

        Args:
            d: Maximum number of contexts kept per item.
            depth: Forwarded to ``Sketch`` (presumably the number of hash
                rows -- confirm against the ``Sketch`` definition).
            width: Forwarded to ``Sketch`` (presumably the row width --
                confirm against the ``Sketch`` definition).
        """
        self.d = d
        self.sketch = Sketch(depth, width)
        self.S = set()
        self.V = defaultdict(list)

    def pmi(self, x, y):
        """Return ``log2(c(x, y) / (c(x) * c(y)))`` from the sketch estimates.

        The raw estimates are used directly; they are not normalised by the
        number of observations.

        Returns:
            The score as a float, or ``0.0`` when it is undefined: the joint
            estimate is zero (logarithm of zero) or either marginal estimate
            is zero (division by zero).
        """
        c = self.sketch.estimate
        try:
            return math.log(c((x, y)) / (c(x) * c(y)), 2)
        except (ValueError, ZeroDivisionError):
            # An unseen item makes the denominator zero; treat it like the
            # zero-joint-count case rather than crashing the caller.
            return 0.0

    def update(self, B):
        """Consume a buffer of pairs and refresh the top-``d`` heaps.

        Args:
            B: Iterable of ``(x, y)`` pairs. Each pair is recorded in
                ``self.S`` and increments the sketch counts of ``x``, ``y``
                and ``(x, y)``.
        """
        for z, y in B:
            self.S.add((z, y))
            self.sketch.update(z)
            self.sketch.update(y)
            self.sketch.update((z, y))

        # NOTE(review): self.S is never cleared, so every pair ever seen is
        # re-scored on each call. Confirm whether it should be reset per
        # buffer.

        # Group the recorded contexts by item in a single pass instead of
        # rescanning self.S once per item.
        contexts = defaultdict(set)
        for x, y in self.S:
            contexts[x].add(y)

        # Recompute vectors V(x) using current contexts in the priority
        # queue and {y | S(<x, y>) = 1}.
        for x, ys in contexts.items():
            ys.update(y for _, y in self.V[x])

            # Rebuild the heap from scratch: pushing onto the old heap would
            # duplicate contexts already stored and keep scores computed
            # from outdated counts.
            heap = []
            for y in ys:
                entry = (self.pmi(x, y), y)
                if len(heap) < self.d:
                    heappush(heap, entry)
                else:
                    # Heap is full: keep only the d highest-scoring entries.
                    heappushpop(heap, entry)
            self.V[x] = heap
class OnlinePMI:
    """Track, per item ``x``, the ``d`` contexts ``y`` with the highest PMI.

    Counts for items and (item, context) pairs are kept in ``self.sketch``,
    which must provide ``update(key)`` and ``estimate(key)``.

    Attributes:
        d: Maximum number of ``(pmi, y)`` entries kept per item.
        sketch: Counter built as ``Sketch(depth, width)``.
        S: Set of every ``(x, y)`` pair seen by ``update`` so far.
        V: Maps ``x`` to a heap (``heapq`` ordering, smallest PMI first) of
            ``(pmi, y)`` tuples.
    """

    def __init__(self, d, depth, width):
        """Create an empty tracker.

        Args:
            d: Maximum number of contexts kept per item.
            depth: Forwarded to ``Sketch`` (presumably the number of hash
                rows -- confirm against the ``Sketch`` definition).
            width: Forwarded to ``Sketch`` (presumably the row width --
                confirm against the ``Sketch`` definition).
        """
        self.d = d
        self.sketch = Sketch(depth, width)
        self.S = set()
        self.V = defaultdict(list)

    def pmi(self, x, y):
        """Return ``log2(c(x, y) / (c(x) * c(y)))`` from the sketch estimates.

        The raw estimates are used directly; they are not normalised by the
        number of observations.

        Returns:
            The score as a float, or ``0.0`` when it is undefined: the joint
            estimate is zero (logarithm of zero) or either marginal estimate
            is zero (division by zero).
        """
        c = self.sketch.estimate
        try:
            return math.log(c((x, y)) / (c(x) * c(y)), 2)
        except (ValueError, ZeroDivisionError):
            # An unseen item makes the denominator zero; treat it like the
            # zero-joint-count case rather than crashing the caller.
            return 0.0

    def update(self, B):
        """Consume a buffer of pairs and refresh the top-``d`` heaps.

        Args:
            B: Iterable of ``(x, y)`` pairs. Each pair is recorded in
                ``self.S`` and increments the sketch counts of ``x``, ``y``
                and ``(x, y)``.
        """
        for z, y in B:
            self.S.add((z, y))
            self.sketch.update(z)
            self.sketch.update(y)
            self.sketch.update((z, y))

        # NOTE(review): self.S is never cleared, so every pair ever seen is
        # re-scored on each call. Confirm whether it should be reset per
        # buffer.

        # Group the recorded contexts by item in a single pass instead of
        # rescanning self.S once per item.
        contexts = defaultdict(set)
        for x, y in self.S:
            contexts[x].add(y)

        # Recompute vectors V(x) using current contexts in the priority
        # queue and {y | S(<x, y>) = 1}.
        for x, ys in contexts.items():
            ys.update(y for _, y in self.V[x])

            # Rebuild the heap from scratch: pushing onto the old heap would
            # duplicate contexts already stored and keep scores computed
            # from outdated counts.
            heap = []
            for y in ys:
                entry = (self.pmi(x, y), y)
                if len(heap) < self.d:
                    heappush(heap, entry)
                else:
                    # Heap is full: keep only the d highest-scoring entries.
                    heappushpop(heap, entry)
            self.V[x] = heap
def __init__(self, d, depth, width):
    """Initialise the instance with an empty sketch, pair set and heap map.

    Args:
        d: Stored unchanged as ``self.d``.
        depth: First argument forwarded to ``Sketch``.
        width: Second argument forwarded to ``Sketch``.
    """
    self.d = d
    # The counter is built before the containers so that a failing Sketch
    # constructor leaves S and V unset, as before.
    counter = Sketch(depth, width)
    self.sketch = counter
    # S starts as an empty set; V hands out a fresh list for any new key.
    self.S, self.V = set(), defaultdict(list)