# NOTE(review): this line is whitespace-collapsed Python 2 source (print
# statement, dict.iteritems). It begins inside a generator whose `def` header
# is not visible here -- presumably the `datafn` assigned to `s.datafn` below;
# confirm against the preceding text. `mm` and `i` are defined outside this view.
# Visible contents, in order:
#   1. generator tail: while mm.tell() < mm.size(), yield the pair
#      (next(i), mm.readline()); afterwards call mm.close().
#   2. mapfn(k1, v): yields (w, 1) for every item of v.bytes.split().
#   3. reducefn(k2, v): returns sum(v).
#   4. script: create a Server, connect(), assign mapfn / reducefn / datafn,
#      time s.start() with time.time(), print the elapsed seconds, then print
#      s.results() items sorted by value, descending, sliced [1:25] -- the
#      slice starts at index 1, so the highest-valued item is not printed.
# The trailing `########NEW FILE########` text looks like a file separator
# from a concatenated dump rather than program text -- confirm.
total = mm.size() while mm.tell() < total: yield next(i), mm.readline() mm.close() # map :: (k1,v1) -> [ (k2, v2) ] def mapfn(k1, v): for w in v.bytes.split(): yield w, 1 # reduce :: (k2, [v2]) -> [ (k3, v3) ] def reducefn(k2, v): return sum(v) # Server s = Server() s.connect() s.mapfn = mapfn s.reducefn = reducefn s.datafn = datafn start = time.time() s.start() stop = time.time() print stop-start #print s.results() print sorted(s.results().iteritems(), key=lambda x: x[1], reverse=True)[1:25] ########NEW FILE########
# NOTE(review): this line is whitespace-collapsed Python 2 source (print
# statement, dict.iteritems). It begins with a `yield`, i.e. inside a
# generator whose `def` header and loop header are not visible here --
# presumably the `datafn` assigned to `s.datafn` below; confirm against the
# preceding text. `mm` and `i` are defined outside this view.
# Visible contents, in order:
#   1. generator tail: yield the pair (next(i), mm.readline()), then
#      mm.close().
#   2. mapfn(k1, v): yields (w, 1) for every item of v.bytes.split().
#   3. reducefn(k2, v): returns sum(v).
#   4. script: create a Server, connect(), assign mapfn / reducefn / datafn,
#      time s.start() with time.time(), print the elapsed seconds, then print
#      s.results() items sorted by value, descending, sliced [1:25] -- the
#      slice starts at index 1, so the highest-valued item is not printed.
yield next(i), mm.readline() mm.close() # map :: (k1,v1) -> [ (k2, v2) ] def mapfn(k1, v): for w in v.bytes.split(): yield w, 1 # reduce :: (k2, [v2]) -> [ (k3, v3) ] def reducefn(k2, v): return sum(v) # Server s = Server() s.connect() s.mapfn = mapfn s.reducefn = reducefn s.datafn = datafn start = time.time() s.start() stop = time.time() print stop - start # print s.results() print sorted(s.results().iteritems(), key=lambda x: x[1], reverse=True)[1:25]
# `time.time()` is used below; no `import time` is visible alongside the
# kaylee import, so it is imported here (a repeated import is harmless).
import time

from kaylee import Server


# Example
# -----------------------------------------------

# Input records: line index (from 0) -> line text exactly as read from the
# file. The `with` block closes the file once it has been read instead of
# leaving the handle open for the lifetime of the script.
with open('mobydick.txt') as f:
    data = dict(enumerate(f))


def mapfn(k, v):
    """Yield a ``(word, 1)`` pair for every whitespace-separated token of *v*.

    k -- record key; not used by this function.
    v -- a string; in this script the values of ``data`` are lines of text.
    """
    for w in v.split():
        yield w, 1


def reducefn(k, v):
    """Return ``sum(v)``; *k* is accepted but not used."""
    return sum(v)


# Server
s = Server()
s.connect()
s.mapfn = mapfn
s.reducefn = reducefn
s.data = data

# Wall-clock time spent inside s.start().
start = time.time()
s.start()
stop = time.time()

# A parenthesised single-argument print gives the same output on Python 2
# and Python 3.
print(stop - start)
# print(s.results())