def run_disco(limit):
    """Run the "test_sort" job and verify how many results it produces.

    limit is handed to the job unchanged as its mem_sort_limit.

    Raises Exception unless the job yields exactly 100000 results.
    """
    expected = int(1e5)
    results = disco.job(sys.argv[1], "test_sort",
                        tserver.makeurl([""] * int(1e3)),
                        fun_map,
                        reduce = fun_reduce,
                        nr_reduces = 50,
                        sort = True,
                        mem_sort_limit = limit)
    # Count lazily; only the number of results matters here.
    k = sum(1 for _ in disco.result_iterator(results))
    if k != expected:
        # A bare string cannot be raised (the interpreter answers with a
        # TypeError that hides this message), so raise a real exception.
        raise Exception("not enough results: Got %d, expected %d" % (k, expected))
def test():
    """Run the map-only "test_missingnode" job and check the sum of its keys.

    The job gets twice as many integer inputs as the summed 'max_workers'
    of the nodes listed under 'available' in disco.nodeinfo().

    Raises Exception if the integer keys of the results do not add up to
    the sum of the inputs. The job is purged on success.
    """
    slots = 0
    for node in disco.nodeinfo()['available']:
        slots += node['max_workers']
    inputs = range(slots * 2)

    job = disco.new_job(
        name = "test_missingnode",
        map = fun_map,
        input = tserver.makeurl(inputs))
    results = job.wait()

    s = 0
    for k, v in result_iterator(results):
        s += int(k)
    correct = sum(range(slots * 2))
    if s != correct:
        raise Exception("Invalid result. Got %d, expected %d" % (s, correct))
    job.purge()
def run_disco(limit, name):
    """Run the sorted job "test_sort_<name>" and validate every result.

    limit is handed to the job unchanged as its mem_sort_limit.

    Each result value must be "1000". Each base64-decoded key is removed
    from the set of expected keys (lowercase ASCII letters and the digits
    0-9), so an unexpected or repeated key surfaces as a KeyError.

    Raises Exception for a wrong value or when expected keys are missing.
    The job is purged on success.
    """
    job = disco.new_job(
        name = "test_sort_%s" % name,
        input = tserver.makeurl([""] * 100),
        map = fun_map,
        reduce = fun_reduce,
        nr_reduces = 1,
        sort = True,
        mem_sort_limit = limit)

    # Keys that have not been seen yet.
    pending = {}
    for x in list(string.ascii_lowercase) + range(10):
        pending[str(x)] = True

    for k, v in result_iterator(job.wait()):
        if v != "1000":
            raise Exception("Incorrect result: "
                            "Expected 1000, got %s" % v)
        del pending[base64.decodestring(k)]

    if pending:
        raise Exception("Missing keys: %s" % " ".join(pending))
    job.purge()
# Expected value of the single "result" key produced by fun_reduce.
ANS = ("1028380578493512611198383005758052057919386757620401"
       "58350002406688858214958513887550465113168573010369619140625")


def data_gen(path):
    """Return the path without its first character, repeated on ten lines."""
    entry = path[1:]
    return "\n".join([entry] * 10)


def fun_map(e, params):
    """Return one (key, value) pair: the entry prefixed with '=' and the entry."""
    key = '=' + e
    return [(key, e)]


def fun_reduce(iter, out, params):
    """Multiply all values as integers and emit the product under "result".

    Raises Exception if a key is not "=" followed by its value.
    """
    product = 1
    for key, value in iter:
        if key == "=" + value:
            product *= int(value)
        else:
            raise Exception("Corrupted key")
    out.add("result", product)


tserver.run_server(data_gen)

inputs = [3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
results = disco.job(
    sys.argv[1],
    "test_simple",
    tserver.makeurl(inputs),
    fun_map,
    reduce = fun_reduce,
    nr_reduces = 1,
    sort = False)

if list(disco.result_iterator(results)) != [("result", ANS)]:
    raise Exception("Invalid answer")

print("ok")
def fun_map(e, params):
    """Return (word, 1) for every lower-cased word in the entry.

    Non-word characters are turned into spaces before splitting.
    """
    return [(w, 1) for w in re.sub(r"\W", " ", e).lower().split()]


def fun_reduce(iter, out, params):
    """Sum the integer values per key and emit one (key, total) pair each."""
    totals = {}
    for k, v in iter:
        totals[k] = totals.get(k, 0) + int(v)
    for k, v in totals.iteritems():
        out.add(k, v)


tserver.run_server(data_gen)
results = disco.job(sys.argv[1], "test_50k",
                    tserver.makeurl([""] * int(5e4)),
                    fun_map,
                    reduce = fun_reduce,
                    nr_reduces = 300,
                    sort = False)

# Expected total for each word.
ANS = {"gutta": int(5e6), "cavat": int(1e7), "capidem": int(5e6)}
i = 0
for key, value in disco.result_iterator(results):
    i += 1
    if ANS[key] == int(value):
        print("Correct: %s %s" % (key, value))
    else:
        # Raising a bare string is itself a TypeError; use a real exception
        # so the intended message reaches the caller.
        raise Exception("Results don't match")

if i != 3:
    raise Exception("Too few results")

disco.Disco(sys.argv[1]).purge(disco.util.jobname(results[0]))
def fun_map3(e, params):
    # Evaluating the undefined name below raises NameError. This looks
    # deliberate: the status check further down expects job 3 to be "dead".
    fail


def fun_map4(e, params):
    """Sleep for four seconds, then return no results."""
    time.sleep(4)
    return []


tserver.run_server(data_gen)
disco = Disco(sys.argv[1])

# Start one job per map function; job names are numbered from 1.
jobs = []
for i, m in enumerate([fun_map1, fun_map2, fun_map3, fun_map4]):
    jobs.append(disco.new_job(
        name = "test_waitmany_%d" % (i + 1),
        input = tserver.makeurl([""] * 5),
        map = m))

# Poll until no jobs remain, collecting the finished ones as they arrive.
res = []
while jobs:
    ready, jobs = disco.results(jobs, timeout = 2000)
    res += ready

for n, r in res:
    if n.startswith("test_waitmany_3"):
        if r[0] != "dead":
            raise Exception("Invalid job status: %s" % n)
    elif r[0] != "ready":
        raise Exception("Invalid job status: %s" % n)
    disco.purge(n)
return "\n".join(ani) def fun_map(e, params): if type(e) == tuple: return [(e[0] + params['suffix'], int(e[1]) + 1)] else: return [(e + params['suffix'], 0)] def fun_reduce(iter, out, params): for k, v in iter: out.add(k + "-", v) tserver.run_server(data_gen) disco = Disco(sys.argv[1]) results = disco.new_job(name = "test_chain_0", input = tserver.makeurl([""] * 100), map = fun_map, reduce = fun_reduce, nr_reduces = 4, sort = False, params = {'suffix': '0'}).wait() i = 1 while i < 10: nresults = disco.new_job(name = "test_chain_%d" % i, input = results, map = fun_map, reduce = fun_reduce, nr_reduces = 4, map_reader = chain_reader, sort = False, params = {'suffix': str(i)}).wait() disco.purge(jobname(results[0])) results = nresults i += 1 for key, value in result_iterator(results):
return "test_%s\n" % path[1:] def fun_reduce(iter, out, params): for k, v in iter: out.add("red_" + k, "red_" + v) tserver.run_server(data_gen) inputs = ["ape", "cat", "dog"] params = {"test1": "1,2,3",\ "one two three": "dim\ndam\n",\ "dummy": "value"} job = Disco(sys.argv[1]).new_job( name = "test_external", input = tserver.makeurl(inputs), map = external(["ext_test"]), reduce = fun_reduce, ext_params = params, nr_reduces = 1, sort = False) results = sorted([(v, k) for k, v in result_iterator(job.wait())]) for i, e in enumerate(results): v, k = e if k != "red_dkey" or v != "red_test_%s" % inputs[i / 3]: raise Exception("Invalid answer: %s, %s" % (k, v)) if len(results) != 9: raise Exception("Wrong number of results: %u vs. 9" % len(results))
def fun_map(e, params):
    """Return (word, 1) for every lower-cased word in the entry.

    Non-word characters are turned into spaces before splitting.
    """
    return [(w, 1) for w in re.sub(r"\W", " ", e).lower().split()]


def fun_reduce(iter, out, params):
    """Sum the integer values per key and emit one (key, total) pair each."""
    totals = {}
    for k, v in iter:
        totals[k] = totals.get(k, 0) + int(v)
    for k, v in totals.iteritems():
        out.add(k, v)


tserver.run_server(data_gen)
job = Disco(sys.argv[1]).new_job(name="test_50k",
                                 input=tserver.makeurl([""] * int(5e4)),
                                 map=fun_map,
                                 reduce=fun_reduce,
                                 nr_reduces=300,
                                 sort=False)

# Expected total for each word.
ANS = {"gutta": int(5e6), "cavat": int(1e7), "capidem": int(5e6)}
i = 0
for key, value in result_iterator(job.wait()):
    i += 1
    if ANS[key] == int(value):
        print("Correct: %s %s" % (key, value))
    else:
        # Raising a bare string is itself a TypeError; use a real exception
        # so the intended message reaches the caller.
        raise Exception("Results don't match")

if i != 3:
    raise Exception("Wrong number of results: Got %d expected 3" % i)
return [(e[0] + params["suffix"], int(e[1]) + 1)] else: return [(e + params["suffix"], 0)] def fun_reduce(iter, out, params): for k, v in iter: out.add(k + "-", v) tserver.run_server(data_gen) disco = Disco(sys.argv[1]) results = disco.new_job( name="test_chain_0", input=tserver.makeurl([""] * 100), map=fun_map, reduce=fun_reduce, nr_reduces=4, sort=False, clean=True, params={"suffix": "0"}, ).wait() i = 1 while i < 10: nresults = disco.new_job( name="test_chain_%d" % i, input=results, map=fun_map, reduce=fun_reduce,
from disco import Disco


def data_gen(path):
    """Return the same one-line payload for every path."""
    return "1 2 3\n"


def fun_map(e, params):
    """Sleep for 100 seconds so the job is still running when it is killed."""
    from time import sleep
    sleep(100)
    return []


disco = Disco(sys.argv[1])

# Total worker slots over the nodes listed as available.
num = sum(node['max_workers'] for node in disco.nodeinfo()['available'])
sys.stderr.write("%s %s\n" % (num, "slots available"))

tserver.run_server(data_gen)
job = disco.new_job(
    name = "test_kill",
    input = tserver.makeurl([""] * num * 2),
    map = fun_map)

time.sleep(10)
sys.stderr.write("%s %s\n" % ("Killing", job.name))
job.kill()
time.sleep(5)

if job.jobinfo()['active'] != "dead":
    raise Exception("Killing failed")
print("ok")
job.purge()
# Paths whose first request must fail; each is removed once it has failed.
fail = ["1", "2", "3"]


def data_gen(path):
    """Fail the first request for each path in `fail`, then serve it normally.

    Returns ten times the numeric path followed by a newline.

    Raises tserver.FailedReply the first time a path listed in `fail` is
    requested.
    """
    e = path[1:]
    # try/finally guarantees the lock is released on every exit path.
    lock.acquire()
    try:
        must_fail = e in fail
        if must_fail:
            fail.remove(e)
    finally:
        lock.release()
    if must_fail:
        raise tserver.FailedReply()
    return str(int(e) * 10) + "\n"


def fun_map(e, params):
    """Return one pair: ten times the integer entry, with an empty value."""
    return [(int(e) * 10, "")]


tserver.run_server(data_gen)
job = Disco(sys.argv[1]).new_job(
    name = "test_tempfail",
    input = tserver.makeurl([str(x) for x in range(10)]),
    map = fun_map)

res = sum(int(x) for x, y in result_iterator(job.wait()))
if res != 4500:
    raise Exception("Invalid result: Got %d, expected 4500" % res)

job.purge()
print("ok")
def fun_map(e, params):
    """Return one (key, value) pair: the entry prefixed with "=" and the entry."""
    key = "=" + e
    return [(key, e)]


def fun_reduce(iter, out, params):
    """Multiply all values as integers and emit the product under "result".

    Raises Exception if a key is not "=" followed by its value.
    """
    product = 1
    for key, value in iter:
        if key == "=" + value:
            product *= int(value)
        else:
            raise Exception("Corrupted key")
    out.add("result", product)


tserver.run_server(data_gen)

inputs = [3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
results = disco.job(
    sys.argv[1],
    "test_simple",
    tserver.makeurl(inputs),
    fun_map,
    reduce=fun_reduce,
    nr_reduces=1,
    sort=False,
)

if list(disco.result_iterator(results)) != [("result", ANS)]:
    raise Exception("Invalid answer")

print(results)
disco.Disco(sys.argv[1]).purge(disco.util.jobname(results[0]))
print("ok")
if x > 10: return 1 else: return 0 def data_gen(path): return "\n".join([path[1:]] * 10) def fun_map(e, params): return [(e, params.f1(int(e), params.x))] def fun_reduce(iter, out, params): for k, v in iter: out.add(k, params.f2(int(v))) tserver.run_server(data_gen) inputs = range(10) results = disco.job(sys.argv[1], "test_params", tserver.makeurl(inputs), fun_map, params = disco.Params(x = 5, f1 = fun1, f2 = fun2), reduce = fun_reduce, nr_reduces = 1, sort = False) for x, y in disco.result_iterator(results): if fun2(int(x) + 5) != int(y): raise "Invalid result: %s and %s" % (x, y) print "ok"
job.purge() else: raise Exception("Rate limit failed") def data_gen(path): return "badger\n" * 1000000 def fun_map(e, params): msg(e) return [] def fun_map2(e, params): return [] tserver.run_server(data_gen) inputs = tserver.makeurl([1]) job = Disco(sys.argv[1]).new_job(name = "test_ratelimit", input = inputs, map = fun_map) time.sleep(5) check_dead(job) job = Disco(sys.argv[1]).new_job(name = "test_ratelimit2", input = inputs, map = fun_map2, status_interval = 1) time.sleep(5) check_dead(job) job = Disco(sys.argv[1]).new_job(name = "test_ratelimit3", input = inputs, map = fun_map2, status_interval = 0) job.wait()
def fun_reduce(iter, out, params):
    """Sum the integer values per key and emit one (key, total) pair each."""
    totals = {}
    for k, v in iter:
        totals[k] = totals.get(k, 0) + int(v)
    for k, v in totals.iteritems():
        out.add(k, v)


tserver.run_server(data_gen)
job = Disco(sys.argv[1]).new_job(
    name = "test_profile",
    input = tserver.makeurl([""] * int(100)),
    map = really_unique_function_name,
    reduce = fun_reduce,
    nr_reduces = 30,
    sort = False,
    profile = True)

# Expected total for each word.
ANS = {"gutta": int(1e4), "cavat": int(2e4), "capidem": int(1e4)}
i = 0
for key, value in result_iterator(job.wait()):
    i += 1
    if ANS[key] == int(value):
        print("Correct: %s %s" % (key, value))
    else:
        # Raising a bare string is itself a TypeError; use a real exception
        # so the intended message reaches the caller.
        raise Exception("Results don't match (%s): Got %d expected %d" %
                        (key, int(value), ANS[key]))
def fun_map(e, params): import time, random time.sleep(random.randint(1, 3)) return [(e, 0)] def fun_reduce(iter, out, params): for k, v in iter: out.add("[%s]" % k, v) tserver.run_server(data_gen) disco = Disco(sys.argv[1]) num = sum(x['max_workers'] for x in disco.nodeinfo()['available']) print >> sys.stderr, num, "slots available" inputs = tserver.makeurl(range(num * 10)) random.shuffle(inputs) jobs = [] for i in range(5): jobs.append(disco.new_job(name = "test_async_%d" % i, input = inputs[i * (num * 2):(i + 1) * (num * 2)], map = fun_map, reduce = fun_reduce, nr_reduces = 11, sort = False)) time.sleep(1) all = dict(("[%s]" % i, 0) for i in range(num * 10)) for job in jobs: results = job.wait() print "Job", job, "done" for k, v in result_iterator(results):
def data_gen(path):
    """Return the path without its first character, repeated on ten lines."""
    entry = path[1:]
    return "\n".join([entry] * 10)


def fun_map(e, params):
    """Return one (key, value) pair: the entry prefixed with "=" and the entry."""
    key = "=" + e
    return [(key, e)]


def fun_reduce(iter, out, params):
    """Multiply all values as integers and emit the product under "result".

    Raises Exception if a key is not "=" followed by its value.
    """
    product = 1
    for key, value in iter:
        if key == "=" + value:
            product *= int(value)
        else:
            raise Exception("Corrupted key")
    out.add("result", product)


tserver.run_server(data_gen)

inputs = [3, 5, 7, 11, 13, 17, 19, 23, 29, 31]
job = Disco(sys.argv[1]).new_job(
    name="test_simple",
    input=tserver.makeurl(inputs),
    map=fun_map,
    reduce=fun_reduce,
    nr_reduces=1,
    sort=False,
)

if list(result_iterator(job.wait())) != [("result", ANS)]:
    raise Exception("Invalid answer")

job.purge()
print("ok")
# Build ten "i:j" inputs for every i in range(N), with j running over
# i .. i+9, and record the expected per-key sum of the j values in `results`.
inputs = []
for i in range(N):
    b = range(i, i + 10)
    inputs += ["%d:%d" % (i, j) for j in b]
    results[str(i)] = str(sum(b))

random.shuffle(inputs)

# Floor division keeps the slice bound an integer regardless of how `/` is
# defined for ints; computing it once keeps both halves consistent.
half = len(inputs) // 2

disco = Disco(sys.argv[1])

print("Running two map jobs..")

map1 = disco.new_job(
    name = "test_onlyreduce1",
    input = tserver.makeurl(inputs[:half]),
    map = fun_map,
    partition = fun_partition,
    nr_reduces = N)

map2 = disco.new_job(
    name = "test_onlyreduce2",
    input = tserver.makeurl(inputs[half:]),
    map = fun_map,
    partition = fun_partition,
    nr_reduces = N)

results1 = map1.wait()
print("map1 done")
results2 = map2.wait()
print("map2 done")