def exec_list(exprList, lenNodes, csvFilename, hex_key, colX):
    """Execute each expression template on randomly chosen nodes.

    ``h2e.exec_zero_list(zeroList)`` is run first. The trial counter then
    starts at 1 and is advanced after every expression; the outer loop only
    re-checks ``trial < 100`` after a complete pass over ``exprList``.

    Args:
        exprList: expression templates given to ``h2e.fill_in_expr_template``.
        lenNodes: upper bound (exclusive) for the random index into
            ``h2o.nodes``.
        csvFilename: not used by this function.
        hex_key: passed through to ``h2e.fill_in_expr_template``.
        colX: passed through to ``h2e.fill_in_expr_template``.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. Placing the sandbox check, the progress print
    and ``trial += 1`` inside the inner ``for`` loop is an assumption --
    confirm against the upstream file.
    """
    h2e.exec_zero_list(zeroList)
    # start with trial = 1 because trial-1 is used to point to Result0,
    # which must be initted
    trial = 1
    while trial < 100:
        for exprTemplate in exprList:
            # do each expression at a random node, to facilitate key movement
            nodeX = random.randint(0, lenNodes - 1)
            # billion rows only has two cols
            # colX is incremented in the fill_in_expr_template
            # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
            row = str(random.randint(1, 400000))

            execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial, row, hex_key)
            # The inspect result is not examined here, so it is not bound.
            h2e.exec_expr(
                h2o.nodes[nodeX],
                execExpr,
                resultKey="Result" + str(trial) + ".hex",
                timeoutSecs=60,
            )

            h2o.check_sandbox_for_errors()
            # Single-argument print: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("Trial # %s completed\n" % (trial,))
            trial += 1
def exec_list(exprList, lenNodes, csvFilename, key2):
    """Execute each expression template on random nodes and dump the results.

    ``h2e.exec_zero_list(zeroList)`` is run first. The trial counter then
    starts at 1 and is advanced after every expression; the outer loop only
    re-checks ``trial < 100`` after a complete pass over ``exprList``.

    Args:
        exprList: expression templates given to ``h2e.fill_in_expr_template``.
        lenNodes: upper bound (exclusive) for the random index into
            ``h2o.nodes``.
        csvFilename: not used by this function.
        key2: passed through to ``h2e.fill_in_expr_template``.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. Placing every per-iteration statement
    (including ``trial += 1``) inside the inner ``for`` loop is an
    assumption -- confirm against the upstream file.
    """
    h2e.exec_zero_list(zeroList)
    # start with trial = 1 because trial-1 is used to point to Result0,
    # which must be initted
    trial = 1
    while trial < 100:
        for exprTemplate in exprList:
            # do each expression at a random node, to facilitate key movement
            nodeX = random.randint(0, lenNodes - 1)
            colX = random.randint(1, 54)
            # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
            row = str(random.randint(1, 400000))

            execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial, row, key2)
            execResultInspect = h2e.exec_expr(
                h2o.nodes[nodeX],
                execExpr,
                resultKey="Result" + str(trial) + ".hex",
                timeoutSecs=60,
            )

            eri0 = execResultInspect[0]
            eri1 = execResultInspect[1]
            # 'cols' is popped so the dump of eri0 below does not repeat it.
            columns = eri0.pop('cols')
            columnsDict = columns[0]
            # Single-argument prints: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("\nexecResult columns[0]: %s" % (h2o.dump_json(columnsDict),))
            print("\nexecResult [0]: %s" % (h2o.dump_json(eri0),))
            print("\nexecResult [1] : %s" % (h2o.dump_json(eri1),))

            # Named colMin rather than `min` so the builtin is not shadowed.
            colMin = columnsDict["min"]
            h2o.verboseprint("min: ", colMin, "trial:", trial)
            ### self.assertEqual(float(min), float(trial),"what can we check here")

            ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
            # slows things down to check every iteration, but good for isolation
            h2o.check_sandbox_for_errors()
            print("Trial # %s completed\n" % (trial,))
            trial += 1
def test_exec_filter_slice2(self):
    """Parse covtype.data, then run every template in exprList on node 0, ten times over.

    Before each exec the test sleeps two seconds and checks the sandbox for
    errors. Nothing is asserted on the exec results; ``min_value`` is only
    printed.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. The nesting of the inner loop body is an
    assumption -- confirm against the upstream file.
    """
    csvFilename = "covtype.data"
    csvPathname = h2o.find_dataset('UCI/UCI-large/covtype/covtype.data')
    key2 = "c"
    parseKey = h2o_cmd.parseFile(None, csvPathname, 'covtype.data', 'c', 10)
    # Single-argument prints: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("%s parse time: %s" % (csvFilename, parseKey['response']['time']))
    print("Parse result['desination_key']: %s" % (parseKey['destination_key'],))
    # The inspect result itself is not examined, so it is not bound.
    h2o_cmd.runInspect(None, parseKey['destination_key'])

    nodeX = 0  # every exec goes to the first node
    for trial in range(10):
        print("Doing the execs in order, to feed filters into slices")
        for exprTemplate in exprList:
            execExpr = h2e.fill_in_expr_template(
                exprTemplate, colX=0, n=0, row=1, key2=key2, m=2)
            time.sleep(2)
            h2o.check_sandbox_for_errors()

            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey="Result.hex", timeoutSecs=4)

            print("min_value: %s execExpr: %s" % (min_value, execExpr))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
def test_exec2_filter_slice(self):
    """Import covtype.data as ``c.hex``, then run every template in exprList on node 0, ten times over.

    Nothing is asserted on the exec results; ``min_value`` is only printed.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. The nesting of the inner loop body is an
    assumption -- confirm against the upstream file.
    """
    csvPathname = 'standard/covtype.data'
    hex_key = "c.hex"
    parseResult = h2i.import_parse(
        bucket='home-0xdiag-datasets',
        path=csvPathname,
        schema='put',
        hex_key=hex_key,
        timeoutSecs=20,
    )
    # Single-argument prints: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("Parse result['desination_key']: %s" % (parseResult['destination_key'],))
    # The inspect result itself is not examined, so it is not bound.
    h2o_cmd.runInspect(None, parseResult['destination_key'])

    nodeX = 0  # every exec goes to the first node
    for trial in range(10):
        print("Doing the execs in order, to feed filters into slices")
        for exprTemplate in exprList:
            execExpr = h2e.fill_in_expr_template(
                exprTemplate, colX=0, n=0, row=1, keyX=hex_key, m=2)
            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey=None, timeoutSecs=10)

            print("min_value: %s execExpr: %s" % (min_value, execExpr))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
def test_exec_filter_slice2(self):
    """Import covtype.data as ``c``, then run every template in exprList on node 0, ten times over.

    Before each exec the test sleeps two seconds and checks the sandbox for
    errors. Nothing is asserted on the exec results; ``min_value`` is only
    printed.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. The nesting of the inner loop body is an
    assumption -- confirm against the upstream file.
    """
    csvFilename = "covtype.data"
    csvPathname = 'UCI/UCI-large/covtype/covtype.data'
    hex_key = 'c'
    parseResult = h2i.import_parse(
        bucket='datasets',
        path=csvPathname,
        schema='put',
        hex_key=hex_key,
        timeoutSecs=10,
    )
    # Single-argument prints: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("%s parse time: %s" % (csvFilename, parseResult['response']['time']))
    print("Parse result['desination_key']: %s" % (parseResult['destination_key'],))
    # The inspect result itself is not examined, so it is not bound.
    h2o_cmd.runInspect(None, parseResult['destination_key'])

    nodeX = 0  # every exec goes to the first node
    for trial in range(10):
        print("Doing the execs in order, to feed filters into slices")
        for exprTemplate in exprList:
            execExpr = h2e.fill_in_expr_template(
                exprTemplate, colX=0, n=0, row=1, keyX=hex_key, m=2)
            time.sleep(2)
            h2o.check_sandbox_for_errors()

            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey="Result.hex", timeoutSecs=4)

            print("min_value: %s execExpr: %s" % (min_value, execExpr))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
def test_exec2_rotate_inc(self):
    """Run the init templates on node 0, then rotate through exprList checking min_value.

    ``h2o.beta_features`` is switched on first. For each expression the test
    computes ``number = trial + 10``, fills the template with
    ``n = number % period`` and ``m = (number - goback) % period``, executes
    it, and asserts that ``int(min_value) == int(number)``.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. Placing ``trial += 1`` and the assertion
    inside the inner ``for`` loop is an assumption -- confirm against the
    upstream file.
    """
    h2o.beta_features = True
    lenNodes = len(h2o.nodes)

    # zero the list of Results using node[0]
    # FIX! is the zerolist not being seen correctly? is it not initializing to non-zero?
    for exprTemplate in initList:
        execExpr = h2e.fill_in_expr_template(exprTemplate, n=0, m=0)
        print(execExpr)
        # The exec result is not examined, so it is not bound.
        h2e.exec_expr(h2o.nodes[0], execExpr)
        ### print "\nexecResult:", execResult

    period = 10
    # start at result10, to allow goback of 10
    trial = 0
    while trial < 200:
        for exprTemplate in exprList:
            # for the first 100 trials: do each expression at node 0,
            # for the second 100 trials: do each expression at a random node,
            # to facilitate key movement
            # FIX! there's some problem with the initList not taking if rotated amongst nodes?
            if DO_ONE_NODE_ONLY or trial < 100:
                nodeX = 0
            else:
                nodeX = random.randint(0, lenNodes - 1)
            ### print nodeX

            number = trial + 10
            # The original also built "Result" + str(number % period) into a
            # local named resultKey but never passed it on; exec_expr is
            # called with resultKey=None.
            execExpr = h2e.fill_in_expr_template(
                exprTemplate,
                n=(number % period),
                m=((number - goback) % period),
            )
            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey=None, timeoutSecs=4)

            # Single-argument print: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("min_value: %s execExpr: %s number: %s" % (min_value, execExpr, number))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
            self.assertEqual(int(min_value), int(number))
            # we're talking to just one node. ignore this comment
            # 'Although the memory model allows write atomicity to be violated,' +
            # 'this test was passing with an assumption of multi-jvm write atomicity' +
            # 'Be interesting if ever fails. Can disable assertion if so, and run without check')

            ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
            trial += 1
def test_exec2_rotate_inc(self):
    """Run the init templates on node 0, then rotate through exprList checking min_value.

    For each expression the test computes ``number = trial + 10``, fills the
    template with ``n = number % period`` and
    ``m = (number - goback) % period``, executes it, and asserts that
    ``int(min_value) == int(number)``.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. Placing ``trial += 1`` and the assertion
    inside the inner ``for`` loop is an assumption -- confirm against the
    upstream file.
    """
    lenNodes = len(h2o.nodes)

    # zero the list of Results using node[0]
    # FIX! is the zerolist not being seen correctly? is it not initializing to non-zero?
    for exprTemplate in initList:
        execExpr = h2e.fill_in_expr_template(exprTemplate, n=0, m=0)
        print(execExpr)
        # The exec result is not examined, so it is not bound.
        h2e.exec_expr(h2o.nodes[0], execExpr)
        ### print "\nexecResult:", execResult

    period = 10
    # start at result10, to allow goback of 10
    trial = 0
    while trial < 200:
        for exprTemplate in exprList:
            # for the first 100 trials: do each expression at node 0,
            # for the second 100 trials: do each expression at a random node,
            # to facilitate key movement
            # FIX! there's some problem with the initList not taking if rotated amongst nodes?
            if DO_ONE_NODE_ONLY or trial < 100:
                nodeX = 0
            else:
                nodeX = random.randint(0, lenNodes - 1)
            ### print nodeX

            number = trial + 10
            # The original also built "Result" + str(number % period) into a
            # local named resultKey but never passed it on; exec_expr is
            # called with resultKey=None.
            execExpr = h2e.fill_in_expr_template(
                exprTemplate,
                n=(number % period),
                m=((number - goback) % period),
            )
            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey=None, timeoutSecs=4)

            # Single-argument print: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("min_value: %s execExpr: %s number: %s" % (min_value, execExpr, number))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
            self.assertEqual(int(min_value), int(number))
            # we're talking to just one node. ignore this comment
            # 'Although the memory model allows write atomicity to be violated,' +
            # 'this test was passing with an assumption of multi-jvm write atomicity' +
            # 'Be interesting if ever fails. Can disable assertion if so, and run without check')

            ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
            trial += 1
def exec_list(exprList, lenNodes, csvFilename, hex_key):
    """Execute each expression template on randomly chosen nodes and columns.

    ``h2e.exec_zero_list(zeroList)`` is run first. The trial counter then
    starts at 1 and is advanced after every expression; the outer loop only
    re-checks ``trial < 100`` after a complete pass over ``exprList``.

    Args:
        exprList: expression templates given to ``h2e.fill_in_expr_template``.
        lenNodes: upper bound (exclusive) for the random index into
            ``h2o.nodes``.
        csvFilename: not used by this function.
        hex_key: passed through to ``h2e.fill_in_expr_template``.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. Placing the sandbox check, the progress print
    and ``trial += 1`` inside the inner ``for`` loop is an assumption --
    confirm against the upstream file.
    """
    h2e.exec_zero_list(zeroList)
    # start with trial = 1 because trial-1 is used to point to Result0,
    # which must be initted
    trial = 1
    while trial < 100:
        for exprTemplate in exprList:
            # do each expression at a random node, to facilitate key movement
            nodeX = random.randint(0, lenNodes - 1)
            colX = random.randint(1, 54)
            # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
            row = str(random.randint(1, 400000))

            execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial, row, hex_key)
            # The inspect result is not examined here, so it is not bound.
            h2e.exec_expr(
                h2o.nodes[nodeX],
                execExpr,
                resultKey="Result" + str(trial) + ".hex",
                timeoutSecs=60,
            )

            h2o.check_sandbox_for_errors()
            # Single-argument print: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("Trial # %s completed\n" % (trial,))
            trial += 1
def test_exec2_filter_slice(self):
    """Import covtype.data as ``c.hex``, then run every template in exprList on node 0, ten times over.

    ``h2o.beta_features`` is switched on first. Nothing is asserted on the
    exec results; ``min_value`` is only printed.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. The nesting of the inner loop body is an
    assumption -- confirm against the upstream file.
    """
    h2o.beta_features = True
    csvPathname = "standard/covtype.data"
    hex_key = "c.hex"
    parseResult = h2i.import_parse(
        bucket="home-0xdiag-datasets",
        path=csvPathname,
        schema="put",
        hex_key=hex_key,
        timeoutSecs=10,
    )
    # Single-argument prints: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("Parse result['desination_key']: %s" % (parseResult["destination_key"],))
    # The inspect result itself is not examined, so it is not bound.
    h2o_cmd.runInspect(None, parseResult["destination_key"])

    nodeX = 0  # every exec goes to the first node
    for trial in range(10):
        print("Doing the execs in order, to feed filters into slices")
        for exprTemplate in exprList:
            execExpr = h2e.fill_in_expr_template(
                exprTemplate, colX=0, n=0, row=1, keyX=hex_key, m=2)
            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey=None, timeoutSecs=4)

            print("min_value: %s execExpr: %s" % (min_value, execExpr))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
def test_exec_filter_slice(self):
    """Parse covtype.data, then run every template in exprList on node 0, ten times over.

    Nothing is asserted on the exec results; ``min_value`` is only printed.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. The nesting of the inner loop body is an
    assumption -- confirm against the upstream file.
    """
    csvFilename = "covtype.data"
    csvPathname = h2o.find_dataset('UCI/UCI-large/covtype/covtype.data')
    key2 = "c"
    parseKey = h2o_cmd.parseFile(None, csvPathname, 'covtype.data', 'c', 10)
    # Single-argument prints: same output under the Python 2 print statement
    # and the Python 3 print function.
    print("%s parse time: %s" % (csvFilename, parseKey['response']['time']))
    print("Parse result['desination_key']: %s" % (parseKey['destination_key'],))
    # The inspect result itself is not examined, so it is not bound.
    h2o_cmd.runInspect(None, parseKey['destination_key'])

    nodeX = 0  # every exec goes to the first node
    for trial in range(10):
        print("Doing the execs in order, to feed filters into slices")
        for exprTemplate in exprList:
            execExpr = h2e.fill_in_expr_template(
                exprTemplate, colX=0, n=0, row=1, key2=key2, m=2)
            _, min_value = h2e.exec_expr(
                h2o.nodes[nodeX], execExpr, resultKey="Result.hex", timeoutSecs=4)

            print("min_value: %s execExpr: %s" % (min_value, execExpr))
            h2o.verboseprint("min: ", min_value, "trial:", trial)
def exec_list(exprList, lenNodes, csvFilename, key2):
    """Execute each expression template on random nodes and dump the results.

    ``h2e.exec_zero_list(zeroList)`` is run first. The trial counter then
    starts at 1 and is advanced after every expression; the outer loop only
    re-checks ``trial < 100`` after a complete pass over ``exprList``.

    Args:
        exprList: expression templates given to ``h2e.fill_in_expr_template``.
        lenNodes: upper bound (exclusive) for the random index into
            ``h2o.nodes``.
        csvFilename: not used by this function.
        key2: passed through to ``h2e.fill_in_expr_template``.

    NOTE(review): this body was reconstructed from a single-line source in
    which indentation was lost. Placing every per-iteration statement
    (including ``trial += 1``) inside the inner ``for`` loop is an
    assumption -- confirm against the upstream file.
    """
    h2e.exec_zero_list(zeroList)
    # start with trial = 1 because trial-1 is used to point to Result0,
    # which must be initted
    trial = 1
    while trial < 100:
        for exprTemplate in exprList:
            # do each expression at a random node, to facilitate key movement
            nodeX = random.randint(0, lenNodes - 1)
            colX = random.randint(1, 54)
            # FIX! should tune this for covtype20x vs 200x vs covtype.data..but for now
            row = str(random.randint(1, 400000))

            execExpr = h2e.fill_in_expr_template(exprTemplate, colX, trial, row, key2)
            execResultInspect = h2e.exec_expr(
                h2o.nodes[nodeX],
                execExpr,
                resultKey="Result" + str(trial) + ".hex",
                timeoutSecs=60,
            )

            eri0 = execResultInspect[0]
            eri1 = execResultInspect[1]
            # 'cols' is popped so the dump of eri0 below does not repeat it.
            columns = eri0.pop('cols')
            columnsDict = columns[0]
            # Single-argument prints: same output under the Python 2 print
            # statement and the Python 3 print function.
            print("\nexecResult columns[0]: %s" % (h2o.dump_json(columnsDict),))
            print("\nexecResult [0]: %s" % (h2o.dump_json(eri0),))
            print("\nexecResult [1] : %s" % (h2o.dump_json(eri1),))

            # Named colMin rather than `min` so the builtin is not shadowed.
            colMin = columnsDict["min"]
            h2o.verboseprint("min: ", colMin, "trial:", trial)
            ### self.assertEqual(float(min), float(trial),"what can we check here")

            ### h2b.browseJsonHistoryAsUrlLastMatch("Inspect")
            # slows things down to check every iteration, but good for isolation
            h2o.check_sandbox_for_errors()
            print("Trial # %s completed\n" % (trial,))
            trial += 1