def setup_random_seed(seed=None):
    # h2o_args.unit_main() or h2o.init() or this function may be the first to call it.
    # That makes sure any --seed init is set up before we look for a command line arg here.
    # (h2o.setup_random_seed() is done before h2o.init() in tests)
    # parse_our_args() will be a no-op if it was already called once
    h2o_args.parse_our_args()
    if h2o_args.random_seed is not None:
        SEED = h2o_args.random_seed
    elif seed is not None:
        SEED = seed
    else:
        SEED = random.randint(0, sys.maxint)
    random.seed(SEED)
    print "\nUsing random seed:", SEED
    return SEED
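# Hedged usage sketch (not part of the original file): the intended call order for a
# seeded test. The seed value 42 is an arbitrary illustration; a --seed command line
# arg, if present, takes priority inside setup_random_seed(). Relies on the module's
# existing "import random".
def _example_seeded_test():
    SEED = setup_random_seed(seed=42)   # sets random.seed(SEED) and returns it
    init()                              # build/attach the cloud after the seed is fixed
    value = random.randint(0, 100)      # reproducible across runs given the same SEED
    return SEED, value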
def init(*args, **kwargs):
    # this will be a no-op if already called once
    h2o_args.parse_our_args()
    global nodes, n0

    # ugly, but we have 3 places that are kept in sync...check them all
    def checkIsNone(thingName, thing):
        if not (thing is None or len(thing) == 0):
            print "WARNING: %s is not empty before h2o.init()..Unlikely that makes sense? %s" % \
                (thingName, thing)

    checkIsNone("nodes", nodes)
    checkIsNone("n0", n0)
    checkIsNone("h2o_nodes.nodes", h2o_nodes.nodes)

    nodes = h2o_bc.init(*args, **kwargs)
    n0 = nodes[0]  # used to save typing, i.e. h2o.n0
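# Hedged sketch (not part of the original file): after init(), h2o.nodes and h2o.n0 refer
# to the same cloud, so n0 is just shorthand for nodes[0]. The node methods used below
# (import_files, parse, remove_key) are the ones already used elsewhere in this code;
# csvPathname is a hypothetical dataset path.
def _example_init_then_parse(csvPathname):
    init()
    assert n0 is nodes[0]
    v = n0.import_files(csvPathname)['succeeded'][0]
    hex_key = n0.parse(v['key'], timeoutSecs=3600)['destination_key']
    n0.remove_key(hex_key)
    return hex_key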
        row.update(kwargs)
        row.update(glm)
        row.update(val)
        row.update({'wall_clock_secs': wall_clock_secs})
        row.update({'java_heap_GB': java_heap_GB})
        csvWrt.writerow(row)
        h2o.nodes[0].remove_key(k)
    finally:
        output.close()

def parse_file(f):
    # pass the path through to import, then parse the imported key
    v = h2o.nodes[0].import_files(f)['succeeded'][0]
    return h2o.nodes[0].parse(v['key'], timeoutSecs=3600)['destination_key']

if __name__ == '__main__':
    h2o_args.parse_our_args()
    files = None
    if is_ec2():
        files = ec2_files
        h2o.init()
    else:
        files = local_files
        h2o.init(use_hdfs=True)

    # We want to ignore columns with missing values, since GLM throws away those rows
    # (so it won't analyze as many rows). Distance and CRSElapsedTime have some...I guess ignore them.
    # column Year        0  type: int
    # column Month       1  type: int
    # column DayofMonth  2  type: int
    # column DayOfWeek   3  type: int
    # column DepTime     4  type: int  num_missing_values: 2302136
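# Hedged sketch (not part of the original script): the shape of the per-file loop that the
# row.update()/csvWrt.writerow() code above serves. The 'dataset' fieldname and the use of
# time.time() (requires "import time" at the top of the script) are illustrative assumptions.
def _example_timing_loop(files, csvWrt):
    for f in files:
        start = time.time()
        hex_key = parse_file(f)                 # import + parse, returns the destination key
        wall_clock_secs = time.time() - start
        row = {'dataset': f, 'wall_clock_secs': wall_clock_secs}
        csvWrt.writerow(row)
        h2o.nodes[0].remove_key(hex_key)        # free the key before the next dataset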