def setup(name, rkeys, settings):
    """Prepare the model directory and return the feature map or hash base.

    Creates the directory ``path(name)``, calls ``collect`` when ``rkeys`` is
    non-empty, and makes sure the ``enigma.map`` file exists.

    Args:
        name: model name; files are resolved through ``path(name, ...)``.
        rkeys: result keys forwarded to ``collect``; collection is skipped
            when this is falsy.
        settings: dict; this function reads the keys "hashing", "force",
            "hash_debug" and "version" (``collect`` reads further keys).

    Returns:
        ``settings["hashing"]`` when it is truthy, otherwise the map obtained
        from ``enigmap.load`` or ``enigmap.create``.
    """
    os.system("mkdir -p %s" % path(name))
    f_pre = path(name, "train.pre")
    f_map = path(name, "enigma.map")
    hashing = settings["hashing"]

    # Both files are already present and no rebuild was requested: reuse them.
    if os.path.isfile(f_map) and os.path.isfile(f_pre) and not settings["force"]:
        return hashing if hashing else enigmap.load(f_map)

    if rkeys:
        collect(name, rkeys, settings)

    if hashing and not settings["hash_debug"]:
        # With hashing (and no debugging) the map file only records the
        # version and the hash base; no feature enumeration is written.
        with open(f_map, "w") as map_file:
            map_file.write('version("%s").\nhash_base(%s).\n'
                           % (settings["version"], hashing))
        return hashing

    if settings["force"] or not os.path.isfile(f_map):
        log.msg("+ creating feature info")
        # Close the input explicitly instead of relying on garbage collection.
        with open(f_pre) as pre_file:
            emap = enigmap.create(pre_file, hashing)
        enigmap.save(emap, f_map, settings["version"], hashing)
    elif not hashing:
        emap = enigmap.load(f_map)

    # When hashing is truthy ``emap`` may be unbound; it is not evaluated then.
    return hashing if hashing else emap
def collect(name, rkeys, settings):
    """Extract and collect training data for model ``name``.

    The data goes to ``train.in`` when hashing is active (and "hash_debug" is
    off), otherwise to ``train.pre``. Nothing is done when that file already
    exists and ``settings["force"]`` is falsy.

    Args:
        name: model name; the output file is ``path(name, "train.<ext>")``.
        rkeys: result keys passed to ``pretrains.prepare`` and
            ``pretrains.make``.
        settings: dict; reads "version", "force", "cores", "hash_debug" and
            (unless "hash_debug" is truthy) "hashing".
    """
    version = settings["version"]
    force = settings["force"]
    cores = settings["cores"]
    # Hash debugging disables hashing for the collection step.
    hashing = None if settings["hash_debug"] else settings["hashing"]

    f_dat = path(name, "train.%s" % ("in" if hashing else "pre"))
    if not force and os.path.isfile(f_dat):
        return

    log.msg("+ extracting training data from results")
    pretrains.prepare(rkeys, version, force, cores, hashing)
    log.msg("+ collecting %s data" % ("training" if hashing else "pretrains"))
    # The context manager guarantees the output is flushed and closed before
    # any later step reads the file.
    with open(f_dat, "w") as out:
        pretrains.make(rkeys, out=out, hashing=hashing)
def make(model, rkeys, settings):
    """Build the model ``model`` and report whether it is available.

    Steps: return early if the model file exists (and "force" is off), run
    ``setup``, generate ``train.in`` from ``train.pre`` when needed, run
    ``learner.build`` in a ``Process``, log the statistics found in
    ``train.stats`` and optionally gzip the training files.

    Args:
        model: model name; files are resolved through ``path(model, ...)``.
        rkeys: result keys forwarded to ``setup``.
        settings: dict; reads "learner", "force", "hash_debug", "hashing"
            and "gzip".

    Returns:
        True when the model file already existed or the build steps ran;
        False when ``setup`` returned a falsy value (the model directory is
        removed in that case).
    """
    learner = settings["learner"]
    f_pre = path(model, "train.pre")
    f_in = path(model, "train.in")
    f_stats = path(model, "train.stats")
    f_mod = path(model, "model.%s" % learner.ext())
    f_log = path(model, "train.log")

    if os.path.isfile(f_mod) and not settings["force"]:
        return True

    emap = setup(model, rkeys, settings)
    if not emap:
        os.system("rm -fr %s" % path(model))
        return False

    if settings["hash_debug"] or not settings["hashing"]:
        if settings["force"] or not os.path.isfile(f_in):
            log.msg("+ generating training data")
            # Close both files deterministically so the learner process
            # below sees fully written training data.
            with open(f_pre) as pre_file, open(f_in, "w") as in_file:
                trains.make(pre_file, emap, out=in_file)

    log.msg("+ training %s model" % learner.name())
    # The learner runs in a separate Process; join() blocks until it ends.
    # NOTE(review): the outcome of the child is not inspected here, so True
    # is returned even if the build failed - confirm this is intended.
    p = Process(target=learner.build, args=(f_in, f_mod, f_log, f_stats))
    p.start()
    p.join()

    stats = {}
    if os.path.isfile(f_stats):
        with open(f_stats) as stats_file:
            stats = json.load(stats_file)
    log.msg("+ training statistics:\n%s" % "\n".join(
        [" : %s = %s" % (x, stats[x]) for x in sorted(stats)]))

    if settings["gzip"]:
        log.msg("+ compressing training files")
        os.system("cd %s; gzip -qf *.pre *.in *.out 2>/dev/null" % path(model))

    return True
def loop(model, settings, nick=None):
    """Run one model-building iteration and return the new strategies.

    Calls ``check`` and ``update``, builds the model with ``make``, derives a
    ``protos.solo`` and a ``protos.coop`` strategy from the first entry of
    ``settings["pids"]``, appends both to ``settings["pids"]`` and calls
    ``update`` again for just the new strategies.

    When ``settings["ramdisk"]`` is truthy, the module-level ``RAMDISK_DIR``
    and ``expres.results.RAMDISK_DIR`` are pointed at sub-directories of the
    ramdisk for the duration of the work; their contents are then copied to
    ``ENIGMA_ROOT`` / ``expres.results.RESULTS_DIR``, the ramdisk directories
    are removed and both variables are reset to None.

    Args:
        model: base model name.
        settings: dict; reads "ramdisk", "results", "learner" and "pids", and
            is also expanded as keyword arguments for ``update``.
        nick: optional suffix; when truthy the model becomes "model/nick".

    Returns:
        The list with the two newly created strategies.

    Raises:
        Exception: when ``make`` returns a falsy value.
    """
    global RAMDISK_DIR

    check(settings)
    target = "%s/%s" % (model, nick) if nick else model
    log.msg("Building model %s" % target)

    # Redirect Enigma and results storage to the ramdisk when one is set.
    if settings["ramdisk"]:
        ram_root = settings["ramdisk"]
        RAMDISK_DIR = os.path.join(ram_root, "Enigma")
        os.system("mkdir -p %s" % RAMDISK_DIR)
        expres.results.RAMDISK_DIR = os.path.join(ram_root, "00RESULTS")
        os.system("mkdir -p %s" % expres.results.RAMDISK_DIR)

    update(**settings)

    built = make(target, settings["results"], settings)
    if not built:
        raise Exception("Enigma: FAILED: Building model %s" % target)

    learner_efun = settings["learner"].efun()
    base_pid = settings["pids"][0]
    strategies = [
        proto(base_pid, target, mult=0, noinit=True, efun=learner_efun)
        for proto in (protos.solo, protos.coop)
    ]
    settings["pids"].extend(strategies)

    # Move the Enigma files from the ramdisk to their permanent location.
    if settings["ramdisk"]:
        os.system("mkdir -p %s" % ENIGMA_ROOT)
        copy_cmd = "cp -rf %s/* %s" % (RAMDISK_DIR, ENIGMA_ROOT)
        os.system(copy_cmd)
        os.system("rm -fr %s" % RAMDISK_DIR)
        RAMDISK_DIR = None

    update(only=strategies, **settings)

    # Same for the results directory once the second update has run.
    if settings["ramdisk"]:
        os.system("mkdir -p %s" % expres.results.RESULTS_DIR)
        copy_cmd = "cp -rf %s/* %s" % (expres.results.RAMDISK_DIR,
                                       expres.results.RESULTS_DIR)
        os.system(copy_cmd)
        os.system("rm -fr %s" % expres.results.RAMDISK_DIR)
        expres.results.RAMDISK_DIR = None

    log.msg("Building model finished\n")
    return strategies
def loop(model, settings, nick=None):
    """Run one model-building iteration and return the new strategies.

    Calls ``check`` and ``update``, builds the model with ``make``, creates a
    ``protos.solo`` and a ``protos.coop`` strategy from the first entry of
    ``settings["pids"]``, appends both to ``settings["pids"]`` and calls
    ``update`` once more with the new strategies.

    Args:
        model: base model name.
        settings: dict; reads "results", "learner" and "pids".
        nick: optional suffix; when truthy the model becomes "model/nick".

    Returns:
        The list with the two newly created strategies.

    Raises:
        Exception: when ``make`` returns a falsy value.
    """
    check(settings)
    target = "%s/%s" % (model, nick) if nick else model
    log.msg("Building model %s" % target)

    update(settings)
    built = make(target, settings["results"], settings)
    if not built:
        raise Exception("Enigma: FAILED: Building model %s" % target)

    learner_efun = settings["learner"].efun()
    base_pid = settings["pids"][0]
    strategies = [
        proto(base_pid, target, mult=0, noinit=True, efun=learner_efun)
        for proto in (protos.solo, protos.coop)
    ]
    settings["pids"].extend(strategies)

    update(settings, strategies)
    log.msg("Building model finished\n")
    return strategies
def make(name, rkeys, settings):
    """Build the model ``name`` and report whether it is available.

    Steps: return early if the model file exists (and "force" is off), run
    ``setup``, generate ``train.in`` from ``train.pre`` when needed, call
    ``learner.build`` with the training log opened for appending, and
    optionally gzip the training files.

    Args:
        name: model name; files are resolved through ``path(name, ...)``.
        rkeys: result keys forwarded to ``setup``.
        settings: dict; reads "learner", "force", "hash_debug", "hashing"
            and "gzip".

    Returns:
        True when the model file already existed or the build steps ran;
        False when ``setup`` returned a falsy value (the model directory is
        removed in that case).
    """
    learner = settings["learner"]
    f_pre = path(name, "train.pre")
    f_in = path(name, "train.in")
    f_mod = path(name, "model.%s" % learner.ext())
    f_log = path(name, "train.log")

    if os.path.isfile(f_mod) and not settings["force"]:
        return True

    emap = setup(name, rkeys, settings)
    if not emap:
        os.system("rm -fr %s" % path(name))
        return False

    if settings["hash_debug"] or not settings["hashing"]:
        if settings["force"] or not os.path.isfile(f_in):
            log.msg("+ generating training data")
            # Close both files deterministically so the learner reads fully
            # written training data.
            with open(f_pre) as pre_file, open(f_in, "w") as in_file:
                trains.make(pre_file, emap, out=in_file)

    log.msg("+ training %s" % learner.name())
    # The log is closed even when learner.build raises.
    with open(f_log, "a") as tlog:
        learner.build(f_in, f_mod, tlog)

    if settings["gzip"]:
        log.msg("+ compressing training files")
        os.system("cd %s; gzip -qf *.pre *.in *.out 2>/dev/null" % path(name))

    return True
#"ramdisk": "/dev/shm/yan", "gzip": False, "learner": learn.XGBoost(**learning) } models.check(settings) log.start("Building XGB models:", settings) model = models.name(**settings) rkeys = [(settings["bid"], pid, problem, settings["limit"]) for pid in settings["pids"] for problem in expres.benchmarks.problems(settings["bid"])] if not models.make(model, rkeys, settings): raise Exception("Enigma: FAILED: Building model %s" % model) efun = settings["learner"].efun() new.append( protos.solo(settings["pids"][0], model, mult=0, noinit=True, efun=efun)) new.append( protos.coop(settings["pids"][0], model, mult=0, noinit=True, efun=efun)) log.msg("New strategies are available:\n%s\n" % "\n".join(new))