def _update_metrics(outdir, targetname, groupname, rowidx, z, y, aucthresh=(.5, .5)):
    modeldir = outdir + "/" + targetname

    # Load current predictions
    with np.load(modeldir + "/predict.npz") as f:
        predict_npz = {name: f[name] for name in f.keys()}

    # Add prediction group
    group = predict_npz["groups"][()].setdefault(groupname, {})
    group["I"] = rowidx
    group["Z"] = z.reshape((-1, 1))
    if y is not None:
        group["Y"] = y.reshape((-1, 1))

    # Save predictions
    np.savez_compressed(modeldir + "/predict.npz", **predict_npz)
    deepity.call_dumpviz(modeldir + "/predict.npz")

    # Load current metrics, recompute this group's entry, and save
    metrics = deepity.load_metrics(modeldir + "/metrics.txt")
    metrics[groupname] = deepity.calc_metrics(z, y, aucthresh=aucthresh)
    deepity.save_metrics(modeldir + "/metrics.txt", metrics)
"""
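# --- Illustration (not part of the original module) ---------------------------
# The [()] indexing in _update_metrics relies on a numpy detail: np.savez
# stores a Python dict as a 0-d object array, and [()] unwraps it back into
# the original dict after np.load. A minimal round-trip sketch; note that
# newer numpy versions also require allow_pickle=True to load object arrays.
def _demo_npz_group_roundtrip(path="demo.npz"):
    np.savez_compressed(path, groups={"train": {"Z": np.zeros((4, 1))}})
    with np.load(path, allow_pickle=True) as f:
        groups = f["groups"][()]  # 0-d object array -> original dict
    assert groups["train"]["Z"].shape == (4, 1)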
def train(cfgs, data, calibdir, outdir, nfold=1, ntrial=1, auxfilter=None, metric_key="loss"):
    globals._set_default_logging(outdir)
    for targetname in sorted(data.targetnames):
        calib_workdir = getworkdir(calibdir, targetname)
        samples = load_calib_samples(calib_workdir + "/calib.all.txt")

        # Get the best sample for this specific model
        #cfgbest = deepity.hpsearch.get_best_sample([_ for _ in samples if _.params[":cfgname"] == cfgname], "loss")
        cfgbest = deepity.hpsearch.get_best_sample(samples, metric_key, wantmax="loss" not in metric_key)
        cfgname = cfgbest.params[":cfgname"]
        cfg = cfgs[cfgname]

        outpattern = [outdir, ("target", "%s"), ("trial", "trial%s")]
        if nfold > 1:
            outpattern += [("fold", "/fold%s")]

        deepity.train(cfg["model"], cfg["trainer"], data.astargets([targetname]),
                      hparams={targetname: cfgbest},
                      hparams_metric=metric_key,
                      outdir=outpattern,
                      nfold=nfold,
                      nsample=ntrial,
                      devices=globals._devices,
                      auxfilter=auxfilter,
                      dumpviz=False,
                      )

        # Collect the performance of each trial
        performances = []
        for trial in range(ntrial):
            instdir = deepity.getinstdir(outpattern, targetname, trial, None)
            with open(instdir + "/metrics.txt") as f:
                f.readline()  # discard column headers
                for line in f:
                    fields = line.rstrip().split()
                    metricname, trainvalue = fields[:2]
                    if metricname == metric_key:
                        performances.append(float(trainvalue))
                        break

        # Find the trial with the best performance (lower is better for loss-like metrics)
        besttrial = np.argmin(performances) if "loss" in metric_key else np.argmax(performances)
        print "trial metrics:", performances

        # Copy the best trial into the parent directory, and delete all other trials
        # to save space and to not drive btsync so crazy.
        instdir = deepity.getinstdir(outpattern, targetname, besttrial, None)
        for src in glob.glob(instdir + "/*"):
            dst = os.path.dirname(os.path.dirname(src)) + "/" + os.path.basename(src)
            if os.path.isdir(src):
                if os.path.exists(dst):
                    shutil.rmtree(dst, ignore_errors=True)
                shutil.copytree(src, dst)
            else:
                shutil.copyfile(src, dst)
        time.sleep(0.1)  # rmtree sometimes fails if the folder is scanned by btsync; this seems to help a bit
        for i in range(len(performances)):
            shutil.rmtree(deepity.getinstdir(outpattern, targetname, i, None), ignore_errors=True)
        deepity.call_dumpviz(os.path.dirname(instdir))
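# --- Illustration (hypothetical; deepity.getinstdir's real logic may differ) --
# A sketch of how an outpattern like [outdir, ("target","%s"),
# ("trial","trial%s"), ("fold","/fold%s")] plausibly expands into one instance
# directory: each (name, fmt) pair is formatted with its value and joined with
# "/", except pieces that already begin with "/" are glued onto the previous
# component. The names below are illustrative, not part of deepity's API.
def _demo_expand_outpattern(outpattern, *values):
    parts, vals = [], list(values)
    for item in outpattern:
        if not isinstance(item, tuple):
            parts.append(item)        # literal path component, e.g. outdir
            continue
        val = vals.pop(0)
        if val is None:
            continue                  # e.g. fold=None when nfold == 1
        piece = item[1] % val
        if piece.startswith("/"):
            parts[-1] += piece        # "/fold0" glued onto "trial0"
        else:
            parts.append(piece)
    return "/".join(parts)

# _demo_expand_outpattern(["out", ("target", "%s"), ("trial", "trial%s")], "RBFOX1", 0)
# -> "out/RBFOX1/trial0"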