# Ejemplo n.º 1 (score: 0)
def _update_metrics(outdir, targetname, groupname, rowidx, z, y, aucthresh=(.5,.5)):
    """Insert or update one prediction group in a target's predict.npz,
    then recompute and persist that group's entry in metrics.txt.

    outdir/targetname must already contain a predict.npz with a "groups"
    object array. `z` (predictions) and `y` (labels, optional) are reshaped
    to column vectors before being stored.
    """
    modeldir = outdir + "/" + targetname
    predict_file = modeldir + "/predict.npz"
    metrics_file = modeldir + "/metrics.txt"

    # Pull the stored arrays into a plain mutable dict.
    with np.load(predict_file) as npz:
        contents = dict((key, npz[key]) for key in npz.keys())

    # "groups" is a 0-d object array wrapping a dict; [()] unwraps it.
    group = contents["groups"][()].setdefault(groupname, {})
    group["I"] = rowidx
    group["Z"] = z.reshape((-1, 1))
    if y is not None:
        group["Y"] = y.reshape((-1, 1))

    # Write the updated predictions back and regenerate the visualization.
    np.savez_compressed(predict_file, **contents)
    deepity.call_dumpviz(predict_file)

    # Refresh this group's row in the metrics table.
    metrics = deepity.load_metrics(metrics_file)
    metrics[groupname] = deepity.calc_metrics(z, y, aucthresh=aucthresh)
    deepity.save_metrics(metrics_file, metrics)

    """
Ejemplo n.º 2
0
def train(cfgs, data, calibdir, outdir, nfold=1, ntrial=1, auxfilter=None, metric_key="loss"):
    """Train one model per target using its best calibration sample.

    For each target in `data`: load calibration samples from `calibdir`,
    pick the best hyperparameter sample by `metric_key`, train `ntrial`
    independent trials (optionally with `nfold` cross-validation folds),
    then promote the best trial's files into the target directory and
    delete all per-trial directories to save space.

    Args:
        cfgs:       dict mapping config name -> {"model": ..., "trainer": ...}.
        data:       dataset object exposing `targetnames` and `astargets`.
        calibdir:   directory holding per-target calibration results.
        outdir:     root directory for training outputs.
        nfold:      number of cross-validation folds; fold subdirs only if > 1.
        ntrial:     number of training trials per target.
        auxfilter:  passed through to deepity.train unchanged.
        metric_key: metric used both to select the calibration sample and the
                    best trial; treated as "smaller is better" iff its name
                    contains "loss".
    """
    globals._set_default_logging(outdir)

    for targetname in sorted(data.targetnames):
        calib_workdir = getworkdir(calibdir, targetname)
        samples = load_calib_samples(calib_workdir+"/calib.all.txt")

        # Get the best calibration sample for this specific target.
        # wantmax: maximize unless the metric name contains "loss".
        cfgbest = deepity.hpsearch.get_best_sample(samples, metric_key, wantmax="loss" not in metric_key)
        cfgname = cfgbest.params[":cfgname"]
        cfg = cfgs[cfgname]

        # Output path template consumed by deepity.train / deepity.getinstdir:
        # outdir/<target>/trial<i>[/fold<j>]
        outpattern = [outdir, ("target","%s")]
        outpattern += [("trial", "trial%s")]
        if nfold > 1:
            outpattern += [("fold","/fold%s")]
        
        deepity.train(cfg["model"], cfg["trainer"], 
                        data.astargets([targetname]), 
                        hparams={targetname : cfgbest}, hparams_metric=metric_key, 
                        outdir=outpattern,
                        nfold=nfold, 
                        nsample=ntrial,
                        devices=globals._devices,
                        auxfilter=auxfilter,
                        dumpviz=False,
                        )

        # Collect the performance of each trial by scanning its metrics.txt
        # for the row whose first column matches metric_key.
        performances = []
        for trial in range(ntrial):
            instdir = deepity.getinstdir(outpattern, targetname, trial, None)
            with open(instdir+"/metrics.txt") as f:
                header = f.readline().rstrip().split() # discard column headers
                for line in f:
                    line = line.rstrip().split()
                    metricname, trainvalue = line[:2]  # second column is the training value
                    if metricname == metric_key:
                        performances.append(float(trainvalue))
                        break
    
        # Find the trial with best performance (min for loss-like metrics,
        # max otherwise — mirrors the wantmax convention above).
        besttrial = np.argmin(performances) if "loss" in metric_key else np.argmax(performances)
        print "trial metrics:", performances

        # Copy the best trial into the parent directory, and delete all other trials 
        # to save space and to not drive btsync so crazy.
        instdir = deepity.getinstdir(outpattern, targetname, besttrial, None)
        files = glob.glob(instdir+"/*")
        for file in files:
            # Destination is two levels up from the file, i.e. the target dir.
            dst = os.path.dirname(os.path.dirname(file))+"/"+os.path.basename(file)
            if os.path.isdir(file):
                # Replace any stale copy of a directory before copytree
                # (copytree requires the destination not to exist).
                if os.path.exists(dst):
                    shutil.rmtree(dst, ignore_errors=True)
                shutil.copytree(file, dst)
            else:
                shutil.copyfile(file, dst)
        time.sleep(0.1) # rmtree sometimes fails if the folder is scanned by btsync; this seems to help a bit
        # Remove every trial directory, including the best one (already copied up).
        for i in range(len(performances)):
            shutil.rmtree(deepity.getinstdir(outpattern, targetname, i, None), ignore_errors=True)
        deepity.call_dumpviz(os.path.dirname(instdir))