def _fit(network, name, va):
    tr = Frame(np.load(os.path.join("trained_models", name, "tr.npz")))
    va = Frame(va)
    est = network(name=name)
    est.fit(tr, va)
    pr = est.predict(va)
    rmse = ((va["T"] - pr["Y"])**2).mean()**0.5
    print("network", name, "trained with", rmse, "validation rmse")
    return rmse
def merge(self, merge_video, frame_nr, start_merge_sec=0, end_merge_sec=0):
    file_name = os.path.splitext(os.path.basename(merge_video))[0]
    temp_video = os.path.join('tmp/merge_{}.avi'.format(file_name))
    probe = ffmpeg.probe(merge_video)
    video_info = next(s for s in probe['streams'] if s['codec_type'] == 'video')
    width = int(video_info['width'])
    height = int(video_info['height'])
    fps = eval(video_info['avg_frame_rate'])
    x = time.strptime(video_info['tags']['DURATION'].split('.')[0], '%H:%M:%S')
    duration = int(datetime.timedelta(hours=x.tm_hour, minutes=x.tm_min,
                                      seconds=x.tm_sec).total_seconds() + 1)
    num_frames = int(duration * fps)
    end_merge_sec = end_merge_sec if end_merge_sec != 0 else duration
    process1 = (
        ffmpeg
        .input(merge_video)
        .output(temp_video, r=fps, b='500000k', g=num_frames, pix_fmt='yuv420p',
                keyint_min=999999, ss=start_merge_sec, to=end_merge_sec)
        .run(overwrite_output=True))
    print('Done making p-frame video')
    merge_input = open(temp_video, 'rb')
    self.get_frames()
    print('Got frames of first vid: {}'.format(len(self.frames)))
    for i in range(0, frame_nr):
        self.write_frame(self.frames[i])
    in_file_bytes = merge_input.read()
    # 0x30306463 is the ASCII sequence '00dc', which signals the end of a frame;
    # the '0x' prefix denotes hexadecimal.
    frames = [Frame(d) for d in in_file_bytes.split(bytes.fromhex('30306463'))]
    for frame in frames:
        if frame.is_delta_frame():
            self.write_frame(frame)
def predict_raw(self, D):
    """ returns dictionary of raw results """
    network = self.network_shim()
    prY = np.squeeze(network.predict(D)['Y'])
    pr = Frame(D.items(), Y=prY)
    result = defaultdict(list)
    splits = int(np.ceil(pr.N / 5000))
    for i, idx in enumerate(np.array_split(np.arange(pr.N), splits)):
        print("\rworking on split {i} of {n}...".format(i=i + 1, n=splits), end="")
        memberships = self.cluster_est.predict(self._pipe_transform(pr[idx]))
        result["memberships"] += [memberships]
    print()
    pr.update([(k, np.concatenate(v)) for k, v in result.items()])
    return pr
def predict(self, D, coverage=0.95):
    prY = self.network_shim().predict(D)['Y']
    pr = Frame(D.items(), Y=prY)
    alpha = 1.0 - coverage
    # optimizer options: SQP or Cobyla
    quantiles = self.est.compute_quantile(self._pipe_transform(pr),
                                          np.array([alpha / 2, 1.0 - alpha / 2]),
                                          do_optim=True, opt_method='Cobyla')
    # quantiles.shape == (N, 2)
    return prY, quantiles[:, 0], quantiles[:, 1]
def fit(self, tr, va):
    network = self.network_shim().fit(tr, va)
    self.ref = Frame(va.items(), Y=network.predict(va)['Y'])
    # optionally include the tr set as well, if prY isn't part of the input:
    # self.ref = Frame([(k, np.concatenate([ref[k], tr[k]])) for k in ref.keys()])
    self._pipe_fit(self.ref, self.keys)
    self.est.fit(self._pipe_transform(self.ref), self.ref['T'])
    return self
def fit(self, trD, vaD):
    network = self.network_shim().fit(trD, vaD)
    data = []
    if "tr" in self.sets:
        data += [(trD, np.squeeze(network.predict(trD)['Y']))]
    if "va" in self.sets:
        data += [(vaD, np.squeeze(network.predict(vaD)['Y']))]
    self.ref = Frame([(k, np.concatenate([D[k] for D, prY in data]))
                      for k in trD.keys()])
    self.ref['Y'] = np.squeeze(np.concatenate([prY for D, prY in data]))
    self.ref_results = Frame(R=self.ref['T'] - self.ref['Y'])  # residuals
    self._pipe_fit(self.ref, self.keys)
    self.knn = NearestNeighbors(n_neighbors=self.neighbors, n_jobs=self.n_jobs)
    self.knn.fit(self._pipe_transform(self.ref))
    return self
def _predict(self, D, ops_dict, is_validation_run=False):
    if self._initialize:
        raise RuntimeError('Session is uninitialized.')
    if self._session._closed:
        raise RuntimeError('Session has already been closed.')
    # need the original set for D, not prefabricated batches
    if isinstance(D, list):
        keys = D[0].keys()
        data = Frame((k, np.concatenate([d[k] for d in D])) for k in keys)
    elif not isinstance(D, Frame) and isinstance(D, dict):
        data = Frame(**D)
    else:
        data = D
    prE = ops_dict["E"]
    Es = []
    I = np.arange(len(data))
    # predict once for the function value using global normalization
    # chunks = self._split(data, 1000)
    # E = np.concatenate([self._session.run([prE], feed_dict=self._fill_feed(batch, is_training=False))[0] for batch in chunks])
    # Es += [E]
    # predict many times for the uncertainty using additional batch normalization
    if not is_validation_run:
        for i in range(self._n_predictions):
            I = np.random.permutation(I)
            # use the consolidated `data` frame here, not the raw argument D,
            # which may still be a list of batches
            chunks = self._split(data[I], self._prediction_batch_size)
            E = np.concatenate([
                self._session.run([prE],
                                  feed_dict=self._fill_feed(batch, is_training=True))[0]
                for batch in chunks
            ])
            Es += [E[np.argsort(I)]]
    return dict(
        E=np.mean(Es, axis=0),
        U=np.var(Es, axis=0),
    )
def load_simple(filename="data/benzene.npz", skip=10e3, N=None,
                sizes=(10e3, 2e3, 8e3), verbose=True):
    N = N or sum(sizes)
    skip, N = int(np.round(skip)), int(np.round(N))
    data_raw = Frame(np.load(filename))
    data_raw['index'] = np.arange(len(data_raw))
    data_raw = data_raw[skip:]
    # caution: subsampling across all splits before splitting leaks test-set
    # information into preprocessing and lowers the apparent rmse
    data_raw = data_raw[np.random.choice(np.arange(len(data_raw)), size=N, replace=False)]
    if verbose:
        print(data_raw.keys(), len(data_raw["E"]))
    preproc = MolecularDataPreprocessor(expand_method=None, target_key="E",
                                        dont_norm='R Z E index'.split())
    sets = split_sets(data_raw['D E R Z index'.split()].copy(), sizes, start=0)
    preproc = preproc.fit(sets[0])
    sets = map(preproc.transform, sets)
    sets = [Frame(s.items()) for s in sets]
    if verbose:
        print([len(s) for s in sets], 'sets')
        for k, v in sets[0].items():
            for s in sets:
                print(k, s[k].shape, s[k].mean(), s[k].std())
    return preproc, sets
def fit(self, tr, va):
    network = self.network_shim().fit(tr, va)
    self.ref = Frame(va.items(), Y=network.predict(va)['Y'])
    self._pipe_fit(self.ref, self.keys)
    print("fitting normal", end="\r")
    self.est.fit(self._pipe_transform(self.ref), self.ref['T'])
    print("fitting lower ", end="\r")
    self.est_min.fit(self._pipe_transform(self.ref), self.ref['T'])
    print("fitting upper ", end="\r")
    self.est_max.fit(self._pipe_transform(self.ref), self.ref['T'])
    print("fitting done")
    return self
def predict(self, D, coverage=None):
    if coverage is not None:
        print("WARNING: GradientBoostingUncertainty takes the desired coverage "
              "as an init parameter; the value passed here is ignored.")
    prY = self.network_shim().predict(D)['Y']
    pr = Frame(D.items(), Y=prY)
    prY = self.est.predict(self._pipe_transform(pr))
    minY = self.est_min.predict(self._pipe_transform(pr))
    maxY = self.est_max.predict(self._pipe_transform(pr))
    return prY, minY, maxY
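# How est / est_min / est_max above might be constructed is not part of this
# listing. A minimal sketch, assuming scikit-learn's GradientBoostingRegressor
# with quantile loss and the hyperparameters mentioned elsewhere in this file;
# the actual construction in the repository may differ.
from sklearn.ensemble import GradientBoostingRegressor

coverage = 0.95
alpha = 1.0 - coverage
common = dict(n_estimators=100, learning_rate=0.1,
              min_samples_leaf=9, min_samples_split=9)

est = GradientBoostingRegressor(loss="quantile", alpha=0.5, **common)              # central (median) estimate
est_min = GradientBoostingRegressor(loss="quantile", alpha=alpha / 2, **common)    # lower quantile
est_max = GradientBoostingRegressor(loss="quantile", alpha=1 - alpha / 2, **common)  # upper quantile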
        r=fps, b='500000k', g=fps * (end_sec - start_sec), pix_fmt='yuv420p',
        keyint_min=999999, ss=start_sec, to=end_sec).run(overwrite_output=True))

in_file = open(temp_video, 'rb')
out_file = open(output_mosh, 'wb')

# because the file was opened with 'rb', reads return bytes instead of Unicode strings
in_file_bytes = in_file.read()

# 0x30306463 is the ASCII sequence '00dc', which signals the end of a frame;
# the '0x' prefix denotes hexadecimal.
frames = [Frame(d) for d in in_file_bytes.split(bytes.fromhex('30306463'))]


def write_frame(frame):
    out_file.write(frame + bytes.fromhex('30306463'))


pic1, err1 = (ffmpeg
              .input(input_png, s='{}x{}'.format(width, height))
              .trim(start_frame=0, end_frame=1)
              .output('pipe:', format='avi', pix_fmt='yuv420p',
                      s='{}x{}'.format(width, height))
              .run(capture_stdout=True))
png_frame = pic1.split(bytes.fromhex('30306463'))[1]
def predict(self, data):
    cache = Frame(**np.load(self.cachefile))
    cacheindex = cache['index']  # should be sorted
    I = np.argsort(cacheindex)
    return cache[I[np.searchsorted(cacheindex[I], data['index'])]]
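# How the cache lookup above aligns rows: argsort + searchsorted map each
# requested 'index' to its row in the (possibly unsorted) cache. A small
# self-contained illustration with made-up indices; it assumes every queried
# index is actually present in the cache.
import numpy as np

cacheindex = np.array([7, 3, 5, 1])       # row order in the cache file
query = np.array([5, 1, 7])               # data['index'] we want predictions for

I = np.argsort(cacheindex)                # sorted order of the cache: [3, 1, 2, 0]
rows = I[np.searchsorted(cacheindex[I], query)]
assert (cacheindex[rows] == query).all()  # rows 2, 3, 0 of the cache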
def fit(self, trD, vaD):
    network = self.network_shim().fit(trD, vaD)
    data = []
    if "tr" in self.sets:
        data += [(trD, np.squeeze(network.predict(trD)['Y']))]
    if "va" in self.sets:
        data += [(vaD, np.squeeze(network.predict(vaD)['Y']))]
    self.ref = Frame([(k, np.concatenate([D[k] for D, prY in data]))
                      for k in trD.keys()])
    self.ref['Y'] = np.squeeze(np.concatenate([prY for D, prY in data]))
    self.ref_results = Frame(R=self.ref['T'] - self.ref['Y'])  # residuals
    self._pipe_fit(self.ref, self.keys)
    n_clusters = int((len(self.ref) / 2)**0.5)  # heuristic from pevec2013
    a = 1.0 - self.coverage
    R = self.ref_results["R"]
    if self.method == 'kmeans' or self.method == 'kmeans-mahalanobis':
        self.cluster_est = KMeans(n_clusters=n_clusters, n_jobs=self.n_jobs)
        memberships = self.cluster_est.fit_predict(self._pipe_transform(self.ref))
        self.ref_results["memberships"] = memberships
        self.clusterbounds = np.empty((n_clusters, 2))
        for c in range(n_clusters):
            cdf, ppf = ecdf(R[memberships == c])
            self.clusterbounds[c, :] = ppf(a / 2), ppf(1 - a / 2)
    elif self.method == "cmeans":
        self.cluster_est = CMeans(n_clusters=n_clusters, error=1e-5,
                                  max_iter=10 * self.ref.N)
        memberships = self.cluster_est.fit_predict(
            self._pipe_transform(self.ref))  # (N, clusters)
        asc_residuals = np.argsort(R)
        # self.clusterbounds = np.empty((n_clusters, 2))
        # for c in range(n_clusters):
        #     cumsum = np.cumsum(memberships.T[c][asc_residuals])
        #     lb = np.argwhere(cumsum < (a/2)*cumsum[-1])[-1, 0]
        #     ub = np.argwhere(cumsum > (1-a/2)*cumsum[-1])[0, 0]
        #     self.clusterbounds[c, :] = R[asc_residuals][lb], R[asc_residuals][ub]
        cumsum = np.cumsum(memberships[asc_residuals], axis=0).T  # (clusters, N)
        self.clusterbounds = np.vstack([
            R[asc_residuals][np.argmax(cumsum >= (a / 2) * cumsum[:, -1:], axis=1)],
            R[asc_residuals][np.argmin(cumsum <= (1 - a / 2) * cumsum[:, -1:], axis=1) - 1]
        ]).T
    elif self.method == 'density':
        self.cluster_est = GaussianDensity(n_jobs=self.n_jobs)
        self.cluster_est.fit(self._pipe_transform(self.ref))
        density = self.cluster_est.predict(self._pipe_transform(self.ref))
        self.max_density = density.max()
    else:
        raise ValueError("Method {} is not a valid option.".format(self.method))
    return self
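# The `ecdf` helper used in the kmeans branch above is not included in this
# listing. A minimal sketch of what it is assumed to return: the empirical CDF
# of the residual sample and its inverse (quantile function).
import numpy as np

def ecdf(samples):
    """Return (cdf, ppf) callables built from the empirical distribution of `samples`."""
    x = np.sort(np.asarray(samples))
    n = len(x)

    def cdf(v):
        # fraction of samples <= v
        return np.searchsorted(x, v, side="right") / n

    def ppf(q):
        # empirical quantile: smallest sample whose cdf reaches q
        idx = np.clip(np.ceil(np.asarray(q) * n).astype(int) - 1, 0, n - 1)
        return x[idx]

    return cdf, ppf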
def predict(est, data_dir=None, calibrate=False, coverage=0.95,
            pretrained=False, savename=None):
    ''' fits (unless pretrained) and predicts on all data sets found in data_dir '''
    print(est, "(loaded estimator)")
    print("loading datasets")
    if not isinstance(data_dir, str):
        # expects a directory containing tr, va, te{i} files
        raise ValueError(
            "data must be a path to a directory containing tr.npz va.npz te.npz files")

    # load data from file
    sets = dict()
    for name in "tr va te de special".split():
        filename = os.path.join(data_dir, "{}.npz".format(name))
        if os.path.exists(filename):
            print("loading file", filename)
            sets[name] = Frame(**np.load(filename))

    # fit
    if pretrained:
        print("pretrained model, training skipped")
    else:
        print("fitting...")
        tstart = time.time()
        est.fit(sets['tr'], sets['va'])
        mins = (time.time() - tstart) / 60
        print("fit in {}h{}m".format(int(mins / 60), int(mins % 60)))

    if calibrate:
        prY, lo, hi = est.predict(sets['va'], coverage=coverage)
        # factor is a global scaling factor, but pointwise scaling will be used instead
        factor = est.calibrate(sets['va']['T'], prY, (hi - lo) / 2)

    # predict
    print("predicting and saving...")
    has_index = 'index' in sets['tr'] and 'index' in sets['va']

    # predict and save reference sets
    prs = dict()
    for dname, d in sets.items():
        print("predicting", dname.upper(), "for coverage", coverage)
        prY, minY, maxY = est.predict(d, coverage=coverage)
        pr = dict(T=d["T"], Y=prY, minY=minY, maxY=maxY)
        print(((pr['T'] - pr['Y'])**2).mean()**0.5, 'rmse')
        prs[dname] = Frame(pr)
        if has_index:
            pr["index"] = d["index"].copy()
        path = os.path.join(data_dir, savename)
        os.makedirs(path, exist_ok=True)
        np.savez(os.path.join(path, "pr_{}.npz".format(dname)), **pr)
        if calibrate:
            # save calibrated predictions, if desired
            prY, minY, maxY = est.scale(pr['T'], pr['Y'], pr['minY'], pr['maxY'],
                                        coverage=coverage)
            pr = dict(pr.items(), Y=prY, minY=minY, maxY=maxY)
            dname_cp = "{}_{:02d}cp".format(dname, int(100 * coverage))
            prs[dname_cp] = Frame(pr)
            np.savez(os.path.join(path, "pr_{}.npz".format(dname_cp)), **pr)
    print("predict done")
    return prs
def predict_raw(self, D, skip_closest=0):
    """ returns dictionary of raw results """
    network = self.network_shim()
    prY = np.squeeze(network.predict(D)['Y'])
    pr = Frame(D.items(), Y=prY)
    if not hasattr(self, "_pipe"):
        raise RuntimeError(
            "Estimator is not fitted yet. The neural network shim should decide "
            "if training is necessary or not.")
    X = self._pipe_transform(pr)
    refX = self._pipe_transform(self.ref)
    if self.method == 'mahalanobis':
        # takes a long time
        if not self.silent:
            print("caching inverse covariance matrix ...")
        self.mahalanobis_uncertainty(X[:2], refX[:2], refX, n_jobs=self.n_jobs)
    k = self.neighbors
    knn = self.knn
    knn.set_params(n_neighbors=k)
    result = defaultdict(list)
    splits = int(np.ceil(len(pr) / 5000))
    for i, idx in enumerate(np.array_split(np.arange(len(pr)), splits)):
        if not self.silent:
            print("working on split {i} of {n}...".format(i=i + 1, n=splits), end="\r")
        distances, neighbors = (A[:, skip_closest:] for A in knn.kneighbors(X[idx]))
        knnR = self.ref_results['R'][neighbors]  # signed residuals in neighborhood
        knnY = self.ref['Y'][neighbors]          # predictions in neighborhood
        knnT = self.ref['T'][neighbors]          # labels (targets) in neighborhood
        # knnY.shape == (n_points_in_split, n_neighbors)
        result['avgDist'] += [distances.mean(axis=1)]
        result['meanR'] += [knnR.mean(axis=1)]   # for stdR
        result['meanY'] += [knnY.mean(axis=1)]
        result['meanT'] += [knnT.mean(axis=1)]
        result['stdR'] += [
            (((knnR - result['meanR'][-1][:, None])**2).sum(axis=1) / (k - 1))**0.5
        ]
        result['absR'] += [np.abs(knnR).mean(axis=1)]
        result['varT'] += [np.var(knnT, axis=1, ddof=1)]        # cs_knnV
        result['stdT'] += [np.var(knnT, axis=1, ddof=1)**0.5]   # cs_knnV**0.5
        result['sqR'] += [(knnR**2).mean(axis=1)]               # cs_knnE
        if self.method == 'mahalanobis':
            # takes a long time and exceeds memory limits if not split like this
            VI = self._mahalanobis_params['VI']  # precomputed above
            dists = Parallel(self.n_jobs, 'threading', verbose=0)(
                delayed(run)(self._mahalanobis_uncertainty_job,
                             X[idx[i]][None, :], refX[neighbors[i]], VI=VI)
                for i in range(len(idx)))
            result['mn-dist'] += [np.concatenate(dists)]
    if not self.silent:
        print()
    pr.update([(k, np.concatenate(v)) for k, v in result.items()])
    return pr
def _predict(network, name, data):
    data = Frame(data)
    est = network(name=name)
    pr = est.predict(data)
    return pr["Y"]
def create_instances(data_dir="trained_models/benzene/",
                     folder_prefix="benzene/", n_jobs=1, only_est=None,
                     gpus=(1, ), mu_max=10):
    '''
    returns a list of (name, constructor) tuples of uncertainty estimators to
    train and predict with in the evaluation loop.
    '''
    fp = folder_prefix
    if fp[:len("trained_models")] == "trained_models":
        # all models are implicitly saved under trained_models/, no need to
        # explicitly prefix it
        fp = fp[len("trained_models/"):]

    instances = []
    dtnn = lambda: DTNNShim(name=os.path.join(fp, "dtnn"))
    normal_ppf = scipy.stats.norm.ppf
    t_ppf = lambda df: lambda x: scipy.stats.t.ppf(x, df=df)
    t50_ppf = t_ppf(50)

    ensemble_members = 24
    days = ceil(ensemble_members / sum(gpus)) * 72 / 24
    print("ensemble training will take approx.", days, "days")

    tr = Frame(**load(os.path.join(data_dir, "tr.npz")))
    params["tr_mean_std"] = Frame(**load(os.path.join(data_dir, "tr_mean_std.npz")))
    params["point"] = tr[0]
    params["n_tr_points"] = len(tr)
    params["mu_max"] = mu_max

    # basic baselines, nothing should perform worse than these
    instances += [("dummy-const", lambda: DummyUncertainty(dtnn, method="const"))]
    # these rely on the points being sorted (from one trajectory)
    instances += [("dummy-f'", lambda: DummyUncertainty(dtnn, method="f'"))]
    instances += [("dummy-f''", lambda: DummyUncertainty(dtnn, method="f''"))]
    instances += [("dummy-1/f'", lambda: DummyUncertainty(dtnn, method="1/f'"))]
    instances += [("dummy-1/f''", lambda: DummyUncertainty(dtnn, method="1/f''"))]

    # dropout variance ~ gal2015
    instances += [('doho60', lambda: UncertaintyWrapper(
        HoShim60, ppf=t50_ppf, name=os.path.join(fp, 'doho60')))]
    instances += [('doho70', lambda: UncertaintyWrapper(
        HoShim70, ppf=t50_ppf, name=os.path.join(fp, 'doho70')))]
    instances += [('doho80', lambda: UncertaintyWrapper(
        HoShim80, ppf=t50_ppf, name=os.path.join(fp, 'doho80')))]
    instances += [('doho90', lambda: UncertaintyWrapper(
        HoShim90, ppf=t50_ppf, name=os.path.join(fp, 'doho90')))]
    instances += [('dohe60', lambda: UncertaintyWrapper(
        HeShim60, ppf=t50_ppf, name=os.path.join(fp, 'dohe60')))]
    instances += [('dohe70', lambda: UncertaintyWrapper(
        HeShim70, ppf=t50_ppf, name=os.path.join(fp, 'dohe70')))]
    instances += [('dohe80', lambda: UncertaintyWrapper(
        HeShim80, ppf=t50_ppf, name=os.path.join(fp, 'dohe80')))]
    instances += [('dohe90', lambda: UncertaintyWrapper(
        HeShim90, ppf=t50_ppf, name=os.path.join(fp, 'dohe90')))]

    # conditional density estimation [bishop1995]
    instances += [("density", lambda: UncertaintyWrapper(
        DensityShim, ppf=normal_ppf, name=os.path.join(fp, "density")))]
    instances += [("density_full", lambda: UncertaintyWrapper(
        FullDensityShim, ppf=normal_ppf, name=os.path.join(fp, "density_full")))]
    instances += [("density_stepped", lambda: UncertaintyWrapper(
        SteppedDensityShim, ppf=normal_ppf, name=os.path.join(fp, "density_stepped")))]
    instances += [("density_half", lambda: UncertaintyWrapper(
        HalfDensityShim, ppf=normal_ppf, name=os.path.join(fp, "density_half")))]
    instances += [("mixture", lambda: UncertaintyWrapper(
        MixtureShim, ppf=normal_ppf, name=os.path.join(fp, "mixture")))]
    instances += [("mixture_half", lambda: UncertaintyWrapper(
        HalfMixtureShim, ppf=normal_ppf, name=os.path.join(fp, "mixture_half")))]
    instances += [("mixture_full", lambda: UncertaintyWrapper(
        FullMixtureShim, ppf=normal_ppf, name=os.path.join(fp, "mixture_full")))]
    instances += [("mixture_stepped", lambda: UncertaintyWrapper(
        SteppedMixtureShim, ppf=normal_ppf, name=os.path.join(fp, "mixture_stepped")))]
    instances += [("mixture_re", lambda: UncertaintyWrapper(
        ReMixtureShim, ppf=normal_ppf, name=os.path.join(fp, "mixture_re")))]

    # print("skipping quantile regression")
    # if False:
    #     instances += [("quantile-regression", lambda: GradientBoostingUncertainty(DTNNShim, keys="DY", n_estimators=100, max_depth=300, learning_rate=.1, min_samples_leaf=9, min_samples_split=9))]
    #     est = lambda: QuantileForestUncertainty(DTNNShim, keys="DY", n_estimators=500, max_depth=30, n_jobs=n_jobs)
    #     # let's also try it the way the original authors suggested (i.e. without including prY)
    #     instances += [("quantile-regression-no-y", lambda: GradientBoostingUncertainty(DTNNShim, keys="D", n_estimators=100, max_depth=300, learning_rate=.1, min_samples_leaf=9, min_samples_split=9))]
    #     instances += [("quantile-forest-no-y", lambda: QuantileForestUncertainty(DTNNShim, keys="D", n_estimators=500, max_depth=30, n_jobs=n_jobs))]

    # LOCAL NEIGHBORHOOD
    # fuzzy c-means clustering [pevec2013 shrestha2006]
    # instances += [("fuzzy-cmeans", lambda: ClusterUncertainty(dtnn, method="cmeans", coverage=cp, n_jobs=n_jobs))]
    # instances += [("kmeans", lambda: ClusterUncertainty(dtnn, method="kmeans", coverage=cp, n_jobs=n_jobs))]
    # mahalanobis distance to data set center [toplak2014]
    # instances += [("kmeans-mahalanobis", lambda: ClusterUncertainty(dtnn, method="kmeans-mahalanobis", coverage=cp, n_jobs=n_jobs))]
    # density based estimate [bosnic2008]
    # instances += [("gaussian-clusters", lambda: ClusterUncertainty(dtnn, method="density", coverage=cp, n_jobs=n_jobs))]

    # CONFIVE (variance of error) [briesemeister2012]
    # KNNUncertainty(self, network_shim, method="label_var", sets="va", keys="", neighbors=20, n_jobs=1, silent=False)
    # the quantile has to be provided at evaluation time; it makes sense to
    # generate quantiles once the desired coverage is known.
    instances += [("knn-mse", lambda: KNNUncertainty(
        dtnn, method="mse", sets='va', keys='Y', neighbors=8, n_jobs=n_jobs))]
    instances += [("knn-mseq", lambda: KNNUncertainty(
        dtnn, method="mse", sets='tr', keys='', neighbors=10, n_jobs=n_jobs))]
    instances += [("knn-dev", lambda: KNNUncertainty(
        dtnn, method="dev", sets='va', keys='Y', neighbors=716, n_jobs=n_jobs))]
    instances += [("knn-avgdist", lambda: KNNUncertainty(
        dtnn, method="avgdist", sets='tr', keys='Y', neighbors=34, n_jobs=n_jobs))]
    instances += [("knn-vary", lambda: KNNUncertainty(
        dtnn, method="label_var", sets='va', neighbors=2653, n_jobs=n_jobs))]
    # instances += [("knn-mahalanobis", lambda: KNNUncertainty(dtnn, method="mahalanobis", n_jobs=n_jobs))]
    # instances += [("knn-absolute-deviation-from-avg-label", lambda: KNNUncertainty(dtnn, method="absDev", n_jobs=n_jobs))]
    # instances += [("knn-mae", lambda: KNNUncertainty(dtnn, method="mae", n_jobs=n_jobs))]
    # instances += [("knn-rmse", lambda: KNNUncertainty(dtnn, method="rmse", n_jobs=n_jobs))]
    # avgDist [briesemeister2012]
    # instances += [("knn-avg-dist", lambda: KNNUncertainty(dtnn, method="avgDist", n_jobs=n_jobs))]

    # ENSEMBLES
    # bagging by example pairs and residuals [tibshirani1995]
    instances += [("ensemble-pairs", lambda: EnsembleUncertainty(
        DTNNShim, m=ensemble_members, method="pairs", balancing=False,
        folder=os.path.join(fp, "ensemble-pairs/"), gpus=gpus))]
    instances += [("ensemble-pairs-12", lambda: EnsembleUncertainty(
        DTNNShim, m=12, method="pairs", balancing=False,
        folder=os.path.join(fp, "ensemble-pairs-12/"), gpus=gpus))]
    instances += [("ensemble-pairs-6", lambda: EnsembleUncertainty(
        DTNNShim, m=6, method="pairs", balancing=False,
        folder=os.path.join(fp, "ensemble-pairs-6/"), gpus=gpus))]
    instances += [("ensemble-pairs-3", lambda: EnsembleUncertainty(
        DTNNShim, m=3, method="pairs", balancing=False,
        folder=os.path.join(fp, "ensemble-pairs-3/"), gpus=gpus))]
    instances += [("ensemble-residuals", lambda: EnsembleUncertainty(
        DTNNShim, m=ensemble_members, method="residuals", balancing=False,
        folder=os.path.join(fp, "ensemble-residuals/"), gpus=gpus))]
    instances += [("ensemble-residuals-12", lambda: EnsembleUncertainty(
        DTNNShim, m=12, method="residuals", balancing=False,
        folder=os.path.join(fp, "ensemble-residuals-12/"), gpus=gpus))]
    instances += [("ensemble-residuals-6", lambda: EnsembleUncertainty(
        DTNNShim, m=6, method="residuals", balancing=False,
        folder=os.path.join(fp, "ensemble-residuals-6/"), gpus=gpus))]
    instances += [("ensemble-residuals-3", lambda: EnsembleUncertainty(
        DTNNShim, m=3, method="residuals", balancing=False,
        folder=os.path.join(fp, "ensemble-residuals-3/"), gpus=gpus))]
    # balancing doesn't yield better estimates, unfortunately
    instances += [("ensemble-pairs-balanced", lambda: EnsembleUncertainty(
        DTNNShim, m=ensemble_members, k=ensemble_members // 6, method="pairs",
        balancing=True, folder=os.path.join(fp, "ensemble-pairs-balanced/"),
        gpus=gpus))]
    instances += [("ensemble-residuals-balanced", lambda: EnsembleUncertainty(
        DTNNShim, m=ensemble_members, k=ensemble_members // 6, method="residuals",
        balancing=True, folder=os.path.join(fp, "ensemble-residuals-balanced/"),
        gpus=gpus))]

    if only_est is not None:
        estimators = dict(instances)
        if only_est not in estimators.keys():
            raise ValueError(
                "No such estimator instance: '{}'\nChoose from\n  {}".format(
                    only_est, "\n  ".join(sorted(estimators.keys()))))
        else:
            return [(only_est, estimators[only_est])]
    return instances