def station_prediction(station, nframes=12, interval=300, prev_frames=4):
    frames = radardb.get_latest(station, prev_frames)
    if frames is None or len(frames) < 2:
        logging.info("Couldn't get frames from radardb. station: %s", station)
        logging.debug("Trying to get frames directly from NOAA FTP...")
        for i in range(3):
            logging.debug("Attempt %d of 3", i + 1)
            frames = pullframes.get_latest(station, prev_frames)
            if frames and len(frames) > 1:
                break
    if frames is None or len(frames) < 2:
        logging.error("Couldn't get frames from either source. Giving up.")
        return None

    z = np.array([f['z'] for f in frames])
    frame_times = np.array([f['unix_time'] for f in frames])

    # Sort in ascending time order
    sa = np.argsort(frame_times)
    frame_times = frame_times[sa]
    z = z[sa]

    mods = [
        distance.DistanceInner(),
        distance.DistanceOuter(),
        diffusion.DiffusionPredictor(),
        warp.Warp((50, 50), poly_deg=4, reg_param=0.01)
    ]
    # `coeffs` and `coeff_times` are assumed to be defined at module level (not shown here).
    ensemble_mod = ensemble.Ensemble(mods, coeffs, coeff_times)

    output_times = np.array([1.0 * interval * i for i in range(nframes)])
    rel_times = frame_times - frame_times[-1]
    print(rel_times)
    prob = ensemble_mod.predict(rel_times, z, output_times)

    pred = {}
    pred['prob'] = prob
    pred['prev_z'] = z
    pred['prev_t'] = rel_times
    pred['extent'] = frames[0]['extent']
    pred['utmzone'] = frames[0]['utmzone']
    pred['interval'] = interval
    pred['start_time'] = frame_times[-1]
    return pred
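# Hedged usage sketch (not part of the original module): 'KTLX' is a hypothetical
# NEXRAD station code, and the prints are illustrative only.
pred = station_prediction('KTLX', nframes=12, interval=300, prev_frames=4)
if pred is not None:
    # Presumably one predicted field per output time, spaced `interval` seconds apart.
    print("prediction start time:", pred['start_time'])
    print("probability field shape:", np.shape(pred['prob']))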
def get_ensm_measures(model_names, n_models_list, test_images, test_labels):
    ensm_measures = defaultdict(list)
    for n_models in n_models_list:
        print("############ ensm {}".format(n_models))
        model_name_subset = model_names[:n_models]
        print(model_name_subset)
        wrapped_models = [
            ensemble.KerasLoadsWhole(name, pop_last=True)
            for name in model_name_subset
        ]
        ensm_model = ensemble.Ensemble(wrapped_models)
        evaluation_result = evaluation.calc_classification_measures(
            ensm_model, test_images, test_labels, wrapper_type='ensemble')
        for measure, value in evaluation_result.items():
            ensm_measures[measure].append(value)
    return ensm_measures
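# Hedged usage sketch: assumes the saveload/datasets helpers used in the other
# snippets here are importable, and reuses their 'vgg_a'/'cifar10' naming; the
# n_models_list values below are arbitrary.
ensemble_model_names = saveload.get_ensemble_model_names()
model_names = ensemble_model_names['vgg_a']['cifar10']
_, (test_images, test_labels) = datasets.get_dataset('cifar10')
ensm_measures = get_ensm_measures(model_names, [1, 2, 4, 8],
                                  test_images, test_labels.reshape(-1))
for measure, values in ensm_measures.items():
    print("{}: {}".format(measure, values))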
#tf.config.experimental.set_memory_growth(physical_devices[0], True)

# Example usage
if __name__ == "__main__":
    # Note: the example below assumes that there is a trained Keras model
    # saved with saveload.save_model under the name 'cnn'

    # Wrap models
    model1 = ensemble.KerasLoadsWhole(model_load_name="cnn", name="cnn_1")
    model2 = ensemble.KerasLoadsWhole(model_load_name="cnn", name="cnn_2")
    model3 = ensemble.KerasLoadsWhole(model_load_name="cnn", name="cnn_3")
    model4 = ensemble.KerasLoadsWhole(model_load_name="cnn", name="cnn_4")

    # Build ensemble
    cnn_models = [model1, model2, model3, model4]
    cnn_ensemble = ensemble.Ensemble(cnn_models)
    print(cnn_ensemble)

    # Load data
    (train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

    # Predict with ensemble
    ensemble_preds = cnn_ensemble.predict(test_images)
    print("Ensemble preds shape: {}".format(ensemble_preds.shape))

    # Retrieve models from ensemble
    cnn_1 = cnn_ensemble.get_model("cnn_1")
    cnn_1_preds = cnn_1.predict(test_images)
    print("CNN preds shape: {}".format(cnn_1_preds.shape))
ENSM_MODEL_NAME, ENSM_N_MODELS = 'vgg_a', 100
ENDD_MODEL_NAME, ENDD_BASE_MODEL = 'endd_vgg_cifar10_a', 'vgg'
ENDD_AUX_MODEL_NAME, ENDD_AUX_BASE_MODEL = 'new_cifar10_vgg_endd_aux_0_TEMP=10', 'vgg'

# Choose dataset
DATASET_NAME = 'cifar10'
OUT_DATASET_NAME = 'lsun'

# Prepare ENSM model
ensemble_model_names = saveload.get_ensemble_model_names()
model_names = ensemble_model_names[ENSM_MODEL_NAME][DATASET_NAME][:ENSM_N_MODELS]
models = [ensemble.KerasLoadsWhole(name, pop_last=True) for name in model_names]
ensm_model = ensemble.Ensemble(models)

# Prepare ENDD model
endd_model = endd.get_model(ENDD_BASE_MODEL,
                            dataset_name=DATASET_NAME,
                            compile=True,
                            weights=ENDD_MODEL_NAME)

# Prepare ENDD+AUX model
endd_aux_model = endd.get_model(ENDD_AUX_BASE_MODEL,
                                dataset_name=DATASET_NAME,
                                compile=True,
                                weights=ENDD_AUX_MODEL_NAME)

# Load data
_, (in_images, _) = datasets.get_dataset(DATASET_NAME)
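# Hedged continuation sketch: the OOD images are presumably loaded with the same
# helper, and raw predictions are collected from each model; the original script's
# downstream evaluation is not reproduced here.
_, (out_images, _) = datasets.get_dataset(OUT_DATASET_NAME)

ensm_in_preds = ensm_model.predict(in_images)
ensm_out_preds = ensm_model.predict(out_images)
endd_in_preds = endd_model.predict(in_images)
endd_aux_in_preds = endd_aux_model.predict(in_images)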
import tensorflow as tf

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

from utils import evaluation
from utils import datasets
from utils import saveload
from models import ensemble

ENSEMBLE_NAME = 'basic_cnn'
DATASET_NAME = 'cifar10'

# Load ensemble model
ensemble_model_names = saveload.get_ensemble_model_names()
model_names = ensemble_model_names[ENSEMBLE_NAME][DATASET_NAME][:3]
models = [ensemble.KerasLoadsWhole(name) for name in model_names]
ensm = ensemble.Ensemble(models)
ensm_wrapper_type = 'ensemble'

# Load individual model
ind = saveload.load_tf_model(model_names[0])
ind_wrapper_type = 'individual'

# Load data
_, (test_images, test_labels) = datasets.get_dataset(DATASET_NAME)

# Preprocess data
test_labels = test_labels.reshape(-1)

# Calculate measures
ensm_measures = evaluation.calc_classification_measures(
    ensm, test_images, test_labels, wrapper_type=ensm_wrapper_type)
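# Hedged continuation: the individual model loaded above is presumably scored the
# same way; wrapper_type=ind_wrapper_type mirrors the variable defined earlier.
ind_measures = evaluation.calc_classification_measures(
    ind, test_images, test_labels, wrapper_type=ind_wrapper_type)

for measure, ensm_value in ensm_measures.items():
    print("{}: ensm={}, ind={}".format(measure, ensm_value,
                                       ind_measures.get(measure)))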
measures = {'endd': defaultdict(list), 'ensm': defaultdict(list)}

for n_models in N_MODELS_LIST:
    # Get model names
    if SAMPLE_ENSEMBLE_MODELS:
        model_name_subset = np.random.choice(model_names, n_models)
    else:
        model_name_subset = model_names[:n_models]
    #model_name_subset = ['vgg_cifar10_cifar10_25']
    print("##############", model_name_subset)
    wrapped_models = [
        ensemble.KerasLoadsWhole(name, pop_last=True)
        for name in model_name_subset
    ]

    # Build ensemble
    ensm_model = ensemble.Ensemble(wrapped_models)
    #import pdb; pdb.set_trace();

    #ensm_measures = evaluation.calc_classification_measures(ensm_model,
    #                                                        test_images,
    #                                                        test_labels,
    #                                                        wrapper_type='ensemble')
    #print("############# Ensemble Measures")
    #for measure, value in ensm_measures.items():
    #    print("{}={}".format(measure, value))
    #    measures['ensm'][measure].append(value)
    #print()

    # Train ENDD
    if SAMPLE_ENSEMBLE_MODELS:
        save = True
        load = False
for i in range(n_ens):
    io_util.restore_checkpoint(
        models[i],
        optimizers[i],
        args.exp_dir,
        i=i,
        device=torch.device("cuda"),
        filename=args.restore_name,
    )
    models[i].eval()

imgs = [dataloaders[args.split].dataset[i][0] for i in range(args.n)]

if args.ensemble_type == "real":
    ens = ensemble.Ensemble(models)
else:
    ens = ensemble.MCEnsemble(models, n=args.ensemble)

eig_records = []
if args.batches_to_test is not None:
    batches_to_test = list(args.batches_to_test)
else:
    batches_to_test = list(
        range(args.max_batches, args.min_batches, -args.batches_step))
epochs_to_test = args.epochs_to_test

for n_epochs in epochs_to_test:
    for n_batches in batches_to_test:
def main(args):
    """
    The main training script.

    :param args: an argparse.Namespace generated from io_util.parse_args.

    :returns: None
    """
    # If using a real ensemble, the actual number of models is --ensemble;
    # otherwise we only train 1 model (and construct a "fake" ensemble later)
    if args.ensemble_type == "real":
        n_ens = args.ensemble
    else:
        n_ens = 1

    # Check if resumable
    resumable = args.resume and all(
        io_util.is_resumable(args.exp_dir, i=i) for i in range(n_ens))
    if resumable:
        logger.warning(
            "When resuming, we don't support multiple start epochs...if --warm_start, models may get trained for an extra epoch or two"
        )

    os.makedirs(args.exp_dir, exist_ok=True)

    if not resumable:
        io_util.save_args(args, args.exp_dir)

    # Seeds
    torch.manual_seed(args.seed)

    # Cuda device
    if args.cuda and args.cuda_id != -1:
        torch.cuda.set_device(args.cuda_id)

    dataloaders, vocab = builders.build_dataloaders(args)
    models, optimizer_func, losses = builders.build_ensemble(args, vocab)
    optimizers = optimizer_func()

    # Initialize / load checkpoint
    all_metrics = []
    if resumable:
        for i in range(n_ens):
            io_util.restore_checkpoint(models[i], optimizers[i], args.exp_dir, i=i)
            metrics = io_util.load_metrics(args.exp_dir, i=i)
            all_metrics.append(metrics)
        # FIXME: Resuming isn't quite correct yet:
        # (1) need to restore the dataloaders based on existing EIG (make that
        #     a func used by al.acq)
        # (2) Reload eig_records, all_eig_details
        start_epoch = min(m["current_epoch"] for m in all_metrics) + 1
        if os.path.exists(os.path.join(args.exp_dir, "ens_metrics.json")):
            ens_metrics = io_util.load_metrics(args.exp_dir,
                                               filename="ens_metrics.json")
        else:
            ens_metrics = util.init_metrics(ensemble=True)
        logger.info("Resuming from epoch {}".format(start_epoch))
    else:
        for i in range(n_ens):
            metrics = util.init_metrics(ensemble=False)
            all_metrics.append(metrics)
        start_epoch = 0
        ens_metrics = util.init_metrics(ensemble=True)

    # Pooling
    if args.eig_workers == 0 or args.eig_method == "random":
        pool_ctx = util.FakePool
    else:
        pool_ctx = mp.Pool

    all_eig_details = defaultdict(list)
    eig_records = []

    if args.warm_start or args.acquisition == "eig_y":
        # Keep the same EIG optimizer if warm start OR if acquire_y (to prevent waste)
        # Only applies to acquire_xy; in acquire_y, new optimizer for each image
        eig_config = builders.build_eig_estimator(
            args, vocab, model_batch_size=args.qhy_batch_size)
    else:
        eig_config = None

    # Loop through acquisition steps
    for acq_step in range(start_epoch, args.epochs):
        # ==== INDIVIDUAL MODEL TRAINING ====
        for i, (model, optimizer, loss, metrics) in enumerate(
                zip(models, optimizers, losses, all_metrics)):
            if args.no_train:
                break

            # Train on seed set - inner loop
            best_model_state_dict = None
            val_evals_since_improvement = 0
            if args.dataset == "wmt14":
                best_val_metrics = {
                    "loss": float("inf"),
                    "top1": 0.0,
                    "ppl": float("inf"),
                }
                rf = run_mt
            else:
                best_val_metrics = {"loss": float("inf"), "top5": 0.0, "top1": 0.0}
                rf = run

            for epoch in trange(
                    0,
                    args.inner_epochs,
                    desc=f"acq step {acq_step}/{args.epochs} (size {len(dataloaders['seed'].dataset)}): train model {i}",
            ):
                train_metrics = rf("seed", epoch, model, optimizer, loss,
                                   dataloaders, args, i)
                util.print_metrics_progress("train", epoch, train_metrics, i,
                                            logger=logger)
                if (epoch + 1) % args.val_interval == 0:  # since epochs are 0-indexed
                    val_metrics = rf("val", epoch, model, optimizer, loss,
                                     dataloaders, args, i)
                    util.print_metrics_progress("val", epoch, val_metrics, i,
                                                logger=logger)
                    if val_metrics["top1"] > best_val_metrics["top1"]:
                        best_val_metrics = val_metrics
best_val_metrics["epoch"] = epoch best_model_state_dict = copy.deepcopy( model.state_dict()) val_evals_since_improvement = 0 else: val_evals_since_improvement += 1 if val_evals_since_improvement >= args.val_patience: logger.info( f"Stopped at inner epoch {epoch}; no improvement after {val_evals_since_improvement} evals (best top1: {best_val_metrics['top1']:f})" ) break if best_model_state_dict is None: # No early stopping was performed, val was never evaluated best_model_state_dict = copy.deepcopy(model.state_dict()) # Restore best model model.load_state_dict(best_model_state_dict) # Update your metrics, prepending the split name. for metric, value in train_metrics.items(): metrics["train_{}".format(metric)].append(value) for metric, value in best_val_metrics.items(): metrics["val_{}".format(metric)].append(value) metrics["current_epoch"] = acq_step # Check if there was an improvement is_best = best_val_metrics["top1"] > metrics["best_top1"] if is_best: metrics["best_top1"] = best_val_metrics["top1"] metrics["best_loss"] = best_val_metrics["loss"] metrics["best_epoch"] = acq_step if is_best: metrics["epochs_since_improvement"] = 0 else: metrics["epochs_since_improvement"] += 1 logger.info( f"Epochs since last improvement: {metrics['epochs_since_improvement']}" ) state_dict = { "state_dict": model.state_dict(), "optimizer": optimizer.state_dict(), } io_util.save_checkpoint(state_dict, is_best, args.exp_dir, i=i) # Save checkpoint at fixed intervals based on epoch if acq_step % args.save_interval == 0: io_util.save_checkpoint( state_dict, False, args.exp_dir, filename="{}.pth".format(acq_step), i=i, ) io_util.save_metrics(metrics, args.exp_dir, i=i) # ==== ENSEMBLE EVAL ==== if args.eval_ensemble: if args.dataset not in {"mnist"}: raise NotImplementedError if args.ensemble_type == "mc": raise NotImplementedError ens = ensemble.Ensemble(models) this_ens_metrics = eval_ensemble("val", ens, dataloaders, args, eval_batch_size=4) for metric, value in this_ens_metrics.items(): ens_metrics["ens_{}".format(metric)].append(value) ens_metrics["current_epoch"] = acq_step if this_ens_metrics["acc"] > ens_metrics["best_ens_acc"]: ens_metrics["best_ens_acc"] = this_ens_metrics["acc"] ens_metrics["best_ens_loss"] = this_ens_metrics["loss"] ens_metrics["best_ens_epoch"] = acq_step io_util.save_metrics(ens_metrics, args.exp_dir, filename="ens_metrics.json") util.print_ensemble_progress(acq_step, all_metrics, ens_metrics=ens_metrics, logger=logger) # ==== ACQUISITION ==== # Compute sizes (e.g. if they are % of dataset) current_pool_size = len(dataloaders["pool"].dataset) current_seed_size = len(dataloaders["seed"].dataset) dataset_size = current_pool_size + current_seed_size pool_size = util.compute_size(args.pool_size, dataset_size, remaining_dataset_size=current_pool_size) acq_size = util.compute_size(args.acq_size, dataset_size) if args.ensemble_type == "real": ens = ensemble.Ensemble(models).eval() else: assert len(models) == 1 ens = ensemble.MCEnsemble(models[0], n=args.ensemble).eval() if args.compare is not None: # Preselect a pool and evaluate EIG for each datum with all # available methods. 
            # Do selection according to the selected EIG.
            pool = util.sample_idx(pool_size,
                                   RandomSampler(dataloaders["pool"].dataset))
            # Save real run for last
            acq_methods = args.compare[:]
            acq_methods.append(args.acquisition)
        else:
            # Just do one
            pool = None
            acq_methods = [args.acquisition]

        # Possible to run some acq methods multiple times
        acq_method_counts = Counter(acq_methods)
        counts = Counter()
        for acq_method_i, acq_method in enumerate(acq_methods):
            # Only the last acq_method is not a dry run
            dry_run = acq_method_i != len(acq_methods) - 1
            if acq_method in {"eig_xy", "batchbald", "batcheig"}:
                acqf = {
                    "eig_xy": al.acquisition.acquire_xy,
                    "batcheig": al.acquisition.acquire_batcheig,
                    "batchbald": al.acquisition.acquire_batchbald,
                }[acq_method]
                eig_record, eig_details = acqf(
                    acq_size,
                    pool_size,
                    ens,
                    dataloaders,
                    args,
                    eig_config=eig_config,
                    pool=pool,
                    dry_run=dry_run,
                )
            else:
                with pool_ctx(args.eig_workers) as mp_pool:
                    eig_record, eig_details = al.acquisition.acquire_y(
                        al.funcs.ACQ_FUNCS[acq_method],
                        acq_size,
                        pool_size,
                        ens,
                        dataloaders,
                        args,
                        mp_pool=mp_pool,
                        eig_config=eig_config,
                        pool=pool,
                        dry_run=dry_run,
                        epoch=acq_step,
                    )
            eig_record["epoch"] = acq_step
            if not dry_run:
                logger.info(eig_record)
            if acq_method_counts[acq_method] > 1:
                n_done = counts[acq_method]
                counts[acq_method] += 1
                acq_method = f"{acq_method}_{n_done}"
            eig_record["method"] = acq_method
            eig_record["dry_run"] = dry_run
            eig_records.append(eig_record)

            for edname, edval in eig_details.items():
                all_eig_details[edname].extend(edval)
            all_eig_details["method"].extend(acq_method for _ in eig_details["id"])
            all_eig_details["dry_run"].extend(dry_run for _ in eig_details["id"])
            all_eig_details["epoch"].extend(acq_step for _ in eig_details["id"])

        eig_fname = os.path.join(args.exp_dir, "eig.csv")
        pd.DataFrame.from_records(eig_records).to_csv(eig_fname, index=False)
        eig_details_fname = os.path.join(args.exp_dir, "eig_details.csv")
        pd.DataFrame(all_eig_details).to_csv(eig_details_fname, index=False)

        # Done acquiring - reset parameters
        if not args.warm_start:
            ens.reset_parameters()
            optimizers = optimizer_func()
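# Hedged sketch of the entry point, assuming io_util.parse_args exists as the
# docstring of main() suggests; the original script's guard may differ.
if __name__ == "__main__":
    main(io_util.parse_args())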