def __init__(self, n_states, n_actions):
    super(DQN, self).__init__()
    self.n_states = n_states
    self.n_actions = n_actions
    self.replay_memory = ExperienceReplay()
    # Online network (updated every training step) and a frozen target
    # network that is synced periodically to stabilize the Q-targets.
    self.model = create_model(self.n_states, n_actions).to(device)
    print(self.model)
    self.target_model = create_model(self.n_states, n_actions).to(device)
    self.target_model.eval()
    self.opt = torch.optim.RMSprop(self.model.parameters(), lr=2e-4)
    # self.loss = nn.SmoothL1Loss()
    self.loss = nn.MSELoss()
    self.target_counter = 0
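
# The constructor above assumes an ExperienceReplay class that is not shown
# here. The following is a minimal sketch of one plausible implementation
# (a fixed-capacity ring buffer with uniform sampling); the class name matches
# the usage above, but the capacity and method names are assumptions.
import random
from collections import deque

class ExperienceReplay:
    def __init__(self, capacity=10_000):
        # Oldest transitions are evicted once capacity is reached.
        self.memory = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniform random sampling decorrelates consecutive updates.
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)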
def __init__(self, player):
    print(f"Running on device: {device.upper()}")
    self.n_states = 9
    # np.int was removed in NumPy 1.24+; use the builtin int dtype instead.
    self.state = np.zeros(self.n_states, dtype=int)
    self.player = player
    # Input is the 9-cell board state plus a player indicator; the output is
    # one Q-value per cell.
    self.model = create_model(self.n_states + 1, self.n_states).to(device)
    self.model.load_state_dict(torch.load('models/self_play_32000.pth'))
    self.model.eval()
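
# The two constructors above call a create_model factory that is not shown.
# A minimal sketch, assuming a small fully connected network mapping a state
# vector to one Q-value per action; the layer sizes are illustrative, not the
# source architecture.
import torch.nn as nn

def create_model(n_inputs, n_outputs):
    return nn.Sequential(
        nn.Linear(n_inputs, 128),
        nn.ReLU(),
        nn.Linear(128, 128),
        nn.ReLU(),
        nn.Linear(128, n_outputs),
    )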
def main(
    region,
    am_pm,
    start_year,
    end_year,
    config_path,
    model_path,
    output_path_prefix,
    batch_size,
    parallel,
):
    config = load_config(config_path)
    batch_size = batch_size or config.test_batch_size
    device = torch.device("cuda:0")
    model = create_model(UNet, config)
    if parallel:
        model = torch.nn.DataParallel(model)
    model = model.to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    # Lazily load one gap-filled brightness-temperature file per year and
    # concatenate them into a single dataset.
    n = 0
    tbdss = []
    for y in range(start_year, end_year + 1):
        fname = (
            f"../data/tb/gapfilled_{region}/tb_{y}_{am_pm}_{region}_filled.npy"
        )
        ni = (dt.date(y + 1, 1, 1) - dt.date(y, 1, 1)).days
        ds = dh.LazyLoadFastUnloadNpyDataset(fname, ni)
        tbdss.append(ds)
        n += ni
    tbdss = torch.utils.data.ConcatDataset(tbdss)
    # Pair each day with its prior day: ptbss covers days [0, n-1), tbdss
    # covers days [1, n).
    ptbss = torch.utils.data.Subset(tbdss, list(range(0, n - 1)))
    tbdss = torch.utils.data.Subset(tbdss, list(range(1, n)))
    # The static DEM grid is repeated so it can be stacked with each TB pair.
    zds = dh.RepeatDataset(np.load(config.dem_data_path), n - 1)
    ds = dh.GridsStackDataset([zds, ptbss, tbdss])
    dloader = torch.utils.data.DataLoader(
        ds, batch_size=batch_size, shuffle=False, drop_last=False
    )
    water_mask = ~np.load(config.land_mask_path)
    pred, prob = get_predictions(dloader, model, water_mask, LABEL_OTHER, device)
    np.save(f"{output_path_prefix}_pred.npy", pred)
    np.save(f"{output_path_prefix}_prob.npy", prob)
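
# dh.RepeatDataset above serves the static DEM grid once per day so it can be
# stacked with the per-day TB pairs. A minimal sketch of what such a wrapper
# might look like; the real dh implementation is not shown and may differ.
import torch

class RepeatDataset(torch.utils.data.Dataset):
    def __init__(self, data, n):
        self.data = data
        self.n = n

    def __len__(self):
        return self.n

    def __getitem__(self, idx):
        # The same underlying array is returned for every index.
        return self.data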
def main(config_path, resumable=False):
    config = load_config(config_path)
    device = torch.device("cuda:0")
    model = create_model(UNet, config)
    if torch.cuda.device_count() > 1:
        print("Using DataParallel")
        model = DataParallel(model)
    model = model.to(device)
    opt = torch.optim.Adam(
        model.parameters(),
        lr=config.learning_rate,
        weight_decay=config.l2_reg_weight,
    )
    sched = torch.optim.lr_scheduler.MultiStepLR(
        opt, config.lr_milestones, config.lr_step_gamma
    )
    grad_scaler = torch.cuda.amp.GradScaler()
    metric_checker = MetricImprovementChecker(MaxMetricTracker(-np.inf), MET_MCC)
    root_dir = config.run_dir
    snap_handler = SnapshotHandler(root_dir, model, opt, sched, metric_checker)
    resume = resumable and snap_handler.can_resume()
    if resume:
        print("Resuming")
    print(f"Initializing run dir: {root_dir}")
    train_summary, test_summary = init_run_dir(root_dir, config_path, resume=resume)
    last_epoch = 0
    if resume:
        (
            last_epoch,
            model,
            opt,
            sched,
            metric_checker,
        ) = snap_handler.load_full_snapshot()
    land_mask = torch.tensor(np.load(config.land_mask_path))
    #
    # Training data
    #
    train_ds = build_full_dataset_from_config(config, land_mask, True)
    #
    # Test Data
    #
    test_ds, test_input_ds, test_era_ds = build_full_dataset_from_config(
        config, land_mask, False
    )
    train_dataloader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=config.train_batch_size,
        shuffle=True,
        drop_last=config.drop_last,
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_ds,
        batch_size=config.test_batch_size,
        shuffle=False,
        drop_last=False,
    )
    if not resume:
        snap_handler.take_model_snapshot()
    try:
        for epoch in range(last_epoch, config.epochs):
            train_summary.add_scalar(
                "learning_rate", next(iter(opt.param_groups))["lr"], epoch
            )
            train(
                model,
                device,
                grad_scaler,
                train_dataloader,
                opt,
                land_mask,
                train_summary,
                epoch,
                config,
            )
            loss, cm = test(
                model,
                device,
                grad_scaler,
                test_dataloader,
                opt,
                land_mask,
                test_summary,
                epoch,
                config,
            )
            if metric_checker.check(cm):
                snap_handler.take_model_snapshot()
            log_metrics(test_summary, cm, epoch)
            sched.step()
            if epoch % 3 == 0 and epoch != 0:
                snap_handler.take_full_snapshot(epoch)
    except KeyboardInterrupt:
        print("Exiting training loop")
    except Exception as e:
        print(f"\n{e}")
        raise e
    finally:
        train_summary.close()
        test_summary.close()
    # Free up data for GC
    train_ds = None
    train_dataloader = None
    # Validation
    val_dates = load_dates(config.test_date_map_path)
    if config.use_prior_day:
        val_dates = val_dates[1:]
    model = snap_handler.load_best_model()
    model.eval()
    # Create and save predictions for test data
    print("Generating predictions")
    test_loader = torch.utils.data.DataLoader(
        test_input_ds,
        batch_size=config.test_batch_size,
        shuffle=False,
        drop_last=False,
    )
    pred, raw_prob = get_predictions(
        test_loader, model, ~land_mask, LABEL_OTHER, device, config
    )
    predictions_path = os.path.join(root_dir, FNAME_PREDICTIONS)
    print(f"Saving predictions: '{predictions_path}'")
    np.save(predictions_path, pred)
    probabilities_path = os.path.join(root_dir, FNAME_PROBABILITIES)
    print(f"Saving probabilities: '{probabilities_path}'")
    np.save(probabilities_path, raw_prob)
    # Validate against ERA5
    print("Validating against ERA5")
    test_era_ds = dataset_to_array(test_era_ds).argmax(1).squeeze()
    era_acc = validate_against_era5(pred, test_era_ds, val_dates, land_mask)
    # Validate against AWS DB
    db = get_db_session(config.db_path)
    lon_grid = np.load(config.lon_grid_path)
    lat_grid = np.load(config.lat_grid_path)
    aws_acc = validate_against_aws(
        pred, db, val_dates, lon_grid, lat_grid, land_mask, config
    )
    db.close()
    # Write accuracies
    acc_file = os.path.join(root_dir, "acc.csv")
    write_accuracies_file(val_dates, era_acc, aws_acc, acc_file)
    print(f"Era Mean Acc: {era_acc.mean()}")
    print(f"AWS Mean Acc: {aws_acc.mean()}")
    add_plots_to_run_dir(root_dir, config.do_val_plots, config.do_pred_plots)
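
# The snapshot gating above relies on MetricImprovementChecker and
# MaxMetricTracker, which are not shown. A minimal sketch, assuming check()
# extracts one metric (e.g. MCC) from the test results and reports whether it
# set a new maximum; the real classes may differ.
class MaxMetricTracker:
    def __init__(self, initial=float("-inf")):
        self.best = initial

    def update(self, value):
        improved = value > self.best
        if improved:
            self.best = value
        return improved

class MetricImprovementChecker:
    def __init__(self, tracker, metric_key):
        self.tracker = tracker
        self.metric_key = metric_key

    def check(self, metrics):
        # metrics is assumed to map metric names to values.
        return self.tracker.update(metrics[self.metric_key])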
fileHandler.setLevel(logging.DEBUG)
logger_py.addHandler(fileHandler)
repo = git.Repo(search_parent_directories=False)
sha = repo.head.object.hexsha
logger_py.debug('Git commit: %s' % sha)

# Data
train_dataset = config.create_dataset(cfg.data, mode='train')
val_dataset = config.create_dataset(cfg.data, mode='val')
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size_val,
    num_workers=int(n_workers // 2),
    shuffle=False,
    collate_fn=tolerating_collate,
)
# data_viz = next(iter(val_loader))
model = config.create_model(
    cfg, camera_model=train_dataset.get_cameras(), device=device
)

# Create rendering objects from loaded data
cameras = train_dataset.get_cameras()
lights = train_dataset.get_lights()

# Optimizer
if cfg.model.type == 'point':
    optimizer = optim.SGD(
        [p for p in model.parameters() if p.requires_grad], lr=lr
    )
elif cfg.renderer.is_neural_texture:
    optimizer = optim.Adam(model.parameters(), lr=lr)
else:
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))
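
# The val_loader above passes a tolerating_collate function that is not shown.
# A common pattern, sketched here as an assumption: drop samples that failed
# to load (None) and fall back to the default collate for the rest.
from torch.utils.data.dataloader import default_collate

def tolerating_collate(batch):
    batch = [sample for sample in batch if sample is not None]
    if len(batch) == 0:
        return None  # callers must be prepared to skip empty batches
    return default_collate(batch)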
os.makedirs(generation_dir)
batch_size = 1
vis_n_outputs = cfg['generation']['vis_n_outputs']
mesh_extension = cfg['generation']['mesh_extension']

# Dataset
dataset = config.create_dataset(cfg.data, mode='test')
test_loader = torch.utils.data.DataLoader(
    dataset, batch_size=batch_size, num_workers=1, shuffle=False
)
img_size = args.img_size or dataset.resolution
if isinstance(img_size, Number):
    img_size = (img_size, img_size)

# Model
model = config.create_model(
    cfg, mode='test', device=device, camera_model=dataset.get_cameras()
).to(device=device)
checkpoint_io = CheckpointIO(out_dir, model=model)
checkpoint_io.load(cfg['test']['model_file'])

# Generator
generator = config.create_generator(cfg, model, device=device)
torch.manual_seed(0)

# Generate
with torch.autograd.no_grad():
    model.eval()
    # Generate meshes
    if not args.render_only:
        logger_py.info('Generating mesh...')