Example #1
    def __init__(self, n_states, n_actions):
        super(DQN, self).__init__()
        self.n_states = n_states
        self.n_actions = n_actions
        self.replay_memory = ExperienceReplay()
        # Online network, updated every training step.
        self.model = create_model(self.n_states, n_actions).to(device)
        print(self.model)
        # Target network, held fixed between syncs for stable Q-targets.
        self.target_model = create_model(self.n_states, n_actions).to(device)
        self.target_model.eval()
        self.opt = torch.optim.RMSprop(self.model.parameters(), lr=2e-4)
        # self.loss = nn.SmoothL1Loss()
        self.loss = nn.MSELoss()
        self.target_counter = 0
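
Example #1 assumes a module-level `device` and a `create_model` factory that are not shown. A minimal sketch of what they might look like, assuming a plain MLP Q-network (the hidden size is a placeholder, not from the original source):

import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"

def create_model(n_states, n_actions):
    # Hypothetical Q-network: maps a state vector to one Q-value per action.
    return nn.Sequential(
        nn.Linear(n_states, 64),
        nn.ReLU(),
        nn.Linear(64, n_actions),
    )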
Example #2
    def __init__(self, player):
        print(f"Running on device: {device.upper()}")
        self.n_states = 9
        # np.int has been removed from NumPy; use a concrete dtype instead.
        self.state = np.zeros(self.n_states, dtype=np.int64)
        self.player = player
        # Network input is the 9-cell state plus one extra slot (the player).
        self.model = create_model(self.n_states + 1, self.n_states).to(device)
        self.model.load_state_dict(
            torch.load('models/self_play_32000.pth', map_location=device))
        self.model.eval()
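
A possible way to run inference with the agent from Example #2. The class name `TicTacToeAgent` and the input layout (board cells followed by the player id) are guesses based on the `n_states + 1` input size; `device` comes from the surrounding module:

import numpy as np
import torch

agent = TicTacToeAgent(player=1)  # hypothetical name for the class above
# Assumed input layout: 9 board cells followed by the player id.
x = np.append(agent.state, agent.player).astype(np.float32)
with torch.no_grad():
    q_values = agent.model(torch.from_numpy(x).to(device))
best_cell = int(q_values.argmax())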
Example #3
def main(
    region,
    am_pm,
    start_year,
    end_year,
    config_path,
    model_path,
    output_path_prefix,
    batch_size,
    parallel,
):
    config = load_config(config_path)
    batch_size = batch_size or config.test_batch_size
    device = torch.device("cuda:0")
    model = create_model(UNet, config)
    if parallel:
        model = torch.nn.DataParallel(model)
    model = model.to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()

    # Build one lazy-loading dataset per year of brightness-temperature grids.
    n = 0
    tbdss = []
    for y in range(start_year, end_year + 1):
        fname = (
            f"../data/tb/gapfilled_{region}/tb_{y}_{am_pm}_{region}_filled.npy"
        )
        # Number of days in year y.
        ni = (dt.date(y + 1, 1, 1) - dt.date(y, 1, 1)).days
        ds = dh.LazyLoadFastUnloadNpyDataset(fname, ni)
        tbdss.append(ds)
        n += ni
    tbdss = torch.utils.data.ConcatDataset(tbdss)
    # Offset views: ptbss holds days 0..n-2 (previous day), tbdss days 1..n-1.
    ptbss = torch.utils.data.Subset(tbdss, list(range(0, n - 1)))
    tbdss = torch.utils.data.Subset(tbdss, list(range(1, n)))
    # Repeat the static DEM grid so it lines up with each day pair.
    zds = dh.RepeatDataset(np.load(config.dem_data_path), n - 1)
    ds = dh.GridsStackDataset([zds, ptbss, tbdss])
    dloader = torch.utils.data.DataLoader(ds,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          drop_last=False)
    # Water is the complement of the land mask.
    water_mask = ~np.load(config.land_mask_path)
    pred, prob = get_predictions(dloader, model, water_mask, LABEL_OTHER,
                                 device)
    np.save(f"{output_path_prefix}_pred.npy", pred)
    np.save(f"{output_path_prefix}_prob.npy", prob)
Example #4
def main(config_path, resumable=False):
    config = load_config(config_path)
    device = torch.device("cuda:0")

    model = create_model(UNet, config)
    if torch.cuda.device_count() > 1:
        print("Using DataParallel")
        model = DataParallel(model)
    model = model.to(device)
    opt = torch.optim.Adam(
        model.parameters(),
        lr=config.learning_rate,
        weight_decay=config.l2_reg_weight,
    )
    sched = torch.optim.lr_scheduler.MultiStepLR(opt, config.lr_milestones,
                                                 config.lr_step_gamma)
    grad_scaler = torch.cuda.amp.GradScaler()

    metric_checker = MetricImprovementChecker(MaxMetricTracker(-np.inf),
                                              MET_MCC)
    root_dir = config.run_dir
    snap_handler = SnapshotHandler(root_dir, model, opt, sched, metric_checker)
    resume = resumable and snap_handler.can_resume()
    if resume:
        print("Resuming")
    print(f"Initializing run dir: {root_dir}")
    train_summary, test_summary = init_run_dir(root_dir,
                                               config_path,
                                               resume=resume)

    last_epoch = 0
    if resume:
        (
            last_epoch,
            model,
            opt,
            sched,
            metric_checker,
        ) = snap_handler.load_full_snapshot()

    land_mask = torch.tensor(np.load(config.land_mask_path))
    #
    # Training data
    #
    train_ds = build_full_dataset_from_config(config, land_mask, True)
    #
    # Test Data
    #
    test_ds, test_input_ds, test_era_ds = build_full_dataset_from_config(
        config, land_mask, False)

    train_dataloader = torch.utils.data.DataLoader(
        train_ds,
        batch_size=config.train_batch_size,
        shuffle=True,
        drop_last=config.drop_last,
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_ds,
        batch_size=config.test_batch_size,
        shuffle=False,
        drop_last=False,
    )
    if not resume:
        snap_handler.take_model_snapshot()
    try:
        for epoch in range(last_epoch, config.epochs):
            train_summary.add_scalar("learning_rate",
                                     next(iter(opt.param_groups))["lr"], epoch)
            train(
                model,
                device,
                grad_scaler,
                train_dataloader,
                opt,
                land_mask,
                train_summary,
                epoch,
                config,
            )
            loss, cm = test(
                model,
                device,
                grad_scaler,
                test_dataloader,
                opt,
                land_mask,
                test_summary,
                epoch,
                config,
            )
            if metric_checker.check(cm):
                snap_handler.take_model_snapshot()
            log_metrics(test_summary, cm, epoch)
            sched.step()
            if epoch % 3 == 0 and epoch != 0:
                snap_handler.take_full_snapshot(epoch)
    except KeyboardInterrupt:
        print("Exiting training loop")
    except Exception as e:
        print(f"\n{e}")
        raise  # bare raise preserves the original traceback
    finally:
        train_summary.close()
        test_summary.close()
        # Free up data for GC
        train_ds = None
        train_dataloader = None

        # Validation
        val_dates = load_dates(config.test_date_map_path)
        if config.use_prior_day:
            val_dates = val_dates[1:]

        model = snap_handler.load_best_model()
        model.eval()
        # Create and save predictions for test data
        print("Generating predictions")
        test_loader = torch.utils.data.DataLoader(
            test_input_ds,
            batch_size=config.test_batch_size,
            shuffle=False,
            drop_last=False,
        )
        pred, raw_prob = get_predictions(test_loader, model, ~land_mask,
                                         LABEL_OTHER, device, config)
        predictions_path = os.path.join(root_dir, FNAME_PREDICTIONS)
        print(f"Saving predictions: '{predictions_path}'")
        np.save(predictions_path, pred)
        probabilities_path = os.path.join(root_dir, FNAME_PROBABILITIES)
        print(f"Saving probabilities: '{probabilities_path}'")
        np.save(probabilities_path, raw_prob)
        # Validate against ERA5
        print("Validating against ERA5")
        test_era_ds = dataset_to_array(test_era_ds).argmax(1).squeeze()
        era_acc = validate_against_era5(pred, test_era_ds, val_dates,
                                        land_mask)
        # Validate against AWS DB
        db = get_db_session(config.db_path)
        lon_grid = np.load(config.lon_grid_path)
        lat_grid = np.load(config.lat_grid_path)
        aws_acc = validate_against_aws(pred, db, val_dates, lon_grid, lat_grid,
                                       land_mask, config)
        db.close()
        # Write accuracies
        acc_file = os.path.join(root_dir, "acc.csv")
        write_accuracies_file(val_dates, era_acc, aws_acc, acc_file)
        print(f"Era Mean Acc: {era_acc.mean()}")
        print(f"AWS Mean Acc: {aws_acc.mean()}")
        add_plots_to_run_dir(root_dir, config.do_val_plots,
                             config.do_pred_plots)
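
Example #4 constructs a `torch.cuda.amp.GradScaler`, but the body of `train` is not shown. A generic sketch of the mixed-precision step such a scaler usually implies (this is standard AMP boilerplate, not the original `train` function):

import torch

def train_step(model, batch, target, loss_fn, opt, grad_scaler, device):
    x, y = batch.to(device), target.to(device)
    opt.zero_grad()
    with torch.cuda.amp.autocast():     # run the forward pass in mixed precision
        loss = loss_fn(model(x), y)
    grad_scaler.scale(loss).backward()  # scale the loss to avoid fp16 underflow
    grad_scaler.step(opt)               # unscale gradients, then optimizer step
    grad_scaler.update()                # adjust the scale factor for next step
    return loss.item()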
Example #5
fileHandler.setLevel(logging.DEBUG)
logger_py.addHandler(fileHandler)

repo = git.Repo(search_parent_directories=False)
sha = repo.head.object.hexsha
logger_py.debug('Git commit: %s' % sha)

# Data
train_dataset = config.create_dataset(cfg.data, mode='train')
val_dataset = config.create_dataset(cfg.data, mode='val')
val_loader = torch.utils.data.DataLoader(
    val_dataset, batch_size=batch_size_val, num_workers=int(n_workers // 2),
    shuffle=False, collate_fn=tolerating_collate,
)
# data_viz = next(iter(val_loader))
model = config.create_model(cfg, camera_model=train_dataset.get_cameras(), device=device)

# Create rendering objects from loaded data
cameras = train_dataset.get_cameras()
lights = train_dataset.get_lights()


# Optimizer
if cfg.model.type == 'point':
    optimizer = optim.SGD(
        [p for p in model.parameters() if p.requires_grad], lr=lr)
else:
    if cfg.renderer.is_neural_texture:
        optimizer = optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.99))

# Output directory for generated results.
os.makedirs(generation_dir, exist_ok=True)

batch_size = 1
vis_n_outputs = cfg['generation']['vis_n_outputs']
mesh_extension = cfg['generation']['mesh_extension']

# Dataset
dataset = config.create_dataset(cfg.data, mode='test')
test_loader = torch.utils.data.DataLoader(
    dataset, batch_size=batch_size, num_workers=1, shuffle=False)
img_size = args.img_size or dataset.resolution
if isinstance(img_size, Number):
    img_size = (img_size, img_size)

# Model
model = config.create_model(cfg, mode='test', device=device, camera_model=dataset.get_cameras()).to(device=device)

checkpoint_io = CheckpointIO(out_dir, model=model)
checkpoint_io.load(cfg['test']['model_file'])

# Generator
generator = config.create_generator(cfg, model, device=device)

torch.manual_seed(0)

# Generate
with torch.no_grad():
    model.eval()
    # Generate meshes
    if not args.render_only:
        logger_py.info('Generating mesh...')
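
Example #5 passes a `tolerating_collate` function to the validation loader without showing its body. A plausible sketch, assuming the intent is to skip samples that failed to load before deferring to PyTorch's default collate:

from torch.utils.data.dataloader import default_collate

def tolerating_collate(batch):
    # Hypothetical body: drop samples that failed to load (None) and collate
    # the rest; an entirely empty batch is passed through as None.
    batch = [sample for sample in batch if sample is not None]
    return default_collate(batch) if batch else None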