Example #1
def main(trainsize, num_modes):
    """Compute the POD basis (dominant left singular values) of a set of
    lifted, scaled snapshot training data and save the basis and the
    corresponding singular values.

    WARNING: This will OVERWRITE any existing basis for this `trainsize`.

    Parameters
    ----------
    trainsize : int
        The number of snapshots to use in the computation. There must
        exist a file of exactly `trainsize` lifted, scaled snapshots
        (see step2a_transform.py).

    num_modes : int or list(int)
        The number of POD modes (left singular vectors) to retain.
    """
    utils.reset_logger(trainsize)

    # Load the first `trainsize` lifted, scaled snapshot data.
    training_data, _, qbar, scales = utils.load_scaled_data(trainsize)

    if num_modes == -1:
        # Secret mode! Compute all singular values (EXPENSIVE).
        return compute_and_save_all_svdvals(training_data)
    else:
        # Compute and save the (randomized) SVD from the training data.
        return compute_and_save_pod_basis(num_modes, training_data, qbar,
                                          scales)
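
# For context: a minimal sketch (not part of the original script) of the
# randomized-SVD computation that compute_and_save_pod_basis() performs.
# The helper name and the use of sklearn here are assumptions.
from sklearn.utils.extmath import randomized_svd

def pod_basis_sketch(training_data, num_modes, seed=42):
    """Return the leading `num_modes` left singular vectors (POD basis)
    and corresponding singular values of an (n, k) snapshot matrix."""
    V, svals, _ = randomized_svd(training_data, num_modes, random_state=seed)
    return V, svals
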
def train_and_save_all(trainsize, num_modes, regs):
    """Train and save ROMs with the given dimension and regularization.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM(s).

    num_modes : int or list(int)
        Dimension of the ROM(s) to train, i.e., the number of retained POD
        modes (left singular vectors) used to project the training data.

    regs : float or list(float)
        Regularization parameter(s) to use in the training.
    """
    utils.reset_logger(trainsize)

    if np.isscalar(num_modes):
        num_modes = [int(num_modes)]
    if np.isscalar(regs):
        regs = [regs]

    logging.info(f"TRAINING {len(num_modes)*len(regs)} ROMS")
    for r in num_modes:
        # Load training data.
        X_, Xdot_, time_domain, _ = utils.load_projected_data(trainsize, r)

        # Evaluate inputs over the training time domain.
        Us = config.U(time_domain)

        # Train and save each ROM.
        for reg in regs:
            with utils.timed_block(f"Training ROM with r={r:d}, reg={reg:e}"):
                rom = train_rom(X_, Xdot_, Us, reg)
                if rom:
                    rom.save_model(config.rom_path(trainsize, r, reg),
                                   save_basis=False, overwrite=True)
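
# Hypothetical invocation; the snapshot count, mode numbers, and
# regularization values below are illustrative only:
#     train_and_save_all(trainsize=10000, num_modes=[22, 35], regs=[200, 400])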
def main(trainsize, num_modes):
    """Project lifted, scaled snapshot training data to the subspace spanned
    by the columns of the POD basis V; compute velocity information for the
    projected snapshots; and save the projected data.

    Parameters
    ----------
    trainsize : int
        The number of snapshots to use in the computation. There must exist
        a file of exactly `trainsize` lifted, scaled snapshots
        (see step2a_lift.py).

    num_modes : int or list(int)
        The number of POD modes (left singular vectors) to use in the
        projection, which determines the dimension of the resulting ROM.
        There must exist a file of at least `num_modes` left singular vectors
        computed from exactly `trainsize` lifted, scaled snapshots
        (see step2b_basis.py).
    """
    utils.reset_logger(trainsize)

    if np.isscalar(num_modes):
        num_modes = [int(num_modes)]

    # Load lifted, scaled snapshot data.
    X, time_domain, scales = utils.load_scaled_data(trainsize)

    # Load the POD basis.
    V, _ = utils.load_basis(trainsize, max(num_modes))

    # Project and save the data for each number of POD modes.
    for r in num_modes:
        project_and_save_data(trainsize, r, X, time_domain, scales, V)
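
# A rough sketch of the projection step performed for each r, assuming X is
# the (n, k) scaled snapshot matrix and V the (n, max(num_modes)) POD basis.
# The velocity estimate below uses np.gradient; the project_and_save_data()
# implementation may use a different differencing scheme.
import numpy as np

def project_sketch(X, V, r, time_domain):
    Q_ = V[:, :r].T @ X                             # Projected snapshots.
    Qdot_ = np.gradient(Q_, time_domain, axis=1, edge_order=2)  # Velocities.
    return Q_, Qdot_
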
def train_single(trainsize, r, regs):
    """Train and save a ROM with the given dimension and regularization
    hyperparameters.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : two or three non-negative floats
        Regularization hyperparameters (first-order, quadratic, cubic) to use
        in the Operator Inference least-squares problem for training the ROM.
    """
    utils.reset_logger(trainsize)

    # Validate inputs.
    modelform = get_modelform(regs)
    check_lstsq_size(trainsize, r, modelform)
    check_regs(regs)

    # Load training data.
    Q_, Qdot_, t = utils.load_projected_data(trainsize, r)
    U = config.U(t)

    # Train and save the ROM.
    with utils.timed_block(f"Training ROM with k={trainsize:d}, "
                           f"{config.REGSTR(regs)}"):
        rom = opinf.InferredContinuousROM(modelform)
        rom.fit(None, Q_, Qdot_, U, P=regularizer(r, *list(regs)))
        save_trained_rom(trainsize, r, regs, rom)
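
# The fit() call above solves a Tikhonov-regularized least-squares problem.
# A minimal dense sketch of that core step; the data matrix D and the
# column ordering are assumptions (opinf assembles them internally):
import numpy as np

def opinf_lstsq_sketch(D, R, gamma):
    """Solve min ||D @ Ot - R||_F^2 + gamma^2 ||Ot||_F^2 for the stacked
    operator matrix Ot (one column per reduced state)."""
    d = D.shape[1]
    A = np.vstack([D, gamma * np.eye(d)])           # Append the regularizer.
    b = np.vstack([R, np.zeros((d, R.shape[1]))])
    return np.linalg.lstsq(A, b, rcond=None)[0]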
Example #5
def train_single(trainsize, r, regs):
    """Train and save a ROM with the given dimension and regularization
    hyperparameters.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : two positive floats
        Regularization hyperparameters (non-quadratic, quadratic) to use in
        the Operator Inference least-squares problem for training the ROM.
    """
    utils.reset_logger(trainsize)

    # Validate inputs.
    d = check_lstsq_size(trainsize, r)
    λ1, λ2 = check_regs(regs)

    # Load training data.
    Q_, Qdot_, t = utils.load_projected_data(trainsize, r)
    U = config.U(t)

    # Train and save the ROM.
    with utils.timed_block(f"Training ROM with k={trainsize:d}, "
                           f"r={r:d}, λ1={λ1:.0f}, λ2={λ2:.0f}"):
        rom = roi.InferredContinuousROM(config.MODELFORM)
        rom.fit(None, Q_, Qdot_, U, P=regularizer(r, d, λ1, λ2))
        save_trained_rom(trainsize, r, regs, rom)
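
# A plausible sketch of the regularizer() helper called above, assuming the
# least-squares unknowns place the r(r+1)/2 quadratic terms in a contiguous
# block at the end (the true ordering depends on config.MODELFORM):
import numpy as np

def regularizer_sketch(r, d, λ1, λ2):
    """Diagonal Tikhonov weights: λ1 for the non-quadratic terms and λ2 for
    the quadratic terms."""
    diag = np.full(d, λ1, dtype=float)
    diag[-(r * (r + 1) // 2):] = λ2     # Quadratic block assumed last.
    return diag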
Example #6
def main(testsize):
    """Run all tests with `testsize` columns of data."""
    utils.reset_logger()
    lifted_data = test_lift(testsize)
    test_getvar(lifted_data)
    test_scalers(lifted_data)
    logging.info("ALL TESTS PASSED")
    print("ALL TESTS PASSED")
Example #7
def main(trainsize, num_modes, center=False):
    """Lift and scale the GEMS simulation data; compute a POD basis of the
    lifted, scaled snapshot training data; project the lifted, scaled snapshot
    training data to the subspace spanned by the columns of the POD basis V,
    and compute velocity information for the projected snapshots.

    Save lifted/scaled snapshots, the POD basis, and the projected data.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to lift / scale / save.

    num_modes : int or None
        The number of POD modes (left singular vectors) to use in the
        projection. This is the upper bound for the size of ROMs that
        can be trained with this data set.

    center : bool
        If True, center the scaled snapshots by the mean scaled snapshot
        before computing the POD basis.
    """
    utils.reset_logger(trainsize)

    # STEP 2A: Lift and scale the data ----------------------------------------
    try:
        # Attempt to load existing lifted, scaled data.
        training_data, time, qbar, scales = utils.load_scaled_data(trainsize)

    except utils.DataNotFoundError:
        # Lift the GEMS data, then scale the lifted snapshots by variable.
        lifted_data, time = step2a.load_and_lift_gems_data(trainsize)
        training_data, qbar, scales = step2a.scale_and_save_data(
            trainsize, lifted_data, time, center)
        del lifted_data

    # STEP 2B: Get the POD basis from the lifted, scaled data -----------------
    try:
        # Attempt to load existing SVD data.
        basis, qbar, scales = utils.load_basis(trainsize, None)
        if basis.shape[1] < num_modes:
            raise utils.DataNotFoundError("not enough saved basis vectors")
        num_modes = basis.shape[1]  # Use larger basis size if available.

    except utils.DataNotFoundError:
        # Compute and save the (randomized) SVD from the training data.
        basis = step2b.compute_and_save_pod_basis(num_modes, training_data,
                                                  qbar, scales)

    # STEP 2C: Project data to the appropriate subspace -----------------------
    return step2c.project_and_save_data(training_data, time, basis)
Example #8
def main(trainsize, num_modes):
    """Lift and scale the GEMS simulation data; compute a POD basis of the
    lifted, scaled snapshot training data; project the lifted, scaled snapshot
    training data to the subspace spanned by the columns of the POD basis V,
    and compute velocity information for the projected snapshots.

    Save lifted/scaled snapshots, the POD basis, and the projected data.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to lift / scale / save.

    num_modes : int or list(int)
        The number of POD modes (left singular vectors) to use in the
        projection, which determines the dimension of the resulting ROM.
    """
    utils.reset_logger(trainsize)

    if np.isscalar(num_modes):
        num_modes = [int(num_modes)]

    # STEP 2A: Lift and scale the data ----------------------------------------
    try:
        # Attempt to load existing lifted, scaled data.
        X, time_domain, scales = utils.load_scaled_data(trainsize)

    except utils.DataNotFoundError:
        # Lift the GEMS data, then scale the lifted snapshots by variable.
        lifted_data, time_domain = step2a.load_and_lift_gems_data(trainsize)
        X, scales = step2a.scale_and_save_data(trainsize, lifted_data,
                                               time_domain)

    # STEP 2B: Get the POD basis from the lifted, scaled data -----------------
    try:
        # Attempt to load existing SVD data.
        V, _ = utils.load_basis(trainsize, max(num_modes))

    except utils.DataNotFoundError:
        # Compute and save the (randomized) SVD from the training data.
        V, _ = step2b.compute_and_save_pod_basis(trainsize, max(num_modes), X,
                                                 scales)

    # STEP 2C: Project data to the appropriate subspace -----------------------
    for r in num_modes:
        step2c.project_and_save_data(trainsize, r, X, time_domain, scales, V)
Example #9
def main(trainsize,
         r,
         regs,
         elems=None,
         plotPointTrace=False,
         plotRelativeErrors=False,
         plotSpatialStatistics=False):
    """Make the indicated visualization.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute point traces.

    plotPointTrace : bool
        If True, plot point traces in time at the indices `elems`.

    plotRelativeErrors : bool
        If True, plot relative projection / prediction errors in time.

    plotSpatialStatistics : bool
        If True, plot spatial statistics in time.
    """
    utils.reset_logger(trainsize)

    # Point traces in time.
    if plotPointTrace:
        logging.info("POINT TRACES")
        point_traces(trainsize, r, regs, elems)

    # Relative projection / prediction errors in time.
    if plotRelativeErrors:
        logging.info("ERRORS IN TIME")
        errors_in_time(trainsize, r, regs)

    # Spatial statistic in time.
    if plotSpatialStatistics:
        logging.info("SPATIAL STATISTICS")
        # Compute GEMS features if needed (only done once).
        if not os.path.isfile(config.statistical_features_path()):
            save_statistical_features()
        spatial_statistics(trainsize, r, regs)
Example #10
def main(trainsizes):
    """Lift and scale the GEMS simulation training data and save the results.

    Parameters
    ----------
    trainsizes : int or list(int)
        Number of snapshots to lift, scale, and save.
    """
    utils.reset_logger()

    if np.isscalar(trainsizes):
        trainsizes = [int(trainsizes)]

    # Lift the training data.
    lifted_data, time_domain = load_and_lift_gems_data(max(trainsizes))

    # Scale and save each subset of lifted data.
    for trainsize in trainsizes:
        utils.reset_logger(trainsize)
        scale_and_save_data(trainsize, lifted_data, time_domain)
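
# A minimal sketch of per-variable scaling, assuming the lifted snapshot
# matrix stacks `num_vars` variables of `n` entries each and that each
# variable is scaled by its maximum absolute value. The actual
# scale_and_save_data() normalization may differ.
import numpy as np

def scale_by_variable_sketch(lifted_data, num_vars):
    n = lifted_data.shape[0] // num_vars
    scaled = np.empty_like(lifted_data)
    scales = np.empty(num_vars)
    for i in range(num_vars):
        block = lifted_data[i*n:(i + 1)*n]
        scales[i] = np.abs(block).max()
        scaled[i*n:(i + 1)*n] = block / scales[i]
    return scaled, scales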
Example #11
def main(trainsize, num_modes):
    """Compute the POD basis (dominant left singular values) of a set of
    lifted, scaled snapshot training data and save the basis and the
    corresponding singular values.

    Parameters
    ----------
    trainsize : int
        The number of snapshots to use in the computation. There must exist
        a file of exactly `trainsize` lifted, scaled snapshots
        (see step2a_lift.py).

    num_modes : int or list(int)
        The number of POD modes (left singular vectors) to retain.
    """
    utils.reset_logger(trainsize)

    # Load the first `trainsize` lifted, scaled snapshot data.
    training_data, _, scales = utils.load_scaled_data(trainsize)

    # Compute and save the (randomized) SVD from the training data.
    compute_and_save_pod_basis(trainsize, num_modes, training_data, scales)
def main(trainsize):
    """Project lifted, scaled snapshot training data to the subspace spanned
    by the columns of the POD basis V; compute velocity information for the
    projected snapshots; and save the projected data.

    Parameters
    ----------
    trainsize : int
        The number of snapshots to use in the computation. There must
        exist a file of exactly `trainsize` lifted, scaled snapshots
        (see step2a_transform.py) and a basis for those snapshots
        (see step2b_basis.py).
    """
    utils.reset_logger(trainsize)

    # Load lifted, scaled snapshot data.
    scaled_data, time_domain, _, _ = utils.load_scaled_data(trainsize)

    # Load the POD basis.
    V, _, _ = utils.load_basis(trainsize, None)

    # Project and save the data.
    return project_and_save_data(scaled_data, time_domain, V)
def main(trainsize,
         r,
         reg,
         elems,
         plotTimeTrace=False,
         plotStatisticalFeatures=False):
    """Make the indicated visualization.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM. This is also the number of retained POD
        modes (left singular vectors) used to project the training data.

    reg : float
        The regularization parameter used to train the ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute time traces.

    plotTimeTrace : bool
        If True, plot time traces at the indices `elems`.

    plotStatisticalFeatures : bool
        If True, plot statistical features in time.
    """
    utils.reset_logger(trainsize)

    # Time traces (single ROM, several monitoring locations).
    if plotTimeTrace:
        logging.info("TIME TRACES")
        time_traces(trainsize, r, reg, elems)

    # Statistical features (single ROM, several features).
    if plotStatisticalFeatures:
        logging.info("STATISTICAL FEATURES")
        # Compute GEMS features if needed (only done once).
        if not os.path.isfile(config.statistical_features_path()):
            save_statistical_features()
        statistical_features(trainsize, r, reg)
def main(trainsizes, center=False):
    """Lift and scale the GEMS simulation training data and save the results.

    Parameters
    ----------
    trainsizes : int or list(int)
        Number of snapshots to lift, scale, and save.

    center : bool
        If True, center the scaled snapshots by the mean scaled snapshot
        before computing the POD basis.
    """
    utils.reset_logger()

    if np.isscalar(trainsizes):
        trainsizes = [int(trainsizes)]

    # Lift the training data.
    lifted_data, time_domain = load_and_lift_gems_data(max(trainsizes))

    # Scale and save each subset of lifted data.
    for trainsize in trainsizes:
        utils.reset_logger(trainsize)
        scale_and_save_data(trainsize, lifted_data, time_domain, center)
def main(data_folder, overwrite=False, serial=False):
    """Extract snapshot data, in parallel, from the .tar files in the
    specified folder of the form Data_<first-snapshot>to<last-snapshot>.tar.

    Parameters
    ----------
    data_folder : str
        Path to the folder that contains the raw GEMS .tar data files,
        preferably as an absolute path (e.g., /path/to/folder).

    overwrite : bool
        If False and the snapshot matrix file exists, raise an error.
        If True, overwrite the existing snapshot matrix file if it exists.

    serial : bool
        If True, do the unpacking sequentially in 10,000 snapshot chunks.
        If False, do the unpacking in parallel with 10,000 snapshot chunks.
    """
    utils.reset_logger()

    # If it exists, copy the grid file to the Tecplot data directory.
    source = os.path.join(data_folder, config.GRID_FILE)
    if os.path.isfile(source):
        target = config.grid_data_path()
        with utils.timed_block(f"Copying {source} to {target}"):
            shutil.copy(source, target)
    else:
        logging.warning(f"Grid file {source} not found!")

    # Locate and sort raw .tar files.
    target_pattern = os.path.join(data_folder, "Data_*to*.tar")
    tarfiles = sorted(glob.glob(target_pattern))
    if not tarfiles:
        raise FileNotFoundError(target_pattern)

    # Get the snapshot indices corresponding to each file from the file names.
    starts, stops = [], []
    for i, tfile in enumerate(tarfiles):
        matches = re.findall(r"Data_(\d+)to(\d+)\.tar", tfile)
        if not matches:
            raise ValueError(f"file {tfile} not named with convention "
                             "Data_<first-snapshot>to<last-snapshot>.tar")
        start, stop = [int(d) for d in matches[0]]
        if i == 0:
            start0 = start  # Offset
        starts.append(start - start0)
        stops.append(stop + 1 - start0)

        if i > 0 and stops[i-1] != starts[i]:
            raise ValueError(f"file {tfile} not continuous from previous set")
    num_snapshots = stops[-1]

    # Create an empty HDF5 file of appropriate size for the data.
    save_path = config.gems_data_path()
    if os.path.isfile(save_path) and not overwrite:
        raise FileExistsError(f"{save_path} (use --overwrite to overwrite)")
    with utils.timed_block("Initializing HDF5 file for data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", shape=(config.DOF*config.NUM_GEMSVARS,
                                             num_snapshots),
                                      dtype=np.float64)
            hf.create_dataset("time", shape=(num_snapshots,),
                                      dtype=np.float64)
    logging.info(f"Data file initialized as {save_path}.")

    # Read the files in chunks.
    args = zip(tarfiles, starts, stops)
    if serial:       # Read the files serially (sequentially).
        for tf, start, stop in args:
            _read_tar_and_save_data(tf, start, stop, parallel=False)
    else:            # Read the files in parallel.
        with mp.Pool(initializer=_globalize_lock, initargs=(mp.Lock(),),
                     processes=min([len(tarfiles), mp.cpu_count()])) as pool:
            pool.starmap(_read_tar_and_save_data, args)
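
# The Pool(initializer=...) call above exists because a multiprocessing.Lock
# cannot be passed through starmap(); it must be installed as a module-level
# global in each worker. A self-contained demo of that pattern (the names
# here are illustrative, not the script's own):
import multiprocessing as mp

def _install_lock(lock):
    global _LOCK
    _LOCK = lock

def _locked_worker(i):
    with _LOCK:                         # Serialize the critical section.
        print(f"worker {i} holds the lock")

if __name__ == "__main__":
    with mp.Pool(processes=2, initializer=_install_lock,
                 initargs=(mp.Lock(),)) as pool:
        pool.map(_locked_worker, range(4))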
def main(timeindices,
         variables=None,
         snaptype=["gems", "rom", "error"],
         trainsize=None,
         r=None,
         reg=None):
    """Convert a snapshot in .h5 format to a .dat file that matches the format
    of grid.dat. The new file is saved in `config.tecplot_path()` with the same
    filename and the new file extension .dat.

    Parameters
    ----------
    timeindices : ndarray(int) or int
        Indices (one-based) in the full time domain of the snapshots to save.

    variables : str or list(str)
        The variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    snaptype : {"rom", "gems", "error"} or list(str)
        Which kinds of snapshots to save. Options:
        * "gems": snapshots from the full-order GEMS data;
        * "rom": reconstructed snapshots produced by a ROM;
        * "error": absolute error between the full-order data
                   and the reduced-order reconstruction.
        If "rom" or "error" are selected, the ROM is selected by the
        remaining arguments.

    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Number of retained modes in the ROM.

    reg : float
        Regularization factor used to train the ROM.
    """
    utils.reset_logger(trainsize)

    # Parse parameters.
    timeindices = np.sort(np.atleast_1d(timeindices))
    simtime = timeindices.max()
    t = utils.load_time_domain(simtime + 1)

    # Parse the variables.
    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    if isinstance(snaptype, str):
        snaptype = [snaptype]
    for stype in snaptype:
        if stype not in ("gems", "rom", "error"):
            raise ValueError(f"invalid snaptype '{stype}'")

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        cell_volume = grid_data[2 * num_nodes:3 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Extract full-order data if needed.
    if ("gems" in snaptype) or ("error" in snaptype):
        gems_data, _ = utils.load_gems_data(cols=timeindices)
        with utils.timed_block("Lifting selected snapshots of GEMS data"):
            lifted_data = dproc.lift(gems_data)
            true_snaps = np.concatenate(
                [dproc.getvar(v, lifted_data) for v in variables])
    # Simulate ROM if needed.
    if ("rom" in snaptype) or ("error" in snaptype):
        # Load the SVD data.
        V, _ = utils.load_basis(trainsize, r)

        # Load the initial conditions and scales.
        X_, _, _, scales = utils.load_projected_data(trainsize, r)

        # Load the appropriate ROM.
        rom = utils.load_rom(trainsize, r, reg)

        # Simulate the ROM over the time domain.
        with utils.timed_block(f"Simulating ROM with r={r:d}, reg={reg:.0e}"):
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")
            if np.any(np.isnan(x_rom)) or x_rom.shape[1] < simtime:
                raise ValueError("ROM unstable!")

        # Reconstruct the results (only selected variables / snapshots).
        with utils.timed_block("Reconstructing simulation results"):
            x_rec = dproc.unscale(V[:, :r] @ x_rom[:, timeindices], scales)
            x_rec = np.concatenate([dproc.getvar(v, x_rec) for v in variables])

    dsets = {}
    if "rom" in snaptype:
        dsets["rom"] = x_rec
    if "gems" in snaptype:
        dsets["gems"] = true_snaps
    if "error" in snaptype:
        with utils.timed_block("Computing absolute error of reconstruction"):
            abs_err = np.abs(true_snaps - x_rec)
        dsets["error"] = abs_err

    # Save each of the selected snapshots in Tecplot format matching grid.dat.
    for j, tindex in enumerate(timeindices):

        header = HEADER.format(varnames, tindex, t[tindex], num_nodes,
                               config.DOF,
                               len(variables) + 2, "SINGLE " * len(variables))
        for label, dset in dsets.items():

            if label == "gems":
                save_path = config.gems_snapshot_path(tindex)
            if label in ("rom", "error"):
                folder = config.rom_snapshot_path(trainsize, r, reg)
                save_path = os.path.join(folder, f"{label}_{tindex:05d}.dat")
            with utils.timed_block(f"Writing {label} snapshot {tindex:05d}"):
                with open(save_path, 'w') as outfile:
                    # Write the header.
                    outfile.write(header)

                    # Write the geometry data (x,y coordinates).
                    for i in range(0, len(x), NCOLS):
                        outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                    for i in range(0, len(y), NCOLS):
                        outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                    # Write the data for each variable.
                    for i in range(0, dset.shape[0], NCOLS):
                        row = ' '.join(f"{v:.9E}"
                                       for v in dset[i:i + NCOLS, j])
                        outfile.write(row + '\n')

                    # Write connectivity information.
                    for i in range(0, len(connectivity), NCOLS):
                        outfile.write(' '.join(connectivity[i:i + NCOLS]) +
                                      '\n')
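
# The writers above emit whitespace-delimited values NCOLS per line to match
# grid.dat. A standalone demonstration of that chunked-row pattern (the
# NCOLS value here is illustrative):
import io
import numpy as np

def write_chunked(outfile, values, ncols=4):
    for i in range(0, len(values), ncols):
        outfile.write(' '.join(f"{v:.9E}" for v in values[i:i + ncols]) + '\n')

buf = io.StringIO()
write_chunked(buf, np.arange(10.0))
print(buf.getvalue())                   # Three rows: 4 + 4 + 2 values.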
def _train_minimize_1D(trainsize, r, regs, testsize=None, margin=1.1):
    """Train ROMs with the given dimension(s), saving only the ROM with
    the least training error that satisfies a bound on the integrated POD
    coefficients, using a search algorithm to choose the regularization
    parameter.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : two non-negative floats
        Bounds for the (single) regularization hyperparameter to use in the
        Operator Inference least-squares problem for training the ROM.

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float ≥ 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    check_lstsq_size(trainsize, r, modelform="cAHB")
    log10regs = np.log10(regs)

    # Load training data.
    t = utils.load_time_domain(testsize)
    Q_, Qdot_, _ = utils.load_projected_data(trainsize, r)
    U = config.U(t[:trainsize])

    # Compute the bound to require for integrated POD modes.
    B = margin * np.abs(Q_).max()

    # Create a solver mapping regularization hyperparameters to operators.
    with utils.timed_block(f"Constructing least-squares solver, r={r:d}"):
        rom = opinf.InferredContinuousROM("cAHB")
        rom._construct_solver(None, Q_, Qdot_, U, 1)

    # Test each regularization hyperparameter.
    def training_error(log10reg):
        """Return the training error resulting from the regularization
        hyperparameters λ1 = λ2 = 10^log10reg. If the resulting model
        violates the POD bound, return "infinity".
        """
        λ = 10**log10reg

        # Train the ROM on all training snapshots.
        with utils.timed_block(f"Testing ROM with λ={λ:e}"):
            rom._evaluate_solver(λ)

            # Simulate the ROM over the full domain.
            with np.warnings.catch_warnings():
                np.warnings.simplefilter("ignore")
                q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

            # Check for boundedness of solution.
            if not is_bounded(q_rom, B):
                return _MAXFUN

            # Calculate integrated relative errors in the reduced space.
            return opinf.post.Lp_error(Q_, q_rom[:, :trainsize],
                                       t[:trainsize])[1]

    opt_result = opt.minimize_scalar(training_error,
                                     method="bounded",
                                     bounds=log10regs)
    if opt_result.success and opt_result.fun != _MAXFUN:
        λ = 10**opt_result.x
        with utils.timed_block(f"Best regularization for k={trainsize:d}, "
                               f"r={r:d}: λ={λ:.0f}"):
            rom._evaluate_solver(λ)
            save_trained_rom(trainsize, r, (λ, λ), rom)
    else:
        message = "Regularization search optimization FAILED"
        print(message)
        logging.info(message)
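
# The search above uses scipy.optimize.minimize_scalar with method="bounded",
# a derivative-free minimization over an interval in log10(λ). A toy
# self-contained demo of the same pattern:
import scipy.optimize as opt

def toy_error(log10reg):
    return (log10reg - 2.0)**2 + 1.0    # Stand-in for the training error.

result = opt.minimize_scalar(toy_error, method="bounded", bounds=(0.0, 5.0))
print(10**result.x)                     # ~100, the minimizer in this toy case.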
def train_gridsearch(trainsize, r, regs, testsize=None, margin=1.1):
    """Train ROMs with the given dimension over a grid of potential
    regularization hyperparameters, saving only the ROM with the least
    training error that satisfies a bound on the integrated POD coefficients.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : (float, float, int) repeated two or three times
        Bounds and sizes for the grid of regularization hyperparameters.
        First-order: search in [regs[0], regs[1]] at regs[2] points.
        Quadratic:   search in [regs[3], regs[4]] at regs[5] points.
        Cubic:       search in [regs[6], regs[7]] at regs[8] points
                     (only if the model includes a cubic term).

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float ≥ 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    if len(regs) not in (6, 9):
        raise ValueError("6 or 9 regs required (bounds / sizes of grids)")
    grids = []
    for i in range(0, len(regs), 3):
        check_regs(regs[i:i + 2])
        grids.append(
            np.logspace(np.log10(regs[i]), np.log10(regs[i + 1]),
                        int(regs[i + 2])))
    modelform = get_modelform(grids)
    d = check_lstsq_size(trainsize, r, modelform)

    # Load training data.
    t = utils.load_time_domain(testsize)
    Q_, Qdot_, _ = utils.load_projected_data(trainsize, r)
    U = config.U(t[:trainsize])

    # Compute the bound to require for integrated POD modes.
    M = margin * np.abs(Q_).max()

    # Create a solver mapping regularization hyperparameters to operators.
    num_tests = np.prod([grid.size for grid in grids])
    print(f"TRAINING {num_tests} ROMS")
    with utils.timed_block(f"Constructing least-squares solver, r={r:d}"):
        rom = opinf.InferredContinuousROM(modelform)
        rom._construct_solver(None, Q_, Qdot_, U, np.ones(d))

    # Test each regularization hyperparameter.
    errors_pass = {}
    errors_fail = {}
    for i, regs in enumerate(itertools.product(*grids)):
        with utils.timed_block(f"({i+1:d}/{num_tests:d}) Testing ROM with "
                               f"{config.REGSTR(regs)}"):
            # Train the ROM on all training snapshots.
            rom._evaluate_solver(regularizer(r, *list(regs)))

            # Simulate the ROM over the full domain.
            with np.warnings.catch_warnings():
                np.warnings.simplefilter("ignore")
                q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

            # Check for boundedness of solution.
            errors = errors_pass if is_bounded(q_rom, M) else errors_fail

            # Calculate integrated relative errors in the reduced space.
            if q_rom.shape[1] > trainsize:
                errors[tuple(regs)] = opinf.post.Lp_error(
                    Q_, q_rom[:, :trainsize], t[:trainsize])[1]

    # Choose and save the ROM with the least error.
    if not errors_pass:
        message = f"NO STABLE ROMS for r={r:d}"
        print(message)
        logging.info(message)
        return

    err2reg = {err: reg for reg, err in errors_pass.items()}
    regs = list(err2reg[min(err2reg.keys())])
    with utils.timed_block(f"Best regularization for k={trainsize:d}, "
                           f"r={r:d}: {config.REGSTR(regs)}"):
        rom._evaluate_solver(regularizer(r, *regs))
        save_trained_rom(trainsize, r, regs, rom)
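
# Hypothetical invocation sweeping a 10x10 grid of first-order and quadratic
# regularization values (all numbers illustrative):
#     train_gridsearch(20000, 40, (1e2, 1e5, 10, 1e10, 1e13, 10),
#                      testsize=60000, margin=1.1)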
Example #19
def temperature_average(trainsize, r, reg, cutoff=60000):
    """Get the average-in-time temperature profile for the GEMS data and a
    specific ROM.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    reg : float
        Regularization hyperparameter used to train the ROM.

    cutoff : int
        Number of time steps to average over.
    """
    utils.reset_logger(trainsize)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2*num_nodes:3*num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Compute full-order time-averaged temperature from GEMS data.
    _s = config.DOF * config.GEMS_VARIABLES.index("T")
    gems_data, _ = utils.load_gems_data(rows=slice(_s, _s + config.DOF),
                                        cols=cutoff)
    with utils.timed_block("Computing time-averaged GEMS temperature"):
        T_gems = gems_data.mean(axis=1)
        assert T_gems.shape == (config.DOF, )

    # Simulate ROM and compute the time-averaged temperature.
    t, V, scales, q_rom = step4.simulate_rom(trainsize, r, reg, steps=cutoff)
    with utils.timed_block("Reconstructing ROM simulation results"):
        T_rom = dproc.unscale(dproc.getvar("T", V) @ q_rom, scales, "T")
        T_rom = T_rom.mean(axis=1)
        assert T_rom.shape == (config.DOF, )

    header = HEADER.format('"T"', 0, 0, num_nodes, config.DOF, 3,
                           "DOUBLE " * 3)
    header = header.replace("VARLOCATION=([3-3]", "VARLOCATION=([3]")
    for label, dset in zip(["gems", "rom"], [T_gems, T_rom]):
        if label == "gems":
            save_path = os.path.join(config.tecplot_path(), "gems",
                                     "temperature_average.dat")
        elif label == "rom":
            folder = config.rom_snapshot_path(trainsize, r, reg)
            save_path = os.path.join(folder, "temperature_average.dat")
        with utils.timed_block(f"Writing {label} temperature average"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                for i in range(0, len(x), NCOLS):
                    outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                for i in range(0, len(y), NCOLS):
                    outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                # Write the data for each variable.
                for i in range(0, dset.shape[0], NCOLS):
                    row = ' '.join(f"{v:.9E}" for v in dset[i:i + NCOLS])
                    outfile.write(row + '\n')

                # Write connectivity information.
                for i in range(0, len(connectivity), NCOLS):
                    outfile.write(' '.join(connectivity[i:i + NCOLS]) + '\n')
def train_with_minimization(trainsize, num_modes, regs,
                            testsize=None, margin=1.5):
    """Train ROMs with the given dimension(s), saving only the ROM with
    the least training error that satisfies a bound on the integrated POD
    coefficients, using a search algorithm to choose the regularization
    parameter.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM(s).

    num_modes : int or list(int)
        Dimension of the ROM(s) to train, i.e., the number of retained POD
        modes (left singular vectors) used to project the training data.

    regs : two positive floats
        Bounds (low, high) between which to search for the regularization
        parameter.

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float >= 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    if np.isscalar(num_modes):
        num_modes = [num_modes]
    if np.isscalar(regs) or len(regs) != 2:
        raise ValueError("2 regularizations required (reg_low, reg_high)")
    bounds = np.log10(regs)

    # Load the full time domain and evaluate the input function.
    t = utils.load_time_domain(testsize)
    Us = config.U(t)

    for r in num_modes:
        # Load training data.
        X_, Xdot_, _, scales = utils.load_projected_data(trainsize, r)

        # Compute the bound to require for integrated POD modes.
        B = margin * np.abs(X_).max()

        # Test each regularization parameter.
        def training_error_from_rom(log10reg):
            reg = 10**log10reg

            # Train the ROM on all training snapshots.
            with utils.timed_block(f"Testing ROM with r={r:d}, reg={reg:e}"):
                rom = train_rom(X_, Xdot_, Us[:trainsize], reg)
                if not rom:
                    return _MAXFUN

                # Simulate the ROM over the full domain.
                with np.warnings.catch_warnings():
                    np.warnings.simplefilter("ignore")
                    x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")

                # Check for boundedness of solution.
                if not is_bounded(x_rom, B):
                    return _MAXFUN

                # Calculate integrated relative errors in the reduced space.
                return roi.post.Lp_error(X_, x_rom[:, :trainsize],
                                         t[:trainsize])[1]

        opt_result = opt.minimize_scalar(training_error_from_rom,
                                         bounds=bounds, method="bounded")
        if opt_result.success and opt_result.fun != _MAXFUN:
            best_reg = 10 ** opt_result.x
            best_rom = train_rom(X_, Xdot_, Us[:trainsize], best_reg)
            save_best_trained_rom(trainsize, r, best_reg, best_rom)
        else:
            print(f"Regularization search optimization FAILED for r = {r:d}")
def train_with_gridsearch(trainsize, num_modes, regs,
                          testsize=None, margin=1.5):
    """Train ROMs with the given dimension(s) and regularization(s),
    saving only the ROM with the least training error that satisfies
    a bound on the integrated POD coefficients.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM(s).

    num_modes : int or list(int)
        Dimension of the ROM(s) to train, i.e., the number of retained POD
        modes (left singular vectors) used to project the training data.

    regs : float or list(float)
        Regularization parameter(s) to use in the training.

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float >= 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    if np.isscalar(num_modes):
        num_modes = [num_modes]
    if np.isscalar(regs):
        regs = [regs]

    # Load the full time domain and evaluate the input function.
    t = utils.load_time_domain(testsize)
    Us = config.U(t)

    logging.info(f"TRAINING {len(num_modes)*len(regs)} ROMS")
    for ii, r in enumerate(num_modes):
        # Load training data.
        X_, Xdot_, _, scales = utils.load_projected_data(trainsize, r)

        # Compute the bound to require for integrated POD modes.
        M = margin * np.abs(X_).max()

        # Test each regularization parameter.
        trained_roms = {}
        errors_pass = {}
        errors_fail = {}
        for reg in regs:

            # Train the ROM on all training snapshots.
            with utils.timed_block(f"Testing ROM with r={r:d}, reg={reg:e}"):
                rom = train_rom(X_, Xdot_, Us[:trainsize], reg)
                if not rom:
                    continue        # Skip if training fails.
                trained_roms[reg] = rom

                # Simulate the ROM over the full domain.
                with np.warnings.catch_warnings():
                    np.warnings.simplefilter("ignore")
                    x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")

                # Check for boundedness of solution.
                errors = errors_pass if is_bounded(x_rom, M) else errors_fail

                # Calculate integrated relative errors in the reduced space.
                if x_rom.shape[1] > trainsize:
                    errors[reg] = roi.post.Lp_error(X_, x_rom[:, :trainsize],
                                                    t[:trainsize])[1]

        # Choose and save the ROM with the least error.
        plt.semilogx(list(errors_fail.keys()), list(errors_fail.values()),
                     f"C{ii}x", mew=1, label=fr"$r = {r:d}$, bound violated")
        if not errors_pass:
            print(f"NO STABLE ROMS for r = {r:d}")
            continue

        err2reg = {err: reg for reg, err in errors_pass.items()}
        best_reg = err2reg[min(err2reg.keys())]
        best_rom = trained_roms[best_reg]
        save_best_trained_rom(trainsize, r, best_reg, best_rom)

        plt.semilogx(list(errors_pass.keys()), list(errors_pass.values()),
                     f"C{ii}*", mew=0, label=fr"$r = {r:d}$, bound satisfied")
        plt.axvline(best_reg, lw=.5, color=f"C{ii}")

    plt.legend()
    plt.xlabel(r"Regularization parameter $\lambda$")
    plt.ylabel(r"ROM relative error $\frac"
               r"{||\widehat{\mathbf{Q}} - \widetilde{\mathbf{Q}}'||}"
               r"{||\widehat{\mathbf{Q}}||}$")
    plt.ylim(0, 1)
    plt.xlim(min(regs), max(regs))
    plt.title(fr"$n_t = {trainsize}$")
    utils.save_figure(f"regsweep_nt{trainsize:05d}.pdf")
Example #22
def train_gridsearch(trainsize, r, regs, testsize=None, margin=1.5):
    """Train ROMs with the given dimension over a grid of potential
    regularization hyperparameters, saving only the ROM with the least
    training error that satisfies a bound on the integrated POD coefficients.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : (float, float, int, float, float, int)
        Bounds and sizes for the grid of regularization parameters.
        Linear:    search in [regs[0], regs[1]] at regs[2] points.
        Quadratic: search in [regs[3], regs[4]] at regs[5] points.

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float >= 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    d = check_lstsq_size(trainsize, r)
    if len(regs) != 6:
        raise ValueError("len(regs) != 6 (bounds / sizes for parameter grid")
    check_regs(regs[0:2])
    check_regs(regs[3:5])
    λ1grid = np.logspace(np.log10(regs[0]), np.log10(regs[1]), int(regs[2]))
    λ2grid = np.logspace(np.log10(regs[3]), np.log10(regs[4]), int(regs[5]))

    # Load training data.
    t = utils.load_time_domain(testsize)
    Q_, Qdot_, _ = utils.load_projected_data(trainsize, r)
    U = config.U(t[:trainsize])

    # Compute the bound to require for integrated POD modes.
    M = margin * np.abs(Q_).max()

    # Create a solver mapping regularization parameters to operators.
    print(f"TRAINING {λ1grid.size*λ2grid.size} ROMS")
    with utils.timed_block(f"Constructing least-squares solver, r={r:d}"):
        rom = roi.InferredContinuousROM(config.MODELFORM)
        rom._construct_solver(None, Q_, Qdot_, U, np.ones(d))

    # Test each regularization parameter.
    errors_pass = {}
    errors_fail = {}
    for λ1, λ2 in itertools.product(λ1grid, λ2grid):
        with utils.timed_block(f"Testing ROM with λ1={λ1:5e}, λ2={λ2:5e}"):
            # Train the ROM on all training snapshots.
            rom._evaluate_solver(regularizer(r, d, λ1, λ2))

            # Simulate the ROM over the full domain.
            with np.warnings.catch_warnings():
                np.warnings.simplefilter("ignore")
                q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

            # Check for boundedness of solution.
            errors = errors_pass if is_bounded(q_rom, M) else errors_fail

            # Calculate integrated relative errors in the reduced space.
            if q_rom.shape[1] > trainsize:
                errors[(λ1, λ2)] = roi.post.Lp_error(Q_, q_rom[:, :trainsize],
                                                     t[:trainsize])[1]

    # Choose and save the ROM with the least error.
    if not errors_pass:
        message = f"NO STABLE ROMS for r={r:d}"
        print(message)
        logging.info(message)
        return

    err2reg = {err: reg for reg, err in errors_pass.items()}
    λ1, λ2 = err2reg[min(err2reg.keys())]
    with utils.timed_block(f"Best regularization for k={trainsize:d}, "
                           f"r={r:d}: λ1={λ1:.0f}, λ2={λ2:.0f}"):
        rom._evaluate_solver(regularizer(r, d, λ1, λ2))
        save_trained_rom(trainsize, r, (λ1, λ2), rom)
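
# The 2-D sweep above enumerates itertools.product(λ1grid, λ2grid) in
# row-major order. A tiny standalone demonstration of that grid construction:
import itertools
import numpy as np

λ1grid = np.logspace(2, 4, 3)           # [1e2, 1e3, 1e4]
λ2grid = np.logspace(10, 12, 3)         # [1e10, 1e11, 1e12]
for λ1, λ2 in itertools.product(λ1grid, λ2grid):
    print(f"λ1={λ1:.0e}, λ2={λ2:.0e}")  # Nine combinations.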
Example #23
def basis(trainsize, r, variables=None):
    """Export the POD basis vectors to Tecplot format.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to compute the basis.

    r : int
        Number of basis vectors to save.

    variables : str or list(str)
        Variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.
    """
    utils.reset_logger(trainsize)

    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2*num_nodes:3*num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Load the basis and extract desired variables.
    V, _, _ = utils.load_basis(trainsize, r)
    V = np.concatenate([dproc.getvar(var, V) for var in variables])

    # Save each of the basis vectors in Tecplot format matching grid.dat.
    for j in range(r):
        header = HEADER.format(varnames, j, j, num_nodes, config.DOF,
                               len(variables) + 2, "DOUBLE " * len(variables))
        save_folder = config._makefolder(config.tecplot_path(), "basis",
                                         config.TRNFMT(trainsize))
        save_path = os.path.join(save_folder, f"vec_{j+1:03d}.dat")
        with utils.timed_block(f"Writing basis vector {j+1:d}"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                for i in range(0, len(x), NCOLS):
                    outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                for i in range(0, len(y), NCOLS):
                    outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                # Write the data for each variable.
                for i in range(0, V.shape[0], NCOLS):
                    row = ' '.join(f"{v:.9E}" for v in V[i:i + NCOLS, j])
                    outfile.write(row + '\n')

                # Write connectivity information.
                for i in range(0, len(connectivity), NCOLS):
                    outfile.write(' '.join(connectivity[i:i + NCOLS]) + '\n')
    print(f"Basis info exported to {save_folder}/*.dat.")