Example #1
def scale_and_save_data(trainsize, lifted_data, time_domain):
    """Scale lifted snapshots (by variable) and save the scaled snapshots.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to scale and save.

    lifted_data : (NUM_ROMVARS*DOF, k>trainsize) ndarray
        Lifted snapshots to scale and then save.

    time_domain : (k>trainsize,) ndarray
        The time domain corresponding to the lifted snapshots.
    """
    # Scale the learning variables to the bounds in config.SCALE_TO.
    with utils.timed_block(f"Scaling {trainsize:d} lifted snapshots"):
        scaled_data, scales = dproc.scale(lifted_data[:,:trainsize].copy())

    # Save the lifted, scaled training data.
    save_path = config.scaled_data_path(trainsize)
    with utils.timed_block("Saving scaled, lifted training data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", data=scaled_data)
            hf.create_dataset("time", data=time_domain[:trainsize])
            hf.create_dataset("scales", data=scales)
    logging.info(f"Scaled data saved as {save_path}.\n")

    return scaled_data, scales
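# dproc.scale() is not shown in these examples. Judging from test_scalers()
# below, it performs per-variable min-max scaling to the bounds in
# config.SCALE_TO and records (min, max, scaled min, scaled max) per variable.
# A minimal single-block sketch (the names here are assumptions, not the
# repository's code):
import numpy as np

def minmax_scale(block, bounds=(-1.0, 1.0)):
    """Scale one variable block to `bounds`, returning the scaled block
    and the info needed to invert the scaling."""
    a, b = bounds
    dmin, dmax = block.min(), block.max()
    scaled = a + (b - a) * (block - dmin) / (dmax - dmin)
    return scaled, np.array([dmin, dmax, a, b])

def minmax_unscale(scaled, scales):
    """Invert minmax_scale() using the recorded scaling info."""
    dmin, dmax, a, b = scales
    return dmin + (scaled - a) * (dmax - dmin) / (b - a)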
Example #2
def test_lift(testsize):
    """Read `testsize` random snapshots of GEMS data, lift them, and check
    that the before-and-after variables are consistent with each other.
    """
    # Load the unlifted, unscaled snapshot data.
    testindices = np.random.choice(30000, size=testsize, replace=False)
    testindices.sort()
    gems_data, t = utils.load_gems_data(cols=testindices)

    # Lift the training data to the learning variables.
    with utils.timed_block("Lifting training data to new coordinates"):
        lifted_data = dproc.lift(gems_data)

    # Check that the first four variables are unchanged by lifting.
    with utils.timed_block("Verifying first four variables"):
        for i in range(4):
            s = slice(i * config.DOF, (i + 1) * config.DOF)
            assert np.allclose(lifted_data[s], gems_data[s])

    # Verify inverse lifting.
    with utils.timed_block("Verifying inverse lifting"):
        unlifted_data = dproc.unlift(lifted_data)
        assert np.allclose(unlifted_data, gems_data)

    return lifted_data
def save_statistical_features():
    """Compute the (spatial) mean temperatures on the full time domain and
    save them for later. This only needs to be done once.
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    with utils.timed_block("Computing statistical features of variables"):
        mins, maxs, sums, stds, means = {}, {}, {}, {}, {}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            mins[var] = val.min(axis=0)
            maxs[var] = val.max(axis=0)
            sums[var] = val.sum(axis=0)
            stds[var] = val.std(axis=0)
            means[var] = sums[var] / val.shape[0]

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                hf.create_dataset(f"{var}_min", data=mins[var])
                hf.create_dataset(f"{var}_max", data=maxs[var])
                hf.create_dataset(f"{var}_sum", data=sums[var])
                hf.create_dataset(f"{var}_std", data=stds[var])
                hf.create_dataset(f"{var}_mean", data=means[var])
            hf.create_dataset("time", data=t)
    logging.info(f"Statistical features saved to {data_path}")
Example #4
def compute_and_save_all_svdvals(training_data):
    """Compute and save the singular values corresponding to the *full* POD
    basis for the training data.

    Parameters
    ----------
    training_data : (NUM_ROMVARS*DOF,trainsize) ndarray
        Training snapshots to take the SVD of.

    Returns
    -------
    svdvals : (trainsize,) ndarray
        Singular values for the full POD basis.
    """
    # Compute the DENSE SVD of the training data to get the singular values.
    with utils.timed_block("Computing *dense* SVD for singular values"):
        svdvals = la.svdvals(training_data,
                             overwrite_a=True,
                             check_finite=False)

    # Save the singular values.
    save_path = config.basis_path(training_data.shape[1])
    save_path = save_path.replace(config.BASIS_FILE, "svdvals.h5")
    with utils.timed_block("Saving singular values"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("svdvals", data=svdvals)
    logging.info(f"Singular values saved to {save_path}.\n")

    return svdvals
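# The saved singular values are typically used to choose the basis size r; a
# minimal sketch of inspecting the decay (the file path is an assumption):
import h5py
import matplotlib.pyplot as plt

with h5py.File("svdvals.h5", 'r') as hf:
    svdvals = hf["svdvals"][:]

# The "elbow" of the normalized decay suggests a reasonable number of modes.
plt.semilogy(svdvals / svdvals[0], '.-')
plt.xlabel("POD mode index")
plt.ylabel("Normalized singular value")
plt.show()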
def project_and_save_data(trainsize, r, X, time_domain, scales, V):
    """Project preprocessed snapshots to a low-dimensional subspace.

    Parameters
    ----------
    trainsize : int
        Number of training snapshots to project.

    r : int
        Number of POD modes to use in the projection.

    X : (NUM_ROMVARS*DOF,trainsize) ndarray
        Preprocessed snapshot data to be projected.

    time_domain : (trainsize,) ndarray
        Time domain corresponding to the snapshots.

    scales : (NUM_ROMVARS,2) ndarray
        Info on how the snapshot data was scaled.

    V : (NUM_ROMVARS*DOF,r) ndarray
        POD basis of rank at least r.

    Returns
    -------
    X_ : (r,trainsize) ndarray
        Projected snapshots.

    Xdot_ : (r,trainsize) ndarray
        Time derivatives of projected snapshots.
    """
    # Verify that the time domain is uniformly spaced with spacing config.DT.
    dt = time_domain[1] - time_domain[0]
    if not np.allclose(np.diff(time_domain), dt):
        raise ValueError("time domain not uniformly spaced")
    if not np.isclose(dt, config.DT):
        raise ValueError("time domain spacing != config.DT")

    # Project the snapshot data.
    with utils.timed_block("Projecting snapshots to a "
                           f"{r:d}-dimensional linear subspace"):
        X_ = V[:, :r].T @ X

    # Compute time derivative data.
    with utils.timed_block("Approximating time derivatives "
                           "of projected snapshots"):
        Xdot_ = roi.pre.xdot_uniform(X_, dt, order=4)

    # Save the projected training data.
    save_path = config.projected_data_path(trainsize, r)
    with utils.timed_block(f"Saving projected data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", data=X_)
            hf.create_dataset("xdot", data=Xdot_)
            hf.create_dataset("time", data=time_domain)
            hf.create_dataset("scales", data=scales)
    logging.info(f"Projected data saved to {save_path}.\n")

    return X_, Xdot_
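# roi.pre.xdot_uniform() estimates time derivatives by finite differences on a
# uniformly spaced time grid. A self-contained sketch of the fourth-order
# central stencil for the interior columns (boundary handling omitted; this is
# not the library's implementation):
import numpy as np

def xdot_uniform_interior(X, dt):
    """Fourth-order central differences for columns 2, ..., k-3 of the
    (n, k) snapshot matrix X sampled with uniform spacing dt."""
    return (X[:, :-4] - 8*X[:, 1:-3] + 8*X[:, 3:-1] - X[:, 4:]) / (12*dt)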
def project_and_save_data(Q, t, V):
    """Project preprocessed snapshots to a low-dimensional subspace.

    Parameters
    ----------
    Q : (NUM_ROMVARS*DOF,trainsize) ndarray
        Preprocessed snapshot data to be projected.

    t : (trainsize,) ndarray
        Time domain corresponding to the snapshots.

    V : (NUM_ROMVARS*DOF,r) ndarray
        POD basis of rank r.

    Returns
    -------
    Q_ : (r,trainsize) ndarray
        Projected snapshots.

    Qdot_ : (r,trainsize) ndarray
        Time derivatives of projected snapshots.
    """
    # Validate arguments.
    if Q.shape[1] != t.shape[0]:
        raise ValueError("training_data and time_domain not aligned")

    # Verify that the time domain is uniformly spaced with spacing config.DT.
    dt = t[1] - t[0]
    if not np.allclose(np.diff(t), dt):
        raise ValueError("t not uniformly spaced")
    if not np.isclose(dt, config.DT):
        raise ValueError("t spacing != config.DT")

    # Project the snapshot data.
    with utils.timed_block(f"Projecting snapshots to a {V.shape[1]:d}"
                           "-dimensional linear subspace"):
        Q_ = V.T @ Q

    # Compute time derivative data.
    with utils.timed_block("Approximating time derivatives "
                           "of projected snapshots"):
        Qdot_ = opinf.pre.xdot_uniform(Q_, dt, order=4)

    # Save the projected training data.
    save_path = config.projected_data_path(Q.shape[1])
    with utils.timed_block("Saving projected data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", data=Q_)
            hf.create_dataset("ddt", data=Qdot_)
            hf.create_dataset("time", data=t)
    logging.info(f"Projected data saved to {save_path}.\n")

    return Q_, Qdot_
def scale_and_save_data(trainsize, lifted_data, time_domain, center=False):
    """Scale lifted snapshots (by variable) and save the scaled snapshots.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to scale and save.

    lifted_data : (NUM_ROMVARS*DOF, k>trainsize) ndarray
        Lifted snapshots to scale and then save.

    time_domain : (k>trainsize,) ndarray
        The time domain corresponding to the lifted snapshots.

    center : bool
        If True, center the scaled snapshots by the mean scaled snapshot
        before computing the POD basis. Default False (no shift).

    Returns
    -------
    training_data : (NUM_ROMVARS*DOF, trainsize) ndarray
        Scaled, shifted snapshots to use as training data for the basis.

    qbar : (NUM_ROMVARS*DOF,) ndarray
        Mean snapshot of the scaled training data. All zeros if center=False.

    scales : (NUM_ROMVARS,2) ndarray
        Info on how the snapshot data was scaled.
    """
    # Scale the learning variables to the bounds in config.SCALE_TO.
    with utils.timed_block(f"Scaling {trainsize:d} lifted snapshots"):
        training_data, scales = dproc.scale(lifted_data[:, :trainsize].copy())

    # Shift the scaled data by the mean snapshot.
    if center:
        with utils.timed_block("Shifting scaled snapshots by mean"):
            qbar = np.mean(training_data, axis=1)  # Compute mean snapshot.
            training_data -= qbar.reshape((-1, 1))  # Shift columns by mean.
    else:
        qbar = np.zeros(training_data.shape[0])

    # Save the lifted, scaled training data.
    save_path = config.scaled_data_path(trainsize)
    with utils.timed_block("Saving scaled, lifted training data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", data=training_data)
            hf.create_dataset("time", data=time_domain[:trainsize])
            hf.create_dataset("mean", data=qbar)
            hf.create_dataset("scales", data=scales)
    logging.info(f"Processed data saved to {save_path}.\n")

    return training_data, qbar, scales
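# When center=True, reconstructing full-order snapshots from ROM coordinates
# must undo the shift before unscaling. A sketch, assuming the dproc API used
# throughout these examples:
def reconstruct(Q_, V, qbar, scales):
    """Map (r, k) ROM coordinates back to unscaled full-order snapshots."""
    scaled = V @ Q_ + qbar.reshape((-1, 1))   # undo projection and centering
    return dproc.unscale(scaled, scales)      # undo per-variable scaling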
Example #8
def compute_and_save_pod_basis(trainsize, num_modes, training_data, scales):
    """Compute and save the POD basis via a randomized SVD.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use in computing the basis.

    num_modes : list(int) or int
        Number of POD modes to compute.

    training_data : (NUM_ROMVARS*DOF,trainsize) ndarray
        Training snapshots to take the SVD of.

    scales : (NUM_ROMVARS,2) ndarray
        Info on how the snapshot data was scaled.

    Returns
    -------
    V : (NUM_ROMVARS*DOF,r) ndarray
        POD basis of rank r = max(num_modes).

    svdvals : (r,) ndarray
        Singular values corresponding to the POD modes.
    """
    if trainsize != training_data.shape[1]:
        raise ValueError("trainsize and training_data not aligned")

    if np.isscalar(num_modes):
        num_modes = [int(num_modes)]

    # Compute the randomized SVD from the training data.
    rmax = max(num_modes)
    with utils.timed_block(f"Computing {rmax}-component randomized SVD"):
        V, svdvals = roi.pre.pod_basis(training_data,
                                       r=rmax,
                                       mode="randomized",
                                       n_iter=15,
                                       random_state=42)
    # Save the POD basis.
    for r in num_modes:
        save_path = config.basis_path(trainsize, r)
        with utils.timed_block(f"Saving POD basis of rank {r}"):
            with h5py.File(save_path, 'w') as hf:
                hf.create_dataset("V", data=V[:, :r])
                hf.create_dataset("svdvals", data=svdvals[:r])
        logging.info(f"POD basis of rank {r} saved to {save_path}.\n")

    return V, svdvals
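# A quick sanity check on the result (a sketch, not part of the pipeline):
# POD basis columns should be orthonormal, so V.T @ V should be the identity
# up to numerical precision.
import numpy as np

def check_orthonormal(V, tol=1e-8):
    """Verify that the columns of V form an orthonormal set."""
    return np.allclose(V.T @ V, np.eye(V.shape[1]), atol=tol)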
Example #9
def train_single(trainsize, r, regs):
    """Train and save a ROM with the given dimension and regularization
    hyperparameters.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : two positive floats
        Regularization hyperparameters (non-quadratic, quadratic) to use in
        the Operator Inference least-squares problem for training the ROM.
    """
    utils.reset_logger(trainsize)

    # Validate inputs.
    d = check_lstsq_size(trainsize, r)
    λ1, λ2 = check_regs(regs)

    # Load training data.
    Q_, Qdot_, t = utils.load_projected_data(trainsize, r)
    U = config.U(t)

    # Train and save the ROM.
    with utils.timed_block(f"Training ROM with k={trainsize:d}, "
                           f"r={r:d}, λ1={λ1:.0f}, λ2={λ2:.0f}"):
        rom = roi.InferredContinuousROM(config.MODELFORM)
        rom.fit(None, Q_, Qdot_, U, P=regularizer(r, d, λ1, λ2))
        save_trained_rom(trainsize, r, regs, rom)
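# regularizer(r, d, λ1, λ2) is not shown here. Below is a sketch of a
# two-parameter Tikhonov regularizer consistent with the docstring (λ1 on the
# non-quadratic operator entries, λ2 on the quadratic ones); the column layout
# of the d least-squares unknowns is an assumption:
import numpy as np

def regularizer_sketch(r, d, λ1, λ2):
    """Diagonal Tikhonov matrix: λ2 on the r(r+1)/2 quadratic columns
    (assumed to follow the 1 + r constant/linear columns), λ1 elsewhere."""
    diag = np.full(d, float(λ1))
    s = r * (r + 1) // 2
    diag[1 + r:1 + r + s] = λ2
    return np.diag(diag)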
def train_single(trainsize, r, regs):
    """Train and save a ROM with the given dimension and regularization
    hyperparameters.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : two or three non-negative floats
        Regularization hyperparameters (first-order, quadratic, cubic) to use
        in the Operator Inference least-squares problem for training the ROM.
    """
    utils.reset_logger(trainsize)

    # Validate inputs.
    modelform = get_modelform(regs)
    check_lstsq_size(trainsize, r, modelform)
    check_regs(regs)

    # Load training data.
    Q_, Qdot_, t = utils.load_projected_data(trainsize, r)
    U = config.U(t)

    # Train and save the ROM.
    with utils.timed_block(f"Training ROM with k={trainsize:d}, "
                           f"{config.REGSTR(regs)}"):
        rom = opinf.InferredContinuousROM(modelform)
        rom.fit(None, Q_, Qdot_, U, P=regularizer(r, *list(regs)))
        save_trained_rom(trainsize, r, regs, rom)
def train_and_save_all(trainsize, num_modes, regs):
    """Train and save ROMs with the given dimension and regularization.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM(s).

    num_modes : int or list(int)
        Dimension of the ROM(s) to train, i.e., the number of retained POD
        modes (left singular vectors) used to project the training data.

    regs : float or list(float)
        Regularization parameter(s) to use in the training.
    """
    utils.reset_logger(trainsize)

    logging.info(f"TRAINING {len(num_modes)*len(regs)} ROMS")
    for r in num_modes:
        # Load training data.
        X_, Xdot_, time_domain, _ = utils.load_projected_data(trainsize, r)

        # Evaluate inputs over the training time domain.
        Us = config.U(time_domain)

        # Train and save each ROM.
        for reg in regs:
            with utils.timed_block(f"Training ROM with r={r:d}, reg={reg:e}"):
                rom = train_rom(X_, Xdot_, Us, reg)
                if rom:
                    rom.save_model(config.rom_path(trainsize, r, reg),
                                   save_basis=False, overwrite=True)
def spatial_statistics(trainsize, r, regs):
    """Plot spatially averaged temperature and spacially itegrated (summed)
    species concentrations over the full time domain.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two or three positive floats
        Regularization hyperparameters used to train the ROM.
    """
    # Load the true results.
    keys = [f"{var}_mean" for var in config.ROM_VARIABLES[:4]]
    keys += [f"{var}_sum" for var in config.SPECIES]
    feature_gems, t = utils.load_spatial_statistics(keys)
    keys = np.reshape(keys, (4, 2), order='F')

    # Load and simulate the ROM.
    t, V, qbar, scales, q_rom = simulate_rom(trainsize, r, regs)

    # Initialize the figure.
    fig, axes = plt.subplots(keys.shape[0],
                             keys.shape[1],
                             figsize=(9, 6),
                             sharex=True)

    # Calculate and plot the results.
    for ax, key in zip(axes.flat, keys.flat):
        with utils.timed_block("Reconstructing"):
            feature_rom = get_feature(key, q_rom, V, qbar, scales)
        ax.plot(t, feature_gems[key], lw=1, **config.GEMS_STYLE)
        ax.plot(t[:q_rom.shape[1]], feature_rom, lw=1, **config.ROM_STYLE)
        ax.axvline(t[trainsize], color='k')
        ax.set_ylabel(config.VARLABELS[key.split('_')[0]])
        ax.locator_params(axis='y', nbins=2)

    # Set titles, labels, ticks, and draw a single legend.
    for ax in axes[-1, :]:
        ax.set_xlim(t[0], t[-1])
        ax.set_xticks(np.arange(t[0], t[-1] + .001, .002))
        ax.set_xlabel("Time [s]", fontsize=12)
    axes[0, 0].set_title("Spatial Averages", fontsize=14)
    axes[0, 1].set_title("Spatial Integrals", fontsize=14)

    # Legend on the right.
    fig.tight_layout(rect=[0, 0, .85, 1])
    leg = axes[0, 0].legend(loc="center right",
                            fontsize=14,
                            bbox_to_anchor=(1, .5),
                            bbox_transform=fig.transFigure)
    for line in leg.get_lines():
        line.set_linewidth(2)

    utils.save_figure("statfeatures.pdf")
Example #13
def save_statistical_features():
    """Compute the spatial and temporal statistics (min, max, mean, etc.)
    for each variable and save them for later. This only needs to be done once.
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    with utils.timed_block("Computing statistical features of variables"):
        mins, maxs, sums, stds, means = {}, {}, {}, {}, {}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            for axis, label in enumerate(["space", "time"]):
                name = f"{label}/{var}"
                print(f"\n\tmin_{label}({var})", end='..', flush=True)
                mins[name] = val.min(axis=axis)
                print(f"max_{label}({var})", end='..', flush=True)
                maxs[name] = val.max(axis=axis)
                print(f"sum_{label}({var})", end='..', flush=True)
                sums[name] = val.sum(axis=axis)
                print(f"std_{label}({var})", end='..', flush=True)
                stds[name] = val.std(axis=axis)
            means[f"space/{var}"] = sums[f"space/{var}"] / val.shape[0]
            means[f"time/{var}"] = sums[f"time/{var}"] / t.size

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for prefix in ["space", "time"]:
                    name = f"{prefix}/{var}"
                    hf.create_dataset(f"{name}_min", data=mins[name])
                    hf.create_dataset(f"{name}_max", data=maxs[name])
                    hf.create_dataset(f"{name}_sum", data=sums[name])
                    hf.create_dataset(f"{name}_std", data=stds[name])
                    hf.create_dataset(f"{name}_mean", data=means[name])
            hf.create_dataset("t", data=t)
    logging.info(f"Statistical features saved to {data_path}")
Example #14
def test_scalers(lifted_data):
    """Test data_processing.scale() and data_processing.unscale(),
    including checking that they are inverses.
    """
    # Shift the test data (learning the scaling simultaneously).
    with utils.timed_block("Scaling lifted test data"):
        shifted_data, scales = dproc.scale(lifted_data.copy())
        assert np.allclose(scales[:, -2:], config.SCALE_TO)

    # Verify the scales and that the shift worked for each variable.
    with utils.timed_block("Verifying shift results with scales"):
        for i, v in enumerate(config.ROM_VARIABLES):
            s = slice(i * config.DOF, (i + 1) * config.DOF)
            if v in ["vx", "vy"]:
                assert -scales[i, 0] == scales[i, 1]
                assert scales[i, 1] == np.abs(lifted_data[s]).max()
                assert np.isclose(np.abs(shifted_data[s]).max(), 1)
            else:
                assert lifted_data[s].min() == scales[i, 0]
                assert lifted_data[s].max() == scales[i, 1]
                assert np.isclose(shifted_data[s].min(), scales[i, 2])
                assert np.isclose(shifted_data[s].max(), scales[i, 3])

    # Redo the shift with the given scales and compare the results.
    with utils.timed_block("Verifying repeat shift with given scales"):
        shifted_data2, _ = dproc.scale(lifted_data.copy(), scales)
        assert np.allclose(shifted_data2, shifted_data)

    # Undo the shift and compare the results.
    with utils.timed_block("Verifying inverse scaling"):
        unshifted_data = dproc.unscale(shifted_data, scales)
        assert np.allclose(unshifted_data, lifted_data)

    # Check the inverse property for a subset of the variables.
    with utils.timed_block("Repeating experiment with nontrivial varindices"):
        variables = np.random.choice(config.ROM_VARIABLES,
                                     size=4,
                                     replace=False)
        subset = np.vstack([dproc.getvar(v, lifted_data) for v in variables])
        shifted_subset, _ = dproc.scale(subset.copy(), scales, variables)
        unshifted_subset = dproc.unscale(shifted_subset, scales, variables)
        assert np.allclose(unshifted_subset, subset)
Example #15
def compute_and_save_pod_basis(num_modes, training_data, qbar, scales):
    """Compute and save the POD basis via a randomized SVD.

    Parameters
    ----------
    num_modes : int
        Number of POD modes to compute.

    training_data : (NUM_ROMVARS*DOF,trainsize) ndarray
        Training snapshots to take the SVD of.

    qbar : (NUM_ROMVARS*DOF,) ndarray
        Mean snapshot of the scaled training data.

    scales : (NUM_ROMVARS,2) ndarray
        Info on how the snapshot data was scaled.

    Returns
    -------
    V : (NUM_ROMVARS*DOF,r) ndarray
        POD basis of rank r.
    """
    # Compute the randomized SVD from the training data.
    with utils.timed_block(f"Computing {num_modes}-component randomized SVD"):
        V, svdvals = opinf.pre.pod_basis(training_data,
                                         r=num_modes,
                                         mode="randomized",
                                         n_iter=15,
                                         random_state=42)

    # Save the POD basis.
    save_path = config.basis_path(training_data.shape[1])
    with utils.timed_block("Saving POD basis"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("basis", data=V)
            hf.create_dataset("svdvals", data=svdvals)
            hf.create_dataset("mean", data=qbar)
            hf.create_dataset("scales", data=scales)
    logging.info(f"POD bases of rank {num_modes} saved to {save_path}.\n")

    return V
def simulate_rom(trainsize, r, regs, steps=None):
    """Load everything needed to simulate a given ROM, run the simulation,
    and return the simulation results and everything needed to reconstruct
    the results in the original high-dimensional space.
    Raise an Exception if any of the ingredients are missing.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two or three positive floats
        Regularization hyperparameters used to train the ROM.

    steps : int or None
        Number of time steps to simulate the ROM.

    Returns
    -------
    t : (nt,) ndarray
        Time domain corresponding to the ROM outputs.

    V : (NUM_ROMVARS*DOF,r) ndarray
        POD basis used to project the training data (and for reconstructing
        the full-order scaled predictions).

    qbar : (NUM_ROMVARS*DOF,) ndarray
        Mean snapshot that the training data was shifted by after scaling
        but before projection.

    scales : (NUM_ROMVARS,4) ndarray
        Information for how the data was scaled. See data_processing.scale().

    q_rom : (r,nt) ndarray
        Prediction results from the ROM.
    """
    # Load the time domain, basis, initial conditions, and trained ROM.
    t = utils.load_time_domain(steps)
    V, qbar, scales = utils.load_basis(trainsize, r)
    Q_, _, _ = utils.load_projected_data(trainsize, r)
    rom = utils.load_rom(trainsize, r, regs)

    # Simulate the ROM over the full time domain.
    with utils.timed_block(f"Simulating ROM with k={trainsize:d}, r={r:d}, "
                           f"{config.REGSTR(regs)} over full time domain"):
        q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

    return t, V, qbar, scales, q_rom
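# Typical downstream use of simulate_rom(): map the ROM trajectory back to the
# full-order variables. A sketch (the argument values are hypothetical):
t, V, qbar, scales, q_rom = simulate_rom(20000, 40, (100, 30000))
with utils.timed_block("Reconstructing simulation results"):
    q_scaled = V @ q_rom + qbar.reshape((-1, 1))   # back to scaled coordinates
    q_full = dproc.unscale(q_scaled, scales)       # undo per-variable scaling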
def simulate_rom(trainsize, r, reg, steps=None):
    """Load everything needed to simulate a given ROM, simulate the ROM,
    and return the simulation results and everything needed to reconstruct
    the results in the original high-dimensional space.
    Raise an Exception if any of the ingredients are missing.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM. This is also the number of retained POD
        modes (left singular vectors) used to project the training data.

    reg : float
        Regularization parameter used to train the ROM.

    steps : int or None
        Number of time steps to simulate the ROM.

    Returns
    -------
    t : (nt,) ndarray
        Time domain corresponding to the ROM outputs.

    V : (NUM_ROMVARS*DOF,r) ndarray
        POD basis used to project the training data (and for reconstructing
        the full-order scaled predictions).

    scales : (NUM_ROMVARS,4) ndarray
        Information for how the data was scaled. See data_processing.scale().

    x_rom : (r,nt) ndarray
        Prediction results from the ROM.
    """
    # Load the time domain, basis, initial conditions, and trained ROM.
    t = utils.load_time_domain(steps)
    V, _ = utils.load_basis(trainsize, r)
    X_, _, _, scales = utils.load_projected_data(trainsize, r)
    rom = utils.load_rom(trainsize, r, reg)

    # Simulate the ROM over the full time domain.
    with utils.timed_block(f"Simulating ROM with r={r:d}, "
                           f"reg={reg:e} over full time domain"):
        x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")

    return t, V, scales, x_rom
def training_error_from_rom(log10reg):
    reg = 10**log10reg

    # Train the ROM on all training snapshots.
    with utils.timed_block(f"Testing ROM with r={r:d}, reg={reg:e}"):
        rom = train_rom(X_, Xdot_, Us[:trainsize], reg)
        if not rom:
            return _MAXFUN

        # Simulate the ROM over the full domain.
        with np.warnings.catch_warnings():
            np.warnings.simplefilter("ignore")
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")

        # Check for boundedness of solution.
        if not is_bounded(x_rom, B):
            return _MAXFUN

        # Calculate integrated relative errors in the reduced space.
        return roi.post.Lp_error(X_, x_rom[:, :trainsize],
                                 t[:trainsize])[1]
def save_best_trained_rom(trainsize, r, reg, rom):
    """Save the trained ROM with the specified attributes.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the trained ROM, i.e., the number of retained POD
        modes (left singular vectors) used to project the training data.

    reg : float > 0
        Regularization parameter used in the training.

    rom : rom_operator_inference.InferredContinuousROM
        Actual trained ROM object. Must have a `save_model()` method.
    """
    save_path = config.rom_path(trainsize, r, reg)
    with utils.timed_block(f"Best regularization for r={r:d}: {reg:.0f}"):
        rom.save_model(save_path, save_basis=False, overwrite=True)
    logging.info(f"ROM saved to {save_path}")
def training_error(log10reg):
    """Return the training error resulting from the regularization
    hyperparameters λ1 = λ2 = 10^log10reg. If the resulting model
    violates the POD bound, return "infinity".
    """
    λ = 10**log10reg

    # Train the ROM on all training snapshots.
    with utils.timed_block(f"Testing ROM with λ={λ:e}"):
        rom._evaluate_solver(λ)

        # Simulate the ROM over the full domain.
        with np.warnings.catch_warnings():
            np.warnings.simplefilter("ignore")
            q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

        # Check for boundedness of solution.
        if not is_bounded(q_rom, B):
            return _MAXFUN

        # Calculate integrated relative errors in the reduced space.
        return opinf.post.Lp_error(Q_, q_rom[:, :trainsize],
                                   t[:trainsize])[1]
def load_and_lift_gems_data(trainsize):
    """Lift raw GEMS training snapshots (columnwise) to the learning variables.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to lift.

    Returns
    -------
    lifted_data : (NUM_ROMVARS*DOF,trainsize) ndarray
        The lifted snapshots.

    time_domain : (trainsize,) ndarray
        The time domain corresponding to the lifted snapshots.
    """
    # Load as many snapshots of GEMS training data as are needed.
    gems_data, time_domain = utils.load_gems_data(cols=trainsize)

    # Lift the training data to the learning variables.
    with utils.timed_block(f"Lifting {trainsize:d} GEMS snapshots"):
        lifted_data = dproc.lift(gems_data)

    return lifted_data, time_domain
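# The lifting map itself lives in dproc. As a chemistry-level sketch of the
# "convert to molar concentrations" step mentioned above (the variable layout
# and these molar masses are assumptions, not the repository's code):
MOLAR_MASSES = {"CH4": 16.04, "O2": 32.00, "H2O": 18.02, "CO2": 44.01}

def mass_to_molar(rho, Y, species):
    """c_i = rho * Y_i / M_i: molar concentration [kmol/m^3] from the
    density field rho [kg/m^3] and species mass fraction field Y_i."""
    return rho * Y / MOLAR_MASSES[species]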
def main(data_folder, overwrite=False, serial=False):
    """Extract snapshot data, in parallel, from the .tar files in the
    specified folder of the form Data_<first-snapshot>to<last-snapshot>.tar.

    Parameters
    ----------
    data_folder : str
        Path to the folder that contains the raw GEMS .tar data files,
        preferably as an absolute path (e.g., /path/to/folder).

    overwrite : bool
        If False and the snapshot matrix file exists, raise an error.
        If True, overwrite the existing snapshot matrix file if it exists.

    serial : bool
        If True, do the unpacking sequentially in 10,000 snapshot chunks.
        If False, do the unpacking in parallel with 10,000 snapshot chunks.
    """
    utils.reset_logger()

    # If it exists, copy the grid file to the Tecplot data directory.
    source = os.path.join(data_folder, config.GRID_FILE)
    if os.path.isfile(source):
        target = config.grid_data_path()
        with utils.timed_block(f"Copying {source} to {target}"):
            shutil.copy(source, target)
    else:
        logging.warning(f"Grid file {source} not found!")

    # Locate and sort raw .tar files.
    target_pattern = os.path.join(data_folder, "Data_*to*.tar")
    tarfiles = sorted(glob.glob(target_pattern))
    if not tarfiles:
        raise FileNotFoundError(target_pattern)

    # Get the snapshot indices corresponding to each file from the file names.
    starts, stops = [], []
    for i,tfile in enumerate(tarfiles):
        matches = re.findall(r"Data_(\d+)to(\d+).tar", tfile)
        if not matches:
            raise ValueError(f"file {tfile} not named with convention "
                             "Data_<first-snapshot>to<last-snapshot>.tar")
        start, stop = [int(d) for d in matches[0]]
        if i == 0:
            start0 = start  # Offset
        starts.append(start - start0)
        stops.append(stop + 1 - start0)

        if i > 0 and stops[i-1] != starts[i]:
            raise ValueError(f"file {tfile} not continuous from previous set")
    num_snapshots = stops[-1]

    # Create an empty HDF5 file of appropriate size for the data.
    save_path = config.gems_data_path()
    if os.path.isfile(save_path) and not overwrite:
        raise FileExistsError(f"{save_path} (use --overwrite to overwrite)")
    with utils.timed_block("Initializing HDF5 file for data"):
        with h5py.File(save_path, 'w') as hf:
            hf.create_dataset("data", shape=(config.DOF*config.NUM_GEMSVARS,
                                             num_snapshots),
                                      dtype=np.float64)
            hf.create_dataset("time", shape=(num_snapshots,),
                                      dtype=np.float64)
    logging.info(f"Data file initialized as {save_path}.")

    # Read the files in chunks.
    args = zip(tarfiles, starts, stops)
    if serial:       # Read the files serially (sequentially).
        for tf, start, stop in args:
            _read_tar_and_save_data(tf, start, stop, parallel=False)
    else:            # Read the files in parallel.
        with mp.Pool(initializer=_globalize_lock, initargs=(mp.Lock(),),
                     processes=min([len(tarfiles), mp.cpu_count()])) as pool:
            pool.starmap(_read_tar_and_save_data, args)
Example #23
def errors_in_time(trainsize, r, regs, cutoff=60000):
    """Plot spatially averaged errors, and the projection error, in time.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    cutoff : int
        Number of time steps to plot.
    """
    # Load and simulate the ROM.
    t, V, scales, q_rom = simulate_rom(trainsize, r, regs, cutoff)

    # Load and lift the true results.
    data, _ = utils.load_gems_data(cols=cutoff)
    with utils.timed_block("Lifting GEMS data"):
        data_gems = dproc.lift(data[:, :cutoff])
    del data

    # Shift and project the data (unscaling done later by chunk).
    with utils.timed_block("Projecting GEMS data to POD subspace"):
        data_shifted, _ = dproc.scale(data_gems.copy(), scales)
        data_proj = V.T @ data_shifted
        del data_shifted

    # Initialize the figure.
    fig, axes = plt.subplots(3, 3, figsize=(12, 6), sharex=True)

    # Compute and plot errors in each variable.
    for var, ax in zip(config.ROM_VARIABLES, axes.flat):

        with utils.timed_block(f"Reconstructing results for {var}"):
            Vvar = dproc.getvar(var, V)
            gems_var = dproc.getvar(var, data_gems)
            proj_var = dproc.unscale(Vvar @ data_proj, scales, var)
            pred_var = dproc.unscale(Vvar @ q_rom, scales, var)

        with utils.timed_block(f"Calculating error in {var}"):
            denom = np.abs(gems_var).max(axis=0)
            proj_error = np.mean(np.abs(proj_var - gems_var), axis=0) / denom
            pred_error = np.mean(np.abs(pred_var - gems_var), axis=0) / denom

        # Plot results.
        ax.plot(t,
                proj_error,
                '-',
                lw=1,
                label="Projection Error",
                c=config.GEMS_STYLE['color'])
        ax.plot(t,
                pred_error,
                '-',
                lw=1,
                label="ROM Error",
                c=config.ROM_STYLE['color'])
        ax.axvline(t[trainsize], color='k')
        ax.set_ylabel(config.VARTITLES[var])

    # Format the figure.
    for ax in axes[-1, :]:
        ax.set_xlim(t[0], t[-1])
        ax.set_xticks(np.arange(t[0], t[-1] + .001, .002))
        ax.set_xlabel("Time [s]", fontsize=12)

    # Make legend centered below the subplots.
    fig.tight_layout(rect=[0, .1, 1, 1])
    leg = axes[0, 0].legend(ncol=2,
                            fontsize=14,
                            loc="lower center",
                            bbox_to_anchor=(.5, 0),
                            bbox_transform=fig.transFigure)
    for line in leg.get_lines():
        line.set_linestyle('-')
        line.set_linewidth(5)

    # Save the figure.
    utils.save_figure(f"errors"
                      f"_{config.TRNFMT(trainsize)}"
                      f"_{config.DIMFMT(r)}"
                      f"_{config.REGFMT(regs)}.pdf")
Example #24
def point_traces(trainsize, r, regs, elems, cutoff=60000):
    """Plot the time trace of each variable in the original data at the monitor
    location, and the time trace of each variable of the ROM reconstruction at
    the same locations. One figure is generated per variable.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute the time traces.

    cutoff : int
        Number of time steps to plot.
    """
    if elems is None:
        elems = config.MONITOR_LOCATIONS

    # Get the indices for each variable.
    elems = np.atleast_1d(elems)
    nelems = elems.size
    nrows = (nelems // 2) + (1 if nelems % 2 != 0 else 0)
    elems = np.concatenate(
        [elems + i * config.DOF for i in range(config.NUM_ROMVARS)])

    # Load and lift the true results.
    data, _ = utils.load_gems_data(rows=elems[:nelems * config.NUM_GEMSVARS])
    with utils.timed_block("Lifting GEMS time trace data"):
        traces_gems = dproc.lift(data[:, :cutoff])

    # Load and simulate the ROM.
    t, V, scales, q_rom = simulate_rom(trainsize, r, regs, cutoff)

    # Reconstruct and rescale the simulation results.
    simend = q_rom.shape[1]
    with utils.timed_block("Reconstructing simulation results"):
        traces_rom = dproc.unscale(V[elems] @ q_rom, scales)

    # Save a figure for each variable.
    xticks = np.arange(t[0], t[-1] + .001, .002)
    for i, var in enumerate(config.ROM_VARIABLES):
        fig, axes = plt.subplots(nrows,
                                 2 if nelems > 1 else 1,
                                 figsize=(9, 3 * nrows),
                                 sharex=True)
        axes = np.atleast_2d(axes)
        for j, ax in enumerate(axes.flat):
            idx = j + i * nelems
            ax.plot(t, traces_gems[idx, :], lw=1, **config.GEMS_STYLE)
            ax.plot(t[:simend], traces_rom[idx, :], lw=1, **config.ROM_STYLE)
            ax.axvline(t[trainsize], color='k', lw=1)
            ax.set_xlim(t[0], t[-1])
            ax.set_xticks(xticks)
            ax.set_title(f"Location ${j+1}$", fontsize=12)
            ax.locator_params(axis='y', nbins=2)
        for ax in axes[-1, :]:
            ax.set_xlabel("Time [s]", fontsize=12)
        for ax in axes[:, 0]:
            ax.set_ylabel(config.VARLABELS[var], fontsize=12)

        # Single legend to the right of the subplots.
        fig.tight_layout(rect=[0, 0, .85, 1])
        leg = axes[0, 0].legend(loc="center right",
                                fontsize=14,
                                bbox_to_anchor=(1, .5),
                                bbox_transform=fig.transFigure)
        for line in leg.get_lines():
            line.set_linewidth(2)

        # Save the figure.
        utils.save_figure("pointtrace"
                          f"_{config.TRNFMT(trainsize)}"
                          f"_{config.DIMFMT(r)}"
                          f"_{config.REGFMT(regs)}_{var}.pdf")
def main(timeindices,
         variables=None,
         snaptype=["gems", "rom", "error"],
         trainsize=None,
         r=None,
         reg=None):
    """Convert a snapshot in .h5 format to a .dat file that matches the format
    of grid.dat. The new file is saved in `config.tecplot_path()` with the same
    filename and the new file extension .dat.

    Parameters
    ----------
    timeindices : ndarray(int) or int
        Indices (one-based) in the full time domain of the snapshots to save.

    variables : str or list(str)
        The variables to scale, a subset of config.ROM_VARIABLES.
        Defaults to all variables.

    snaptype : {"rom", "gems", "error"} or list(str)
        Which kinds of snapshots to save. Options:
        * "gems": snapshots from the full-order GEMS data;
        * "rom": reconstructed snapshots produced by a ROM;
        * "error": absolute error between the full-order data
                   and the reduced-order reconstruction.
        If "rom" or "error" are selected, the ROM is selected by the
        remaining arguments.

    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Number of retained modes in the ROM.

    reg : float
        Regularization factor used to train the ROM.
    """
    utils.reset_logger(trainsize)

    # Parse parameters.
    timeindices = np.sort(np.atleast_1d(timeindices))
    simtime = timeindices.max()
    t = utils.load_time_domain(simtime + 1)

    # Parse the variables.
    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    if isinstance(snaptype, str):
        snaptype = [snaptype]
    for stype in snaptype:
        if stype not in ("gems", "rom", "error"):
            raise ValueError(f"invalid snaptype '{stype}'")

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        cell_volume = grid_data[2 * num_nodes:3 * num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Extract full-order data if needed.
    if ("gems" in snaptype) or ("error" in snaptype):
        gems_data, _ = utils.load_gems_data(cols=timeindices)
        with utils.timed_block("Lifting selected snapshots of GEMS data"):
            lifted_data = dproc.lift(gems_data)
            true_snaps = np.concatenate(
                [dproc.getvar(v, lifted_data) for v in variables])
    # Simulate ROM if needed.
    if ("rom" in snaptype) or ("error" in snaptype):
        # Load the SVD data.
        V, _ = utils.load_basis(trainsize, r)

        # Load the initial conditions and scales.
        X_, _, _, scales = utils.load_projected_data(trainsize, r)

        # Load the appropriate ROM.
        rom = utils.load_rom(trainsize, r, reg)

        # Simulate the ROM over the time domain.
        with utils.timed_block(f"Simulating ROM with r={r:d}, reg={reg:.0e}"):
            x_rom = rom.predict(X_[:, 0], t, config.U, method="RK45")
            if np.any(np.isnan(x_rom)) or x_rom.shape[1] < simtime:
                raise ValueError("ROM unstable!")

        # Reconstruct the results (only selected variables / snapshots).
        with utils.timed_block("Reconstructing simulation results"):
            x_rec = dproc.unscale(V[:, :r] @ x_rom[:, timeindices], scales)
            x_rec = np.concatenate([dproc.getvar(v, x_rec) for v in variables])

    dsets = {}
    if "rom" in snaptype:
        dsets["rom"] = x_rec
    if "gems" in snaptype:
        dsets["gems"] = true_snaps
    if "error" in snaptype:
        with utils.timed_block("Computing absolute error of reconstruction"):
            abs_err = np.abs(true_snaps - x_rec)
        dsets["error"] = abs_err

    # Save each of the selected snapshots in Tecplot format matching grid.dat.
    for j, tindex in enumerate(timeindices):

        header = HEADER.format(varnames, tindex, t[tindex], num_nodes,
                               config.DOF,
                               len(variables) + 2, "SINGLE " * len(variables))
        for label, dset in dsets.items():

            if label == "gems":
                save_path = config.gems_snapshot_path(tindex)
            if label in ("rom", "error"):
                folder = config.rom_snapshot_path(trainsize, r, reg)
                save_path = os.path.join(folder, f"{label}_{tindex:05d}.dat")
            with utils.timed_block(f"Writing {label} snapshot {tindex:05d}"):
                with open(save_path, 'w') as outfile:
                    # Write the header.
                    outfile.write(header)

                    # Write the geometry data (x,y coordinates).
                    for i in range(0, len(x), NCOLS):
                        outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                    for i in range(0, len(y), NCOLS):
                        outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                    # Write the data for each variable.
                    for i in range(0, dset.shape[0], NCOLS):
                        row = ' '.join(f"{v:.9E}"
                                       for v in dset[i:i + NCOLS, j])
                        outfile.write(row + '\n')

                    # Write connectivity information.
                    for i in range(0, len(connectivity), NCOLS):
                        outfile.write(' '.join(connectivity[i:i + NCOLS]) +
                                      '\n')
def _train_minimize_1D(trainsize, r, regs, testsize=None, margin=1.1):
    """Train ROMs with the given dimension(s), saving only the ROM with
    the least training error that satisfies a bound on the integrated POD
    coefficients, using a search algorithm to choose the regularization
    parameter.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : two non-negative floats
        Bounds for the (single) regularization hyperparameter to use in the
        Operator Inference least-squares problem for training the ROM.

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float ≥ 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    check_lstsq_size(trainsize, r, modelform="cAHB")
    log10regs = np.log10(regs)

    # Load training data.
    t = utils.load_time_domain(testsize)
    Q_, Qdot_, _ = utils.load_projected_data(trainsize, r)
    U = config.U(t[:trainsize])

    # Compute the bound to require for integrated POD modes.
    B = margin * np.abs(Q_).max()

    # Create a solver mapping regularization hyperparameters to operators.
    with utils.timed_block(f"Constructing least-squares solver, r={r:d}"):
        rom = opinf.InferredContinuousROM("cAHB")
        rom._construct_solver(None, Q_, Qdot_, U, 1)

    # Test each regularization hyperparameter.
    def training_error(log10reg):
        """Return the training error resulting from the regularization
        hyperparameters λ1 = λ2 = 10^log10reg. If the resulting model
        violates the POD bound, return "infinity".
        """
        λ = 10**log10reg

        # Train the ROM on all training snapshots.
        with utils.timed_block(f"Testing ROM with λ={λ:e}"):
            rom._evaluate_solver(λ)

            # Simulate the ROM over the full domain.
            with np.warnings.catch_warnings():
                np.warnings.simplefilter("ignore")
                q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

            # Check for boundedness of solution.
            if not is_bounded(q_rom, B):
                return _MAXFUN

            # Calculate integrated relative errors in the reduced space.
            return opinf.post.Lp_error(Q_, q_rom[:, :trainsize],
                                       t[:trainsize])[1]

    opt_result = opt.minimize_scalar(training_error,
                                     method="bounded",
                                     bounds=log10regs)
    if opt_result.success and opt_result.fun != _MAXFUN:
        λ = 10**opt_result.x
        with utils.timed_block(f"Best regularization for k={trainsize:d}, "
                               f"r={r:d}: λ={λ:.0f}"):
            rom._evaluate_solver(λ)
            save_trained_rom(trainsize, r, (λ, λ), rom)
    else:
        message = "Regularization search optimization FAILED"
        print(message)
        logging.info(message)
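# The bounded 1D search above follows the standard scipy.optimize pattern; a
# minimal self-contained illustration:
import scipy.optimize as opt

result = opt.minimize_scalar(lambda x: (x - 2.0)**2,
                             method="bounded", bounds=(0.0, 5.0))
print(result.x, result.fun, result.success)   # ~2.0, ~0.0, True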
def train_gridsearch(trainsize, r, regs, testsize=None, margin=1.1):
    """Train ROMs with the given dimension over a grid of potential
    regularization hyperparameters, saving only the ROM with the least
    training error that satisfies a bound on the integrated POD coefficients.

    Parameters
    ----------
    trainsize : int
        Number of snapshots to use to train the ROM.

    r : int
        Dimension of the desired ROM. Also the number of retained POD modes
        (left singular vectors) used to project the training data.

    regs : six or nine numbers
        Bounds and sizes for the grid of regularization hyperparameters,
        given as (min, max, number of points) triples, one per grid.
        First-order: search in [regs[0], regs[1]] at regs[2] points.
        Quadratic:   search in [regs[3], regs[4]] at regs[5] points.
        Cubic:       search in [regs[6], regs[7]] at regs[8] points.

    testsize : int
        Number of time steps for which a valid ROM must satisfy the POD bound.

    margin : float ≥ 1
        Amount that the integrated POD coefficients of a valid ROM are allowed
        to deviate in magnitude from the maximum magnitude of the training
        data Q, i.e., bound = margin * max(abs(Q)).
    """
    utils.reset_logger(trainsize)

    # Parse arguments.
    if len(regs) not in [6, 9]:
        raise ValueError("6 or 9 regs required (bounds / sizes of grids)")
    grids = []
    for i in range(0, len(regs), 3):
        check_regs(regs[i:i + 2])
        grids.append(
            np.logspace(np.log10(regs[i]), np.log10(regs[i + 1]),
                        int(regs[i + 2])))
    modelform = get_modelform(grids)
    d = check_lstsq_size(trainsize, r, modelform)

    # Load training data.
    t = utils.load_time_domain(testsize)
    Q_, Qdot_, _ = utils.load_projected_data(trainsize, r)
    U = config.U(t[:trainsize])

    # Compute the bound to require for integrated POD modes.
    M = margin * np.abs(Q_).max()

    # Create a solver mapping regularization hyperparameters to operators.
    num_tests = np.prod([grid.size for grid in grids])
    print(f"TRAINING {num_tests} ROMS")
    with utils.timed_block(f"Constructing least-squares solver, r={r:d}"):
        rom = opinf.InferredContinuousROM(modelform)
        rom._construct_solver(None, Q_, Qdot_, U, np.ones(d))

    # Test each regularization hyperparameter.
    errors_pass = {}
    errors_fail = {}
    for i, regs in enumerate(itertools.product(*grids)):
        with utils.timed_block(f"({i+1:d}/{num_tests:d}) Testing ROM with "
                               f"{config.REGSTR(regs)}"):
            # Train the ROM on all training snapshots.
            rom._evaluate_solver(regularizer(r, *list(regs)))

            # Simulate the ROM over the full domain.
            with np.warnings.catch_warnings():
                np.warnings.simplefilter("ignore")
                q_rom = rom.predict(Q_[:, 0], t, config.U, method="RK45")

            # Check for boundedness of solution.
            errors = errors_pass if is_bounded(q_rom, M) else errors_fail

            # Calculate integrated relative errors in the reduced space.
            if q_rom.shape[1] > trainsize:
                errors[tuple(regs)] = opinf.post.Lp_error(
                    Q_, q_rom[:, :trainsize], t[:trainsize])[1]

    # Choose and save the ROM with the least error.
    if not errors_pass:
        message = f"NO STABLE ROMS for r={r:d}"
        print(message)
        logging.info(message)
        return

    err2reg = {err: reg for reg, err in errors_pass.items()}
    regs = list(err2reg[min(err2reg.keys())])
    with utils.timed_block(f"Best regularization for k={trainsize:d}, "
                           f"r={r:d}: {config.REGSTR(regs)}"):
        rom._evaluate_solver(regularizer(r, *regs))
        save_trained_rom(trainsize, r, regs, rom)
Example #28
def basis(trainsize, r, variables=None):
    """Export the POD basis vectors to Tecplot format.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to compute the basis.

    r : int
        Number of basis vectors to save.

    variables : str or list(str)
        Variables to save, a subset of config.ROM_VARIABLES.
        Defaults to all variables.
    """
    utils.reset_logger(trainsize)

    if variables is None:
        variables = config.ROM_VARIABLES
    elif isinstance(variables, str):
        variables = [variables]
    varnames = '\n'.join(f'"{v}"' for v in variables)

    # Read the grid file.
    with utils.timed_block("Reading Tecplot grid data"):
        # Parse the header.
        grid_path = config.grid_data_path()
        with open(grid_path, 'r') as infile:
            grid = infile.read()
        if int(re.findall(r"Elements=(\d+)", grid)[0]) != config.DOF:
            raise RuntimeError(f"{grid_path} DOF and config.DOF do not match")
        num_nodes = int(re.findall(r"Nodes=(\d+)", grid)[0])
        end_of_header = re.findall(r"DT=.*?\n", grid)[0]
        headersize = grid.find(end_of_header) + len(end_of_header)

        # Extract geometry information.
        grid_data = grid[headersize:].split()
        x = grid_data[:num_nodes]
        y = grid_data[num_nodes:2 * num_nodes]
        # cell_volume = grid_data[2*num_nodes:3*num_nodes]
        connectivity = grid_data[3 * num_nodes:]

    # Load the basis and extract desired variables.
    V, _, _ = utils.load_basis(trainsize, r)
    V = np.concatenate([dproc.getvar(var, V) for var in variables])

    # Save each of the basis vectors in Tecplot format matching grid.dat.
    for j in range(r):
        header = HEADER.format(varnames, j, j, num_nodes, config.DOF,
                               len(variables) + 2, "DOUBLE " * len(variables))
        save_folder = config._makefolder(config.tecplot_path(), "basis",
                                         config.TRNFMT(trainsize))
        save_path = os.path.join(save_folder, f"vec_{j+1:03d}.dat")
        with utils.timed_block(f"Writing basis vector {j+1:d}"):
            with open(save_path, 'w') as outfile:
                # Write the header.
                outfile.write(header)

                # Write the geometry data (x,y coordinates).
                for i in range(0, len(x), NCOLS):
                    outfile.write(' '.join(x[i:i + NCOLS]) + '\n')
                for i in range(0, len(y), NCOLS):
                    outfile.write(' '.join(y[i:i + NCOLS]) + '\n')

                # Write the data for each variable.
                for i in range(0, V.shape[0], NCOLS):
                    row = ' '.join(f"{v:.9E}" for v in V[i:i + NCOLS, j])
                    outfile.write(row + '\n')

                # Write connectivity information.
                for i in range(0, len(connectivity), NCOLS):
                    outfile.write(' '.join(connectivity[i:i + NCOLS]) + '\n')
    print(f"Basis info exported to {save_folder}/*.dat.")
def _read_tar_and_save_data(tfile, start, stop, parallel=True):
    """Read snapshot data directly from a .tar archive (without untar-ing it)
    and copy the data to the snapshot matrix HDF5 file config.GEMS_DATA_FILE.

    Parameters
    ----------
    tfile : str
        Name of a .tar file to read data from.

    start : int
        Index of the first snapshot contained in the .tar file.

    stop : int
        Index of the last snapshot contained in the .tar file.

    parallel : bool
        If True, then only print progress if start == 0 and lock / unlock
        when writing to the HDF5 file.
    """
    # Allocate space for the snapshots in this .tar file.
    num_snapshots = stop - start
    gems_data = np.empty((config.DOF*config.NUM_GEMSVARS, num_snapshots),
                         dtype=np.float64)
    times = np.empty(num_snapshots, dtype=np.float64)

    # Extract the data from the .tar file.
    with tarfile.open(tfile, 'r') as archive:
        for j,tarinfo in enumerate(archive):

            # Read the contents of one file.
            with archive.extractfile(tarinfo) as datfile:
                contents = datfile.read().decode()

            # Get the simulation time from the file name.
            simtime = float(_SIMTIME.findall(tarinfo.name)[0]) * config.DT

            # Parse and verify the header.
            header_end = _HEADEREND.findall(contents)[0]
            headersize = contents.find(header_end) + len(header_end)
            if int(_ELEMENTS.findall(contents[:headersize])[0]) != config.DOF:
                raise RuntimeError(f"{tarinfo.name} DOF != config.DOF")

            # Extract and store the variable data.
            data = contents[headersize:].split()[:gems_data.shape[0]]
            gems_data[:, j] = np.array(data, dtype=np.float64)
            times[j] = simtime
            if start == 0 or not parallel:
                print(f"\rProcessed file {j+1:05d}/{num_snapshots}",
                      end='', flush=True)
    if start == 0 or not parallel:
        print()

    # Save the data to the appropriate slice.
    save_path = config.gems_data_path()
    if parallel:
        lock.acquire()  # Only allow one process to open the file at a time.
    with utils.timed_block(f"Saving snapshots {start}-{stop} to HDF5"):
        with h5py.File(save_path, 'a') as hf:
            hf["data"][:,start:stop] = gems_data
            hf["time"][  start:stop] = times
    print(f"Data saved to {save_path}.")
    if parallel:
        lock.release()  # Let other processes resume.
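# The initializer _globalize_lock referenced in main() is not shown; the
# standard pattern it implements (one shared Lock handed to every pool worker)
# looks like this sketch:
def _globalize_lock(l):
    """Store the multiprocessing.Lock as a module-level global so workers
    running _read_tar_and_save_data() can serialize HDF5 writes."""
    global lock
    lock = l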
Example #30
def test_getvar(lifted_data):
    """Test data_processing.getvar()."""
    with utils.timed_block("Verifying variable extraction"):
        for i, v in enumerate(config.ROM_VARIABLES):
            s = slice(i * config.DOF, (i + 1) * config.DOF)
            assert np.all(dproc.getvar(v, lifted_data) == lifted_data[s])