Example #1
def save_statistical_features():
    """Compute the (spatial) mean temperatures on the full time domain and
    save them for later. This only needs to be done once.
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    with utils.timed_block("Computing statistical features of variables"):
        mins, maxs, sums, stds, means = {}, {}, {}, {}, {}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            mins[var] = val.min(axis=0)
            maxs[var] = val.max(axis=0)
            sums[var] = val.sum(axis=0)
            stds[var] = val.std(axis=0)
            means[var] = sums[var] / val.shape[0]

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                hf.create_dataset(f"{var}_min", data=mins[var])
                hf.create_dataset(f"{var}_max", data=maxs[var])
                hf.create_dataset(f"{var}_sum", data=sums[var])
                hf.create_dataset(f"{var}_std", data=stds[var])
                hf.create_dataset(f"{var}_mean", data=means[var])
            hf.create_dataset("time", data=t)
    logging.info(f"Statistical features saved to {data_path}")
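
For context, here is a minimal sketch of reading the saved file back, assuming the dataset names written above ("T_mean", "time") and the same config.statistical_features_path(); the helper plot_mean_temperature and the plotting choices are hypothetical.

import h5py
import matplotlib.pyplot as plt

import config  # same configuration module used by save_statistical_features()


def plot_mean_temperature():
    """Hypothetical example: load the saved spatial statistics and plot the
    spatially averaged temperature as a function of time."""
    with h5py.File(config.statistical_features_path(), 'r') as hf:
        t = hf["time"][:]          # time domain written above
        T_mean = hf["T_mean"][:]   # spatial mean temperature at each time step

    plt.plot(t, T_mean)
    plt.xlabel("time")
    plt.ylabel("spatial mean temperature")
    plt.show()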
Example #2
def load_statistical_features(keys, k=None):
    """Load statistical features of the lifted data, computed over the
    spatial domain at each point in time.

    Parameters
    ----------
    keys : list(str)
        Which data set(s) to load. Options:
        * {var}_min : minimum of variable var
        * {var}_max : maximum of variable var
        * {var}_sum : sum (integral) of variable var
        * {var}_std : standard deviation of variable var
        * {var}_mean : mean of variable var
        Here var is a member of config.ROM_VARIABLES. Examples:
        * "T_mean" -> mean temperature
        * "vx_min" -> minimum x-velocity
        * "CH4_sum" -> methane molar concentration integral

    k : int, slice, or one-dimensional ndarray of sorted integer indices
        Number of time steps of data to load, or a slice / index array
        selecting which time steps to load (default all).

    Returns
    -------
    features : dict(str -> (k,) ndarray) or (k,) ndarray
        Dictionary of statistical feature arrays with keys `keys`.
        If only one key is given, return the actual array, not a dict.

    t : (k,) ndarray
        Time domain corresponding to the statistical features.
    """
    # Locate the data.
    data_path = _checkexists(config.statistical_features_path())

    # Parse arguments.
    if isinstance(keys, str):
        keys = [keys]
    elif keys is None:
        keys = ["T_mean"] + [f"{spc}_int" for spc in config.SPECIES]
    if np.isscalar(k) or k is None:
        k = slice(None, k)

    # Extract the data.
    # Extract the data.
    with timed_block(f"Loading statistical features from {data_path}"):
        with h5py.File(data_path, 'r') as hf:
            if len(keys) == 1:
                return hf[keys[0]][k], hf["time"][k]
            return {key: hf[key][k] for key in keys}, hf["time"][k]
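
A hedged usage sketch for the loader above; the key names follow the options listed in the docstring, and the value of k is an arbitrary placeholder.

# Load a single feature: the array itself is returned along with the time domain.
T_mean, t = load_statistical_features("T_mean", k=5000)

# Load several features at once: a dict keyed by the requested names.
feats, t = load_statistical_features(["T_mean", "T_max", "CH4_sum"])
print(feats["T_max"].shape, t.shape)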
Example #3
def save_statistical_features():
    """Compute the spatial and temporal statistics (min, max, mean, etc.)
    for each variable and save them for later. This only needs to be done once.
    """
    # Load the full data set.
    gems_data, t = utils.load_gems_data()

    # Lift the data (convert to molar concentrations).
    with utils.timed_block("Lifting GEMS data"):
        lifted_data = dproc.lift(gems_data)

    # Compute statistical features.
    with utils.timed_block("Computing statistical features of variables"):
        mins, maxs, sums, stds, means = {}, {}, {}, {}, {}
        for var in config.ROM_VARIABLES:
            val = dproc.getvar(var, lifted_data)
            for axis, label in enumerate(["space", "time"]):
                name = f"{label}/{var}"
                print(f"\n\tmin_{label}({var})", end='..', flush=True)
                mins[name] = val.min(axis=axis)
                print(f"max_{label}({var})", end='..', flush=True)
                maxs[name] = val.max(axis=axis)
                print(f"sum_{label}({var})", end='..', flush=True)
                sums[name] = val.sum(axis=axis)
                print(f"std_{label}({var})", end='..', flush=True)
                stds[name] = val.std(axis=axis)
            means[f"space/{var}"] = sums[f"space/{var}"] / val.shape[0]
            means[f"time/{var}"] = sums[f"time/{var}"] / t.size

    # Save the data.
    data_path = config.statistical_features_path()
    with utils.timed_block("Saving statistical features"):
        with h5py.File(data_path, 'w') as hf:
            for var in config.ROM_VARIABLES:
                for prefix in ["space", "time"]:
                    name = f"{prefix}/{var}"
                    hf.create_dataset(f"{name}_min", data=mins[name])
                    hf.create_dataset(f"{name}_max", data=maxs[name])
                    hf.create_dataset(f"{name}_sum", data=sums[name])
                    hf.create_dataset(f"{name}_std", data=stds[name])
                    hf.create_dataset(f"{name}_mean", data=means[name])
            hf.create_dataset("t", data=t)
    logging.info(f"Statistical features saved to {data_path}")
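
A small, purely illustrative sketch of inspecting the resulting file, assuming the "space/..." and "time/..." dataset names and the "t" dataset written above.

import h5py

import config  # same configuration module used above

# Print every dataset written by save_statistical_features(),
# e.g. "space/T_min", "space/T_mean", ..., "time/CH4_std", and "t".
with h5py.File(config.statistical_features_path(), 'r') as hf:
    hf.visititems(lambda name, obj: print(name, obj.shape)
                  if isinstance(obj, h5py.Dataset) else None)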
Example #4
def main(trainsize,
         r,
         regs,
         elems=None,
         plotPointTrace=False,
         plotRelativeErrors=False,
         plotSpatialStatistics=False):
    """Make the indicated visualization.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM.

    regs : two positive floats
        Regularization hyperparameters used to train the ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute time traces.

    plotPointTrace : bool
        If True, plot point traces in time at the locations `elems`.

    plotRelativeErrors : bool
        If True, plot relative projection / prediction errors in time.

    plotSpatialStatistics : bool
        If True, plot spatial statistics of the data in time.
    """
    utils.reset_logger(trainsize)

    # Point traces in time.
    if plotPointTrace:
        logging.info("POINT TRACES")
        point_traces(trainsize, r, regs, elems)

    # Relative projection / prediction errors in time.
    if plotRelativeErrors:
        logging.info("ERRORS IN TIME")
        errors_in_time(trainsize, r, regs)

    # Spatial statistic in time.
    if plotSpatialStatistics:
        logging.info("SPATIAL STATISTICS")
        # Compute GEMS features if needed (only done once).
        if not os.path.isfile(config.statistical_features_path()):
            save_statistical_features()
        spatial_statistics(trainsize, r, regs)
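
A hedged example of calling this driver; every numerical argument below (training size, ROM dimension, regularization values, monitoring indices) is a placeholder, not a value from the original study.

# Hypothetical invocation: produce all three visualizations for one trained ROM.
main(trainsize=20000, r=40, regs=(100, 20000),
     elems=[0, 5000, 10000],
     plotPointTrace=True,
     plotRelativeErrors=True,
     plotSpatialStatistics=True)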
Example #5
def main(trainsize,
         r,
         reg,
         elems,
         plotTimeTrace=False,
         plotStatisticalFeatures=False):
    """Make the indicated visualization.

    Parameters
    ----------
    trainsize : int
        Number of snapshots used to train the ROM.

    r : int
        Dimension of the ROM. This is also the number of retained POD
        modes (left singular vectors) used to project the training data.

    reg : float
        The regularization parameter used to train the ROM.

    elems : list(int) or ndarray(int)
        Indices in the spatial domain at which to compute time traces.

    plotTimeTrace : bool
        If True, plot time traces at the monitoring locations `elems`
        (single ROM, several monitoring locations).

    plotStatisticalFeatures : bool
        If True, plot statistical features of the data in time
        (single ROM, several features).
    """
    utils.reset_logger(trainsize)

    # Time traces (single ROM, several monitoring locations).
    if plotTimeTrace:
        logging.info("TIME TRACES")
        time_traces(trainsize, r, reg, elems)

    # Statistical features (single ROM, several features).
    if plotStatisticalFeatures:
        logging.info("STATISTICAL FEATURES")
        # Compute GEMS features if needed (only done once).
        if not os.path.isfile(config.statistical_features_path()):
            save_statistical_features()
        statistical_features(trainsize, r, reg)
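
Analogously, a hedged call for this variant of the driver, which takes a single regularization parameter; all values shown are placeholders.

# Hypothetical invocation: time traces at a few monitoring locations,
# plus the statistical-feature plots.
main(trainsize=10000, r=30, reg=1e4,
     elems=[100, 2500, 7500],
     plotTimeTrace=True,
     plotStatisticalFeatures=True)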
Example #6
def load_temporal_statistics(keys):
    """Load statistical features of the lifted data, computed over the
    temporal domain at each spatial point.

    Parameters
    ----------
    keys : list(str)
        Which data set(s) to load. Options:
        * {var}_min : minimum of variable var
        * {var}_max : maximum of variable var
        * {var}_sum : sum (integral) of variable var
        * {var}_std : standard deviation of variable var
        * {var}_mean : mean of variable var
        Here var is a member of config.ROM_VARIABLES. Examples:
        * "T_mean" -> time-averaged temperature
        * "vx_min" -> minimum x-velocity
        * "CH4_sum" -> methane molar concentration time integral

    Returns
    -------
    features : dict(str -> (N,) ndarray) or (N,) ndarray
        Dictionary of statistical feature arrays with keys `keys`.
        If only one key is given, return the actual array, not a dict.
    """
    # Locate the data.
    data_path = _checkexists(config.statistical_features_path())

    # Parse arguments.
    if isinstance(keys, str):
        keys = [keys]

    # Extract the data.
    with timed_block(f"Loading statistical features from {data_path}"):
        with h5py.File(data_path, 'r') as hf:
            if len(keys) == 1:
                return hf[f"time/{keys[0]}"][:]
            return {key: hf[f"time/{key}"][:] for key in keys}
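
Finally, a hedged usage sketch for the temporal-statistics loader; the key names follow the docstring options and the printed summary is illustrative.

# Single key: the array itself is returned (one value per spatial point).
T_time_mean = load_temporal_statistics("T_mean")

# Several keys: a dict keyed by the requested names.
stats = load_temporal_statistics(["T_min", "T_max", "T_std"])
print({key: val.shape for key, val in stats.items()})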