Example #1
def generate_metrics(
    predictions: np.ndarray,
    targets: np.ndarray,
    plevel_predictions: Dict[str, np.ndarray],
    plevel_targets: Dict[str, np.ndarray],
    save_path: Union[os.PathLike, str]
) -> Dict[str, np.ndarray]:
    # Aggregate error metrics and summary statistics over the full test set
    metrics = {
        "maes": mean_absolute_error(targets, predictions, multioutput="raw_values"),
        "rmse": mean_squared_error(targets, predictions, multioutput="raw_values", squared=False),
        "stds": np.std(targets, axis=1),
        "mins": np.min(targets, axis=1),
        "maxes": np.max(targets, axis=1),
        "means": np.mean(targets, axis=1),
        "medians": np.median(targets, axis=1),
    }

    metrics["r_squared"] = calculate_r_squared(
        test_predictions=plevel_predictions,
        test_targets=plevel_targets,
    )

    to_pickle(os.path.join(save_path, "metrics.pkl"), metrics)

    return metrics
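The helpers to_pickle, from_pickle, and calculate_r_squared are defined elsewhere in the module and are not shown in these examples. A minimal sketch of the pickling pair, assuming the obvious contract (path plus object in, object out); the real helpers may differ:

import os
import pickle
from typing import Any, Union

def to_pickle(path: Union[os.PathLike, str], obj: Any) -> None:
    # Serialize obj to path with pickle
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def from_pickle(path: Union[os.PathLike, str]) -> Any:
    # Load a pickled object back from path
    with open(path, "rb") as f:
        return pickle.load(f)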
Example #2
def save_metadata(
    path: os.PathLike,
    total_samples: int,
    features: List[str],
    plevels: int,
    output_shape: Tuple[int, ...],
    indx: List[int],
) -> None:
    """
    Pickles Metadata:
        input_shape: shape of train tensor for vertical column
        output_shape: shape of gwfu or gwfv tensor for vertical column
        indx: indicies that we used to shuffle
        total samples extracted
    """
    # Features listed in VERTICAL_COLUMN_FEATURES contribute a single value per
    # column; every other feature contributes one value per pressure level.
    input_shape = len(features)
    num_vc_feat = 0
    for vc_feat in VERTICAL_COLUMN_FEATURES:
        if vc_feat in features:
            input_shape -= 1
            num_vc_feat += 1

    input_shape = input_shape * plevels + num_vc_feat
    to_pickle(
        path=path,
        obj={
            "total_samples": total_samples,
            "input_shape": input_shape,
            "output_shape": output_shape,
            "indx": indx,
        }
    )
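To make the shape arithmetic concrete, here is a worked run with hypothetical feature names and constants (the real VERTICAL_COLUMN_FEATURES values live elsewhere in the module):

# Hypothetical constants, for illustration only
features = ["ucomp", "temp", "hght", "slp"]
VERTICAL_COLUMN_FEATURES = ["slp"]  # assumed: features with one value per column
plevels = 33

input_shape = len(features)          # 4
num_vc_feat = 0
for vc_feat in VERTICAL_COLUMN_FEATURES:
    if vc_feat in features:
        input_shape -= 1             # 3 per-level features remain
        num_vc_feat += 1             # 1 single-value feature

input_shape = input_shape * plevels + num_vc_feat
print(input_shape)                   # 3 * 33 + 1 = 100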
Example #3
def save_metadata(
    save_path: Union[os.PathLike, str],
    source_path: Union[os.PathLike, str],
    metadata: Any,
) -> None:
    prev_metadata = from_pickle(os.path.join(source_path, "metadata.pkl"))
    # Shallow merge: keys in the new metadata override the previous values
    metadata = {**prev_metadata, **metadata}
    to_pickle(path=os.path.join(save_path, "metadata.pkl"), obj=metadata)
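Because the merge uses dict unpacking, it is shallow and right-biased: on a key collision the incoming metadata wins, and nested values are replaced outright rather than merged. A small illustration with made-up values:

prev_metadata = {"total_samples": 1000, "input_shape": 100}
metadata = {"total_samples": 500, "indx": [2, 0, 1]}
merged = {**prev_metadata, **metadata}
# merged == {"total_samples": 500, "input_shape": 100, "indx": [2, 0, 1]}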
Example #4
def extract_tensors(
    data: Data,
    save_path: Union[os.PathLike, str],
    num_samples: Union[int, None],
    plevels: int,
    batch_size: int,
) -> None:
    """
    Extracts feature tensors and target columns from raw data.

    Arguments:
    ----------
    data (Data):
    save_path (Union[os.PathLike, str]): path to save all files
    num_samples (Union[int, None]): number of samples to extract from data.
        If None, extracts all samples.
    plevels (int): number of pressure levels to include in tensors.
        Use to ignore low altitude pressure levels
    batch_size (int): number of samples to gather before writing to disk. Useful for
        environment with more memory or in time termination.

    Returns:
    --------
    None
    """
    raw_data = data.raw_data

    # If num_samples is unset or exceeds the data, fall back to all samples
    max_samples = data.time * data.lat * data.lon
    if not num_samples or num_samples > max_samples:
        logger.warning("Extracting all possible samples")
        num_samples = max_samples

    first_batch = True
    tensors, tensors_labels = (pd.DataFrame(), pd.DataFrame())
    targets_gwfu, targets_gwfv = (pd.DataFrame(), pd.DataFrame())
    for i in tqdm(range(num_samples), "Extracting Tensors"):
        # Map the flat sample index back to (time, lat, lon) grid coordinates
        t, lat, lon = np.unravel_index(i, (data.time, data.lat, data.lon))
        tensor, tensor_labels = (pd.DataFrame(), pd.DataFrame())

        for feat in TENSOR:
            if feat == "slp":
                # Sea-level pressure is a single value per vertical column
                labels = pd.DataFrame([f"slp_{t}_{lat}_{lon}"])
                tensor_labels = pd.concat([tensor_labels, labels], copy=False)

                slp = pd.DataFrame(data=[raw_data[feat][t, lat, lon]])
                tensor = pd.concat([tensor, slp], copy=False)
            elif feat in TRAIN_FEATURES:
                # Per-level features contribute one value per pressure level
                labels = pd.DataFrame([
                    f"{feat}_{t}_{plevel}_{lat}_{lon}"
                    for plevel in range(plevels)
                ])
                vertical_column = pd.DataFrame(data=raw_data[feat][t, :plevels, lat, lon])

                tensor = pd.concat([tensor, vertical_column], copy=False)
                tensor_labels = pd.concat([tensor_labels, labels], copy=False)
            elif feat in TARGET_FEATURES:
                # Targets only keep the pressure levels with non-zero drag
                vertical_column = pd.DataFrame(
                    data=raw_data[feat][t, :NON_ZERO_GWD_PLEVELS, lat, lon],
                )
                if feat == "gwfu_cgwd":
                    targets_gwfu = pd.concat([targets_gwfu, vertical_column], axis=1)
                else:
                    targets_gwfv = pd.concat([targets_gwfv, vertical_column], axis=1)
            else:
                logger.warning(f"Unused attribute: {feat}")

        # Concat tensors to batch
        tensors = pd.concat([tensors, tensor], axis=1)
        tensors_labels = pd.concat([tensors_labels, tensor_labels], axis=1)

        # Write out a full batch and reset the accumulators
        if tensors.shape[1] == batch_size:
            save_batch(
                tensors=tensors,
                labels=tensors_labels,
                targets_gwfu=targets_gwfu,
                targets_gwfv=targets_gwfv,
                save_path=save_path,
                include_header=first_batch,
            )
            if first_batch:
                to_pickle(
                    path=os.path.join(save_path, "metadata.pkl"),
                    obj={
                        "total_samples": num_samples,
                        "input_shape": tensors.iloc[:, 0].shape,
                        "output_shape": targets_gwfu.iloc[:, 0].shape,
                    }
                )
            first_batch = False
            tensors, tensors_labels = (pd.DataFrame(), pd.DataFrame())
            targets_gwfu, targets_gwfv = (pd.DataFrame(), pd.DataFrame())

    # Flush a final partial batch so trailing samples are not dropped
    if not tensors.empty:
        save_batch(
            tensors=tensors,
            labels=tensors_labels,
            targets_gwfu=targets_gwfu,
            targets_gwfv=targets_gwfv,
            save_path=save_path,
            include_header=first_batch,
        )
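The extraction loop walks a single flat index and relies on np.unravel_index to recover grid coordinates. A quick, self-contained illustration with a hypothetical grid:

import numpy as np

dims = (10, 64, 128)  # hypothetical (time, lat, lon) grid

np.unravel_index(0, dims)     # -> (0, 0, 0)
np.unravel_index(129, dims)   # -> (0, 1, 1): one full lon row, plus one
np.unravel_index(8192, dims)  # -> (1, 0, 0): 64 * 128 columns = one time step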
Example #5
    def __init__(
        self,
        source_path: Union[os.PathLike, str],
        scaler_path: Union[os.PathLike, str],
        num_samples: Union[None, float],
        target: str,
        remove_outliers: Union[str, float],
        save_path: Union[os.PathLike, str],
        model,
        evaluate_with_random: bool = False,
    ) -> None:

        test_tensors_fp = os.path.join(source_path, "tensors.csv")
        test_targets_fp = os.path.join(source_path, f"{target}.csv")

        # Get Scalers
        tensors_scaler_fp = os.path.join(scaler_path, "tensors_scaler.pkl")
        tensors_scaler = from_pickle(tensors_scaler_fp)

        target_scaler_fp = os.path.join(scaler_path, f"{target}_scaler.pkl")
        target_scaler = from_pickle(target_scaler_fp)

        self.predictions = []
        self.targets = []
        # Stream the test set in chunks to bound memory use
        chunksize = 100000
        num_total_predictions = 0
        if num_samples is not None and int(num_samples) < chunksize:
            num_samples = int(num_samples)
            chunksize = num_samples

        for test_tensors, test_targets in tqdm(
                zip(
                    pd.read_csv(test_tensors_fp,
                                header=None,
                                chunksize=chunksize),
                    pd.read_csv(test_targets_fp,
                                header=None,
                                chunksize=chunksize),
                ), "Load test data"):
            if num_samples is not None and num_total_predictions >= int(
                    num_samples):
                break

            test_tensors = test_tensors.to_numpy()
            test_targets = test_targets.to_numpy()

            # Scale the input tensors with the fitted scaler
            test_tensors = tensors_scaler.transform(test_tensors)
            if evaluate_with_random:
                # Random baseline: replace the inputs with standard-normal noise
                test_tensors = np.random.normal(loc=0.0,
                                                scale=1.0,
                                                size=test_tensors.shape)

            self.targets.append(test_targets)
            self.predictions.append(
                self.predict(
                    model=model,
                    tensors=test_tensors,
                    target_scaler=target_scaler,
                ))

            # The final chunk may be smaller than chunksize
            num_total_predictions += test_tensors.shape[0]

        self.predictions = np.concatenate(self.predictions, axis=0)
        self.targets = np.concatenate(self.targets, axis=0)

        # Remove outliers and split into dictionaries keyed on pressure level
        self.plevel_predictions, self.plevel_targets = self.split_predictions_on_plevel(
            predictions=self.predictions,
            targets=self.targets,
            outliers=remove_outliers,
        )

        # Save unaltered predictions and targets
        to_pickle(path=os.path.join(save_path, "predictions.pkl"),
                  obj={
                      "predictions": self.predictions,
                      "targets": self.targets,
                  })
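The constructor calls a self.predict method that is not shown in this example. A minimal sketch of one plausible implementation, assuming an sklearn-style model with .predict and a fitted scaler with .inverse_transform; the real method may differ:

    def predict(self, model, tensors, target_scaler):
        # Hypothetical sketch, not the author's code: run the model on the
        # scaled inputs, then map predictions back to physical units.
        scaled_predictions = model.predict(tensors)
        return target_scaler.inverse_transform(scaled_predictions)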