def __init__(self,
             ref_spd: List,
             target_spd,
             averaging_prd='1H',
             coverage_threshold=0.9,
             preprocess=True):
    """
    Store the inputs and build ``self.data``, the combined reference/target table.

    :param ref_spd: pandas objects holding the reference data. They are joined column-wise with an
            inner join and stored as ``self.ref_spd``.
    :type ref_spd: List
    :param target_spd: Target data; ends up as the last column ('target_spd') of ``self.data``.
    :param averaging_prd: Averaging period handed to the pre-processing helper. Only used when
            ``preprocess`` is True.
    :type averaging_prd: str
    :param coverage_threshold: Coverage threshold handed to the pre-processing helper. Only used when
            ``preprocess`` is True.
    :param preprocess: If True, reference and target are first passed through
            ``tf._preprocess_data_for_correlations``; if False they are joined exactly as given.
    :type preprocess: bool
    """
    self.ref_spd = pd.concat(ref_spd, axis=1, join='inner')
    self.target_spd = target_spd
    self.averaging_prd = averaging_prd
    self.coverage_threshold = coverage_threshold
    self.preprocess = preprocess
    if preprocess:
        frames = list(
            tf._preprocess_data_for_correlations(self.ref_spd,
                                                 self.target_spd,
                                                 averaging_prd,
                                                 coverage_threshold))
    else:
        # Must be a list literal: list(a, b) raises TypeError because the
        # list constructor accepts at most one argument.
        frames = [self.ref_spd, self.target_spd]
    self.data = pd.concat(frames, axis=1, join='inner')
    # Normalise column names: ref_spd_1 .. ref_spd_N followed by target_spd.
    n_refs = len(self.ref_spd.columns)
    self.data.columns = [
        'ref_spd_' + str(i) for i in range(1, n_refs + 1)
    ] + ['target_spd']
    # Keep only the rows where every reference and the target have a value.
    self.data = self.data.dropna()
    self.params = 'not run yet'
Exemple #2
0
 def _averager(ref_spd, target_spd, averaging_prd, coverage_threshold, ref_dir, target_dir):
     """
     Join the pre-processed reference/target data column-wise, optionally with direction data.

     The speed inputs are passed through ``tf._preprocess_data_for_correlations`` and the pieces it
     returns are concatenated with an inner join. When both ``ref_dir`` and ``target_dir`` are given,
     the pieces returned by ``tf._preprocess_dir_data_for_correlations`` are appended as extra columns.

     :param ref_spd: Reference data forwarded to the pre-processing helpers.
     :param target_spd: Target data forwarded to the pre-processing helpers.
     :param averaging_prd: Averaging period forwarded to the pre-processing helpers.
     :param coverage_threshold: Coverage threshold forwarded to the pre-processing helpers.
     :param ref_dir: Reference direction data, or None to skip the direction columns.
     :param target_dir: Target direction data, or None to skip the direction columns.
     :return: The inner-joined result of ``pd.concat`` along axis 1.
     """
     speed_parts = list(tf._preprocess_data_for_correlations(
         ref_spd, target_spd, averaging_prd, coverage_threshold))
     data = pd.concat(speed_parts, axis=1, join='inner')

     # Direction columns are only added when both direction inputs are supplied.
     if ref_dir is None or target_dir is None:
         return data

     dir_parts = list(tf._preprocess_dir_data_for_correlations(
         ref_spd, ref_dir, target_spd, target_dir, averaging_prd, coverage_threshold))
     return pd.concat([data] + dir_parts, axis=1, join='inner')
Exemple #3
0
def concurrent_coverage(ref,
                        target,
                        averaging_prd,
                        aggregation_method_target='mean'):
    """
    Accepts ref and target data and returns the coverage of concurrent data.

    The work is delegated to ``tf._preprocess_data_for_correlations`` with a coverage threshold of 0
    and ``get_coverage=True``. Any column of the result whose name contains "_Coverage" is then
    renamed to "Coverage".

    :param ref: Reference data
    :type ref: pandas.Series
    :param target: Target data
    :type target: pandas.Series
    :param averaging_prd: Period over which the data is grouped.

            * 2T, 2 min for minutely average
            * Set period to 1D for a daily average, 3D for a three-day average, similarly 5D, 7D, 15D etc.
            * Set period to 1H for hourly average, 3H for three hourly average and so on for 5H, 6H etc.
            * Set period to 1M for monthly average
            * Set period to 1AS for annual average

    :type averaging_prd: str
    :param aggregation_method_target: (Optional) Calculates mean of the data for the given averaging_prd by default.
            Can be changed to 'sum', 'std', 'max', 'min', etc. or a user defined function
    :return: The DataFrame produced by the pre-processing helper, with every column whose name contains
            "_Coverage" renamed to "Coverage".

    """
    helper_kwargs = {
        'ref': ref,
        'target': target,
        'averaging_prd': averaging_prd,
        # A threshold of 0 is passed so that coverage filtering is effectively off
        # (presumably keeps every period; confirm against the helper).
        'coverage_threshold': 0,
        'aggregation_method_target': aggregation_method_target,
        'get_coverage': True,
    }
    coverage_df = tf._preprocess_data_for_correlations(**helper_kwargs)

    new_names = []
    for col in coverage_df.columns:
        if "_Coverage" in col:
            new_names.append("Coverage")
        else:
            new_names.append(col)
    coverage_df.columns = new_names
    return coverage_df