def __init__(self, ref_spd: List, target_spd, averaging_prd='1H', coverage_threshold=0.9, preprocess=True):
    """
    Store the inputs and assemble the combined reference/target table in ``self.data``.

    :param ref_spd: Reference datasets. They are concatenated column-wise, keeping only the index
        values common to all of them (inner join).
    :type ref_spd: List
    :param target_spd: Target dataset; it ends up as the last column (``target_spd``) of ``self.data``.
    :param averaging_prd: Averaging period handed to the preprocessing helper. Only used when
        ``preprocess`` is True.
    :type averaging_prd: str
    :param coverage_threshold: Coverage threshold handed to the preprocessing helper. Only used when
        ``preprocess`` is True.
    :type coverage_threshold: float
    :param preprocess: When True the data is first passed through
        ``tf._preprocess_data_for_correlations``; when False the raw reference and target data are
        joined as they are.
    :type preprocess: bool
    """
    self.ref_spd = pd.concat(ref_spd, axis=1, join='inner')
    self.target_spd = target_spd
    self.averaging_prd = averaging_prd
    self.coverage_threshold = coverage_threshold
    self.preprocess = preprocess

    if preprocess:
        # NOTE(review): the helper is assumed to return an iterable of (reference, target)
        # pandas objects - confirm against tf._preprocess_data_for_correlations.
        frames = list(tf._preprocess_data_for_correlations(self.ref_spd, self.target_spd,
                                                           averaging_prd, coverage_threshold))
    else:
        # A list literal is required here: list(a, b) raises TypeError because the
        # built-in list() accepts at most one argument.
        frames = [self.ref_spd, self.target_spd]
    self.data = pd.concat(frames, axis=1, join='inner')

    # One 'ref_spd_<n>' label (1-based) per reference column, followed by the target column.
    ref_labels = ['ref_spd_{}'.format(n) for n in range(1, len(self.ref_spd.columns) + 1)]
    self.data.columns = ref_labels + ['target_spd']
    # Keep only the rows where every column has a value.
    self.data = self.data.dropna()
    self.params = 'not run yet'
def _averager(ref_spd, target_spd, averaging_prd, coverage_threshold, ref_dir, target_dir):
    """
    Build the table of preprocessed speed data, optionally extended with direction data.

    The outputs of ``tf._preprocess_data_for_correlations`` are joined column-wise on their common
    index. When both ``ref_dir`` and ``target_dir`` are supplied, the outputs of
    ``tf._preprocess_dir_data_for_correlations`` are appended as further columns, again keeping
    only the common index values.

    :param ref_spd: Reference speed data.
    :param target_spd: Target speed data.
    :param averaging_prd: Averaging period forwarded to the preprocessing helpers.
    :param coverage_threshold: Coverage threshold forwarded to the preprocessing helpers.
    :param ref_dir: Reference direction data, or None to skip the direction columns.
    :param target_dir: Target direction data, or None to skip the direction columns.
    :return: The inner-joined table produced by ``pd.concat``.
    """
    speed_parts = list(tf._preprocess_data_for_correlations(
        ref_spd, target_spd, averaging_prd, coverage_threshold))
    data = pd.concat(speed_parts, axis=1, join='inner')

    # Direction columns are only added when both direction inputs were given.
    if ref_dir is None or target_dir is None:
        return data

    dir_parts = list(tf._preprocess_dir_data_for_correlations(
        ref_spd, ref_dir, target_spd, target_dir, averaging_prd, coverage_threshold))
    return pd.concat([data] + dir_parts, axis=1, join='inner')
def concurrent_coverage(ref, target, averaging_prd, aggregation_method_target='mean'):
    """
    Accepts ref and target data and returns the coverage of concurrent data.

    :param ref: Reference data
    :type ref: pandas.Series
    :param target: Target data
    :type target: pandas.Series
    :param averaging_prd: Groups data by the period specified here.

        * Set period to 2T for a 2 minute average
        * Set period to 1D for a daily average, 3D for a three day average, similarly 5D, 7D, 15D etc.
        * Set period to 1H for an hourly average, 3H for a three hourly average and so on for 5H, 6H etc.
        * Set period to 1M for a monthly average
        * Set period to 1AS for an annual average

    :type averaging_prd: str
    :param aggregation_method_target: (Optional) Calculates mean of the data for the given averaging_prd by
        default. Can be changed to 'sum', 'std', 'max', 'min', etc. or a user defined function
    :return: The result of ``tf._preprocess_data_for_correlations`` called with ``coverage_threshold=0`` and
        ``get_coverage=True``, in which every column whose name contains ``_Coverage`` has been renamed to
        ``Coverage``. All other column names are left unchanged.
    """
    coverage_df = tf._preprocess_data_for_correlations(
        ref=ref,
        target=target,
        averaging_prd=averaging_prd,
        coverage_threshold=0,
        aggregation_method_target=aggregation_method_target,
        get_coverage=True,
    )

    # Collapse any '<name>_Coverage' label to plain 'Coverage'; other labels pass through.
    new_labels = []
    for label in coverage_df.columns:
        if "_Coverage" in label:
            new_labels.append("Coverage")
        else:
            new_labels.append(label)
    coverage_df.columns = new_labels
    return coverage_df