def combinations_with_pearson(map_array,
                              reference_series,
                              combination_func,
                              measures,
                              labels,
                              scaling_func=comp.binning_values_to_quantiles,
                              level=0):
    """
    Combine every similarity measure with Pearson's Correlation.

    For each measure the similarity map between map_array and
    reference_series is computed and passed through scaling_func. The map of
    Pearson's Correlation (sim.pearson_correlation) is computed once, is not
    scaled, and is combined with each scaled map via
    calc.combine_similarity_measures using combination_func.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        combination_func (function): Function that combines two similarity
            values into one
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures (not used by this
            function)
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0

    Returns:
        List with one combination map per measure, in the order of measures
    """
    #Scaled similarity map for every measure
    scaled_maps = [
        scaling_func(
            calc.calculate_series_similarity(map_array, reference_series,
                                             level, measure))
        for measure in measures
    ]

    #Pearson's Correlation is the fixed partner of every combination
    pearson_map = calc.calculate_series_similarity(map_array,
                                                   reference_series, level,
                                                   sim.pearson_correlation)

    return [
        calc.combine_similarity_measures(pearson_map, scaled_map,
                                         combination_func)
        for scaled_map in scaled_maps
    ]
def plot_similarities_to_different_datasets(
        datasets,
        dataset_labels,
        reference_series,
        measures,
        measure_labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot the similarities for different similarity measures between a
    reference series and different datasets.

    The figure has one row per dataset and one column per measure. The
    results are made comparable using the scaling_func. The results of
    Pearson's Correlation (sim.pearson_correlation and
    sim.pearson_correlation_abs) stay unscaled.

    The figure is created and annotated, but plt.show() is not called here.

    Args:
        datasets (list): List with datasets to compute the similarity to
        dataset_labels (list): List of labels for the datasets
        reference_series (numpy.ndarray): 1 dimensional reference series
        measures (list): List of similarity measures to compute similarity
            between two time series
        measure_labels (list): List of labels for the measures
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    n_datasets = len(datasets)
    len_measures = len(measures)

    #Measures whose results are plotted as they are. The former check
    #"m != a or m != b" was true for every measure, so Pearson's got scaled.
    unscaled_measures = (sim.pearson_correlation,
                         sim.pearson_correlation_abs)

    fig, ax = plt.subplots(nrows=n_datasets,
                           ncols=len_measures,
                           figsize=(10 * len_measures, 14 * n_datasets))

    for j, dataset in enumerate(datasets):
        for i, measure in enumerate(measures):
            similarity = calc.calculate_series_similarity(
                dataset, reference_series, level, measure)

            #Scale results for similarity measures different than Pearson's
            if measure not in unscaled_measures:
                similarity = scaling_func(similarity)

            #Check axis
            axis = check_axis(ax,
                              row=j,
                              column=i,
                              row_count=n_datasets,
                              column_count=len_measures)

            #Plot results on map
            plot_map(similarity, axis)

    #Annotate rows and columns
    annotate(ax,
             row_count=n_datasets,
             column_count=len_measures,
             row_labels=dataset_labels,
             column_labels=measure_labels)

    fig.suptitle("Similarities to different datasets")
def plot_similarity_measures_combinations(
        map_array,
        reference_series,
        combination_func,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot a square matrix with the combination of every pair of similarity
    measures.

    Each measure's similarity map is passed through scaling_func first. The
    cell in row i and column j shows the result of
    calc.combine_similarity_measures for the scaled maps of measure i and
    measure j, combined with combination_func.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        combination_func (function): Function that combines two similarity
            values into one
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures, used for both rows
            and columns
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    #One scaled similarity map per measure
    scaled_maps = [
        scaling_func(
            calc.calculate_series_similarity(map_array, reference_series,
                                             level, measure))
        for measure in measures
    ]

    size = len(measures)
    fig, ax = plt.subplots(nrows=size,
                           ncols=size,
                           figsize=(8 * size, 8 * size))

    #Fill the matrix cell by cell
    for row in range(size):
        for col in range(size):
            combined = calc.combine_similarity_measures(
                scaled_maps[row], scaled_maps[col], combination_func)
            cell_axis = check_axis(ax,
                                   row=row,
                                   column=col,
                                   row_count=size,
                                   column_count=size)
            plot_map(combined[:], cell_axis)

    annotate(ax,
             row_count=size,
             column_count=size,
             row_labels=labels,
             column_labels=labels)

    fig.suptitle("Combination of similarity measures")
    plt.show()
def plot_similarities_winter_only(
        map_array,
        reference_series,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0,
        n_years=40):
    """
    Plot the similarity of a reference data series and all points on the map
    for the whole period, but only winter months are taken into account,
    regarding different similarity measures

    Each column contains a different similarity measure.

    The winter months are taken from the time axis at the offsets 0, 1 and 11
    of every block of 12 time steps (January, February and December,
    assuming monthly data that starts in January).

    In order to make the values of the different similarity measures
    comparable, they are scaled using scaling_func (default: binned in 10%
    bins using comparing.binning_values_to_quantiles).

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        measures (list): List with similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
        n_years (int, optional): Number of 12-step years to take winter
            months from. The data must contain at least 12 * n_years time
            steps.
            Defaults to 40
    """
    n_measures = len(measures)
    fig, ax = plt.subplots(nrows=1,
                           ncols=n_measures,
                           figsize=(8 * n_measures, 10))

    #Indices of January, February and December of every year
    winter_offsets = (0, 1, 11)
    winter_indices = [
        12 * year + offset for year in range(n_years)
        for offset in winter_offsets
    ]

    #Extract winter values
    reference_series_winter = reference_series[winter_indices]
    map_array_winter = map_array[winter_indices, :, :, :]

    for i, measure in enumerate(measures):
        #Compute similarity
        sim_whole_period_winter = calc.calculate_series_similarity(
            map_array_winter, reference_series_winter, level, measure)

        #Check if only one map
        axis = check_axis(ax, column=i, column_count=n_measures)

        #Draw map
        plot_map(scaling_func(sim_whole_period_winter), axis)

    annotate(ax, column_count=n_measures, column_labels=labels)

    fig.suptitle(
        "Similarity between QBO and all other points 1979 - 2019 for Winter months"
    )
    plt.show()
def plot_sign_of_correlation_strength_of_all(
        map_array,
        reference_series,
        combination_func,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot one map that combines several similarity measures and carries the
    sign of Pearson's Correlation.

    Every measure's similarity map is scaled with scaling_func. The unscaled
    map of sim.pearson_correlation is kept as the sign map. The scaled maps,
    the sign map and combination_func are handed to
    comb.combine_power_with_sign, and its result is plotted.

    Note: the sign map is only set when sim.pearson_correlation is one of the
    measures; otherwise None is passed on as the sign map.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        combination_func (function): Function used by
            comb.combine_power_with_sign to combine the similarity values;
            its __name__ appears in the figure title
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures, shown in the title
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    pearson_map = None
    scaled_maps = []

    for measure in measures:
        raw_map = calc.calculate_series_similarity(map_array,
                                                   reference_series, level,
                                                   measure)
        #Keep the raw Pearson map, it provides the sign
        if measure == sim.pearson_correlation:
            pearson_map = raw_map
        scaled_maps.append(scaling_func(raw_map))

    fig, ax = plt.subplots(nrows=1,
                           ncols=1,
                           figsize=(14 * len(measures), 10))

    combined = comb.combine_power_with_sign(combination_func, scaled_maps,
                                            pearson_map)
    plot_map(combined, ax)

    title = "Sign of Pearson's and values of {} combined by {}".format(
        labels, combination_func.__name__)
    fig.suptitle(title)
    plt.show()
def plot_similarity_dependency(map_array,
                               reference_series,
                               measures,
                               labels,
                               level=0):
    """
    Plot a matrix of scatter plots that shows how the values of two
    similarity measures depend on each other.

    The cell in row i and column j has the values of measure j on the x-axis
    and the values of measure i on the y-axis. The similarity values are used
    unscaled.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures, used for both rows
            and columns
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    #One similarity map per measure, converted to a numpy array
    value_maps = [
        np.array(
            calc.calculate_series_similarity(map_array, reference_series,
                                             level, measure))
        for measure in measures
    ]

    size = len(measures)
    fig, ax = plt.subplots(nrows=size,
                           ncols=size,
                           figsize=(8 * size, 8 * size))

    #Scatter every pair of measures against each other
    for row in range(size):
        for col in range(size):
            cell_axis = check_axis(ax,
                                   row=row,
                                   column=col,
                                   row_count=size,
                                   column_count=size)
            cell_axis.scatter(value_maps[col], value_maps[row])

    annotate(ax,
             row_count=size,
             column_count=size,
             row_labels=labels,
             column_labels=labels)

    fig.suptitle("Dependency between pairs of similarity measures")
    plt.show()
def plot_similarities_whole_period(
        map_array,
        reference_series,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot, for every similarity measure, the similarity between a reference
    series and all points on the map over the whole period.

    The figure consists of a single row with one map per measure. Each
    similarity map is passed through scaling_func before it is drawn, so that
    the value ranges of the different measures are comparable.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        measures (list): List with similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    n_maps = len(measures)
    fig, ax = plt.subplots(nrows=1, ncols=n_maps, figsize=(8 * n_maps, 10))

    for column, measure in enumerate(measures):
        #Similarity over the complete time axis
        raw_map = calc.calculate_series_similarity(map_array,
                                                   reference_series, level,
                                                   measure)

        #Pick the axis for this column (handles the single-map case)
        target_axis = check_axis(ax, column=column, column_count=n_maps)

        #Scale and draw
        plot_map(scaling_func(raw_map), target_axis)

    annotate(ax, column_count=n_maps, column_labels=labels)

    fig.suptitle(
        "Similarity between QBO and all other points for the whole period")
    plt.show()
def plot_similarity_measures_combination(
        map_array,
        reference_series,
        combination_func,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot the combinations of similarity measures.

    The combination_func defines how the different similarity values are
    combined. Before the values are combined, they are scaled using the
    scaling_func. The result is drawn as a single map.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        combination_func (function): Function that combines a list of
            similarity maps into one; its __name__ appears in the figure
            title
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures, shown in the title
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    #Compute scaled similarities. The local is deliberately not called "sim",
    #which is the name of the similarity-measure module used in this file.
    similarities = [
        scaling_func(
            calc.calculate_series_similarity(map_array, reference_series,
                                             level, measure))
        for measure in measures
    ]

    #Plot the combined map
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(14, 10))

    combination = combination_func(similarities)

    plot_map(combination, ax)

    fig.suptitle("Combination of {} by {}".format(labels,
                                                  combination_func.__name__))
    plt.show()
def plot_time_delayed_similarities_to_different_datasets(
        datasets,
        dataset_labels,
        reference_series,
        time_shifts,
        measure,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot the similarities between a reference series and different datasets
    delayed by different time steps.

    Before computing the similarity, the reference series is shifted by a
    given index using calc.shift and the dataset stays unchanged. This
    procedure is repeated for every index shift (time_shifts) and for every
    dataset. The figure has one row per dataset and one column per shift.

    The results are made comparable using the scaling_func. The results of
    Pearson's Correlation (sim.pearson_correlation and
    sim.pearson_correlation_abs) stay unscaled.

    The figure is created and annotated, but plt.show() is not called here.

    Args:
        datasets (list): List with datasets to compute the similarity to
        dataset_labels (list): List of labels for the datasets
        reference_series (numpy.ndarray): 1 dimensional reference series
        time_shifts (array): List of integers that indicate by how many time
            units the series should be shifted
        measure (function): Similarity measure to compute similarity between
            two time series; its __name__ appears in the figure title
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    n_datasets = len(datasets)
    len_shifts = len(time_shifts)

    #The former check "m != a or m != b" was true for every measure, so
    #Pearson's results were scaled as well. The measure is the same for all
    #cells, so decide once.
    scale_results = measure not in (sim.pearson_correlation,
                                    sim.pearson_correlation_abs)

    fig, ax = plt.subplots(nrows=n_datasets,
                           ncols=len_shifts,
                           figsize=(10 * len_shifts, 14 * n_datasets))

    for i, shift in enumerate(time_shifts):
        #The shifted series only depends on the shift, not on the dataset
        shifted_reference_series = calc.shift(reference_series, shift)

        for j, dataset in enumerate(datasets):
            similarity = calc.calculate_series_similarity(
                dataset, shifted_reference_series, level, measure)

            #Scale results for similarity measures different than Pearson's
            if scale_results:
                similarity = scaling_func(similarity)

            #Check axis
            axis = check_axis(ax,
                              row=j,
                              column=i,
                              row_count=n_datasets,
                              column_count=len_shifts)

            #Plot results on map
            plot_map(similarity, axis)

    #Annotate rows and columns
    shift_labels = ["Shifted by {}".format(i) for i in time_shifts]
    annotate(ax,
             row_count=n_datasets,
             column_count=len_shifts,
             row_labels=dataset_labels,
             column_labels=shift_labels)

    fig.suptitle(
        "Similarities to different datasets for different time delays using {}"
        .format(measure.__name__))
def plot_time_delayed_dependencies(
        map_array,
        reference_series,
        time_shifts,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0):
    """
    Plot the similarities for different similarity measures between a
    reference series and the map delayed by different time steps.

    Before computing the similarity, the reference series is shifted by a
    given index using calc.shift and the map stays unchanged. The figure has
    one row per shift and one column per measure.

    The results are made comparable using the scaling_func. The results of
    Pearson's Correlation (sim.pearson_correlation and
    sim.pearson_correlation_abs) stay unscaled.

    The figure is created and annotated, but plt.show() is not called here.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        time_shifts (array): List of integers that indicate by how many time
            units the series should be shifted
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures, used as column labels
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
    """
    len_time_shifts = len(time_shifts)
    len_measures = len(measures)

    #Measures whose results are plotted as they are. The former check
    #"m != a or m != b" was true for every measure, so Pearson's got scaled.
    unscaled_measures = (sim.pearson_correlation,
                         sim.pearson_correlation_abs)

    fig, ax = plt.subplots(nrows=len_time_shifts,
                           ncols=len_measures,
                           figsize=(10 * len_measures, 14 * len_time_shifts))

    #Compute time delayed similarities
    for j, shift in enumerate(time_shifts):
        shifted_reference_series = calc.shift(reference_series, shift)

        for i, measure in enumerate(measures):
            similarity = calc.calculate_series_similarity(
                map_array, shifted_reference_series, level, measure)

            #Scale results for similarity measures different than Pearson's
            if measure not in unscaled_measures:
                similarity = scaling_func(similarity)

            #Check axis
            axis = check_axis(ax,
                              row=j,
                              column=i,
                              row_count=len_time_shifts,
                              column_count=len_measures)

            #Plot results on map
            plot_map(similarity, axis)

    #Annotate rows and columns. The column labels come from the "labels"
    #parameter; "measure_labels" is not a name of this function.
    shift_labels = ["Shifted by {}".format(i) for i in time_shifts]
    annotate(ax,
             row_count=len_time_shifts,
             column_count=len_measures,
             row_labels=shift_labels,
             column_labels=labels)

    fig.suptitle("Similarities to different time steps")
def plot_level_of_agreement(map_array,
                            reference_series,
                            scoring_func,
                            measures,
                            labels,
                            scaling_func=comp.binning_values_to_quantiles,
                            level=0):
    """
    Plot a map with the agreement of several similarity measures.

    For each similarity measure the scoring function will determine if there
    is a value that can be considered a dependency or not. The plotted map
    contains, for every point, the share of similarity measures that voted
    there is a dependency.

    Typical scoring function would be scoring_func = lambda x : x >= 0.8 and
    typical scaling function would be
    scaling_func=comp.binning_values_to_quantiles. Using these functions, the
    output map will show for how many similarity measures the similarity
    value between the time series of the point and the reference series is in
    the upper 20%.

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        scoring_func (function): Function that takes in a single value and
            outputs a boolean (whether there is a dependency or not)
        measures (list): List of similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures, shown in the title
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0

    Raises:
        ValueError: If measures is empty (the agreement would be a division
            by zero)
    """
    n_measures = len(measures)
    if n_measures == 0:
        raise ValueError("At least one similarity measure is required")

    #Compute scaled similarities
    similarities = [
        scaling_func(
            calc.calculate_series_similarity(map_array, reference_series,
                                             level, measure))
        for measure in measures
    ]

    #Compute agreement. The accumulator takes its shape from the data instead
    #of a fixed (256, 512) grid, so maps of any resolution work.
    vote = np.vectorize(scoring_func)
    agreement = np.zeros(np.shape(similarities[0]))
    for similarity_map in similarities:
        agreement = agreement + vote(similarity_map)

    agreement = agreement / n_measures

    #Draw Map
    fig, (ax, cax) = plt.subplots(nrows=2,
                                  figsize=(12, 8),
                                  gridspec_kw={"height_ratios": [1, 0.05]})
    plot_map(agreement, ax)

    #Draw Colorbar
    #NOTE(review): agreement holds fractions between 0 and 1 while the
    #colorbar is laid out from 0 to 100 - confirm that plot_map colors the
    #map consistently with this bar.
    cmap = matplotlib.cm.viridis
    bounds = np.linspace(0, 100, n_measures + 2)
    norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N)
    matplotlib.colorbar.ColorbarBase(cax,
                                     cmap=cmap,
                                     norm=norm,
                                     orientation='horizontal',
                                     ticks=np.linspace(0, 100,
                                                       n_measures + 1),
                                     boundaries=bounds)

    plt.title("Level of agreement (in %) between {}".format(labels))
    plt.show()
def plot_similarities_whole_period_per_month(
        map_array,
        reference_series,
        measures,
        labels,
        scaling_func=comp.binning_values_to_quantiles,
        level=0,
        n_years=40):
    """
    Plot the similarity of a reference data series and all points on the map
    for the whole period, but every month separately, regarding different
    similarity measures

    Each column contains a different similarity measure and each row contains
    a different month. The rows are labelled with the module-level months
    sequence (presumably the 12 month names). The values of one month are
    taken from the time axis with a stride of 12, assuming monthly data.

    In order to make the values of the different similarity measures
    comparable, they are scaled using scaling_func (default: binned in 10%
    bins using comparing.binning_values_to_quantiles).

    Args:
        map_array (numpy.ndarray): Map with 4 dimensions - time, level,
            latitude, longitude
        reference_series (numpy.ndarray): 1 dimensional reference series
        measures (list): List with similarity measures to compute similarity
            between two time series
        labels (list): List of labels for the measures
        scaling_func (function, optional): Function that takes a map of
            similarity values and scales them in order to make the
            similarity values of different similarity measures comparable
            Defaults to comp.binning_values_to_quantiles
        level (int, optional): Level on which the similarity should be
            calculated
            Defaults to 0
        n_years (int, optional): Number of 12-step years to take the monthly
            values from. The data must contain at least 12 * n_years time
            steps.
            Defaults to 40
    """
    len_measures = len(measures)
    #Same row count for the grid, the axis lookup and the annotation
    n_months = len(months)

    fig, ax = plt.subplots(figsize=(8 * len_measures, 14 * len_measures),
                           nrows=n_months,
                           ncols=len_measures)

    for month in range(n_months):
        #Extract monthly values
        map_array_month = np.array([
            map_array[12 * i + month, :, :, :] for i in range(n_years)
        ])
        reference_series_month = [
            reference_series[12 * i + month] for i in range(n_years)
        ]

        for i, measure in enumerate(measures):
            #Calculate similarity
            similarity_month = calc.calculate_series_similarity(
                map_array_month, reference_series_month, level, measure)

            axis = check_axis(ax,
                              row=month,
                              column=i,
                              row_count=n_months,
                              column_count=len_measures)

            #Plot Map
            scaled_similarity = scaling_func(similarity_month)
            plot_map(scaled_similarity, axis, colorbar=False)

    annotate(ax,
             row_count=n_months,
             column_count=len_measures,
             row_labels=months,
             column_labels=labels)

    fig.suptitle(
        "Similarity between QBO and all other points 1979 - 2019 per month")
    plt.show()