Example 1
    def compute_distance_matrix_datasets(self, datasets: List[pd.DataFrame],
                                         distance_metric: str) -> pd.DataFrame:
        """
        Compute the pairwise distance matrix for a list of datasets. The distance matrix is used for the implementation
        of k-medoids.

        :param datasets: List of DataFrames.
        :param distance_metric: Distance metric to use for calculating distance between datasets.
        :return: Pairwise distance matrix.
        """

        distances = pd.DataFrame(index=range(0, len(datasets)),
                                 columns=range(0, len(datasets)))
        DMNoOrdering = PersonDistanceMetricsNoOrdering()
        DMOrdering = PersonDistanceMetricsOrdering()

        # Compute the distances for each pair. Distances are assumed to be symmetric.
        for i in range(0, len(datasets)):
            for j in range(i, len(datasets)):
                if distance_metric == self.abstraction_p:
                    distances.iloc[i, j] = DMNoOrdering.p_distance(
                        datasets[i], datasets[j])
                elif distance_metric == self.abstraction_euclidean:
                    distances.iloc[i, j] = DMOrdering.euclidean_distance(
                        datasets[i], datasets[j])
                elif distance_metric == self.abstraction_lag:
                    distances.iloc[i, j] = DMOrdering.lag_correlation(
                        datasets[i], datasets[j], self.max_lag)
                elif distance_metric == self.abstraction_dtw:
                    distances.iloc[i, j] = DMOrdering.dynamic_time_warping(
                        datasets[i], datasets[j])
                distances.iloc[j, i] = distances.iloc[i, j]
        return distances
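
To make the pattern above concrete and runnable without the PersonDistanceMetricsNoOrdering/PersonDistanceMetricsOrdering classes (which are not shown here), the sketch below fills the same symmetric distance matrix using a plain Euclidean distance between column means as a stand-in metric; the function and column names are hypothetical.

import numpy as np
import pandas as pd

def toy_distance_matrix(datasets):
    # Same idea as compute_distance_matrix_datasets: fill only the upper
    # triangle and mirror it, since the distance is assumed to be symmetric.
    n = len(datasets)
    distances = pd.DataFrame(index=range(n), columns=range(n), dtype=float)
    for i in range(n):
        for j in range(i, n):
            d = np.linalg.norm(datasets[i].mean().values - datasets[j].mean().values)
            distances.iloc[i, j] = d
            distances.iloc[j, i] = d
    return distances

rng = np.random.default_rng(0)
datasets = [pd.DataFrame({'acc_x': rng.normal(size=50),
                          'acc_y': rng.normal(size=50)}) for _ in range(3)]
print(toy_distance_matrix(datasets))  # symmetric 3 x 3 DataFrame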
Example 2
    def aggregate_datasets(self, datasets, cols, abstraction_method):
        temp_datasets = []
        DM = PersonDistanceMetricsNoOrdering()

        # Flatten all datasets and add them to the newly formed dataset.
        for i in range(0, len(datasets)):
            temp_dataset = datasets[i][cols]
            temp_datasets.append(temp_dataset)

        if abstraction_method == self.abstraction_normal:
            return DM.create_instances_normal_distribution(temp_datasets)
        else:
            return DM.create_instances_mean(temp_datasets)
Example 3
    def compute_distance_matrix_datasets(self, datasets, distance_metric):
        distances = pd.DataFrame(index=range(0, len(datasets)), columns=range(0, len(datasets)))
        DMNoOrdering = PersonDistanceMetricsNoOrdering()
        DMOrdering = PersonDistanceMetricsOrdering()

        # Compute the distances for each pair. Note that we assume the distances to be symmetric.
        for i in range(0, len(datasets)):
            for j in range(i, len(datasets)):
                if distance_metric == self.abstraction_p:
                    distances.iloc[i, j] = DMNoOrdering.p_distance(datasets[i], datasets[j])
                elif distance_metric == self.abstraction_euclidean:
                    distances.iloc[i, j] = DMOrdering.euclidean_distance(datasets[i], datasets[j])
                elif distance_metric == self.abstraction_lag:
                    distances.iloc[i, j] = DMOrdering.lag_correlation(datasets[i], datasets[j], self.max_lag)
                elif distance_metric == self.abstraction_dtw:
                    distances.iloc[i, j] = DMOrdering.dynamic_time_warping(datasets[i], datasets[j])
                distances.iloc[j, i] = distances.iloc[i, j]
        return distances
Example 4
    def aggregate_datasets(self, datasets: List[pd.DataFrame], cols: List[str], abstraction_method: str) \
            -> pd.DataFrame:
        """
        Flatten each dataset to a single record/instance for comparing datasets between persons. This is done based on
        the approaches defined in the distance metrics file.

        :param datasets: List of DataFrames to aggregate.
        :param cols: Columns to keep while aggregating.
        :param abstraction_method: Abstraction method to use for aggregation.
        :return: Aggregated DataFrame.
        """

        temp_datasets = []
        DM = PersonDistanceMetricsNoOrdering()

        # Flatten all datasets and add them to the newly formed dataset
        for i in range(0, len(datasets)):
            temp_dataset = datasets[i][cols]
            temp_datasets.append(temp_dataset)

        if abstraction_method == self.abstraction_normal:
            return DM.create_instances_normal_distribution(temp_datasets)
        else:
            return DM.create_instances_mean(temp_datasets)
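
As a runnable illustration of the flattening step, the sketch below reduces each dataset to a single row of column means, a stand-in for the mean-based abstraction (create_instances_mean is not shown above); the helper name and columns are hypothetical.

import numpy as np
import pandas as pd

def toy_aggregate_mean(datasets, cols):
    # One row per input dataset: each DataFrame is flattened to the mean of
    # the selected columns, mirroring "flatten to a single record/instance".
    rows = [dataset[cols].mean() for dataset in datasets]
    return pd.DataFrame(rows).reset_index(drop=True)

rng = np.random.default_rng(1)
datasets = [pd.DataFrame({'acc_x': rng.normal(size=40),
                          'acc_y': rng.normal(size=40)}) for _ in range(3)]
print(toy_aggregate_mean(datasets, ['acc_x', 'acc_y']))  # 3 rows, 2 columns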