def calculate_manhattan(self):
    """Fill the lower triangle of ``self.manhattan_distance``.

    For every row pair ``(i, j)`` with ``j < i`` the absolute differences
    over all columns of ``self.df`` except the last one are added up, and
    the total is stored as ``Distance(total, i, j)`` in
    ``self.manhattan_distance[i][j]``.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.
    """
    frame = self.df
    labels = frame.columns
    # The final column does not take part in the distance.
    feature_total = len(labels) - 1
    for i in range(self.size):
        for j in range(i):
            total = 0
            for position in range(feature_total):
                label = labels[position]
                gap = frame.at[i, label] - frame.at[j, label]
                total += math.fabs(gap)
            self.manhattan_distance[i][j] = Distance(total, i, j)
def calculate_euclidean(self):
    """Fill the lower triangle of ``self.euclidean_distance``.

    For every row pair ``(i, j)`` with ``j < i`` the squared differences
    over all columns of ``self.df`` except the last one are added up; the
    square root of that total is stored as ``Distance(root, i, j)`` in
    ``self.euclidean_distance[i][j]``.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.
    """
    frame = self.df
    labels = frame.columns
    # The final column does not take part in the distance.
    feature_total = len(labels) - 1
    for i in range(self.size):
        for j in range(i):
            squared_total = 0
            for position in range(feature_total):
                label = labels[position]
                gap = frame.at[i, label] - frame.at[j, label]
                squared_total += math.pow(gap, 2)
            root = math.sqrt(squared_total)
            self.euclidean_distance[i][j] = Distance(root, i, j)
def calculate_chebyshev(self):
    """Fill the lower triangle of ``self.chebyshev_distance``.

    For every row pair ``(i, j)`` with ``j < i`` the largest absolute
    difference over all columns of ``self.df`` except the last one is
    stored as ``Distance(largest, i, j)`` in
    ``self.chebyshev_distance[i][j]``.  When no difference exceeds zero the
    stored value is the integer ``0``.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.
    """
    frame = self.df
    labels = frame.columns
    # The final column does not take part in the distance.
    feature_total = len(labels) - 1
    for i in range(self.size):
        for j in range(i):
            # Seeding with 0 gives max() the same starting point as a
            # running "replace if strictly greater" scan.
            gaps = [0]
            for position in range(feature_total):
                label = labels[position]
                gaps.append(math.fabs(frame.at[i, label] - frame.at[j, label]))
            self.chebyshev_distance[i][j] = Distance(max(gaps), i, j)
def calculate_manhattan_normalize(self):
    """Fill the lower triangle of ``self.manhattan_distance`` from the
    trailing columns of ``self.df``.

    With ``half = floor(number_of_columns / 2)``, the distance uses the
    first ``half`` columns found from position ``half + 1`` onward
    (presumably the normalized copies of the features -- confirm against
    the code that builds ``self.df``).  For every row pair ``(i, j)`` with
    ``j < i`` the absolute differences over those columns are added up and
    stored as ``Distance(total, i, j)`` in ``self.manhattan_distance[i][j]``.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.
    """
    frame = self.df
    half = math.floor(len(frame.columns) / 2)
    # Skip the first half of the columns plus the one right after it.
    tail_labels = frame.columns[half + 1:]
    for i in range(self.size):
        for j in range(i):
            total = 0
            for position in range(half):
                label = tail_labels[position]
                gap = frame.at[i, label] - frame.at[j, label]
                total += math.fabs(gap)
            self.manhattan_distance[i][j] = Distance(total, i, j)
def calculate_euclidean_normalize(self):
    """Fill the lower triangle of ``self.euclidean_distance`` from the
    trailing columns of ``self.df``.

    With ``half = floor(number_of_columns / 2)``, the distance uses the
    first ``half`` columns found from position ``half + 1`` onward
    (presumably the normalized copies of the features -- confirm against
    the code that builds ``self.df``).  For every row pair ``(i, j)`` with
    ``j < i`` the squared differences over those columns are added up; the
    square root of the total is stored as ``Distance(root, i, j)`` in
    ``self.euclidean_distance[i][j]``.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.
    """
    frame = self.df
    half = math.floor(len(frame.columns) / 2)
    # Skip the first half of the columns plus the one right after it.
    tail_labels = frame.columns[half + 1:]
    for i in range(self.size):
        for j in range(i):
            squared_total = 0
            for position in range(half):
                label = tail_labels[position]
                gap = frame.at[i, label] - frame.at[j, label]
                squared_total += math.pow(gap, 2)
            root = math.sqrt(squared_total)
            self.euclidean_distance[i][j] = Distance(root, i, j)
def calculate_mahalanobis(self):
    """Fill the lower triangle of ``self.mahalanobis_distance``.

    For every row pair ``(i, j)`` with ``j < i`` the quadratic form
    ``d . inv(cov) . d`` is evaluated, where ``d`` is the difference of the
    two rows over all columns of ``self.df`` except the last one and
    ``cov`` is the covariance matrix of those same columns.  The result is
    stored as ``Distance(value, i, j)`` in
    ``self.mahalanobis_distance[i][j]``.

    NOTE: no square root is applied, so the stored value is the *squared*
    Mahalanobis distance.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.  Any error raised by
    ``linalg.inv`` for the covariance matrix is propagated.
    """
    frame = self.df
    # The final column does not take part in the distance.
    feature_columns = frame.columns[:len(frame.columns) - 1]
    # Build the covariance from exactly the columns that make up the
    # difference vectors.  Using the whole frame yields a matrix of the
    # wrong dimension when the last column is numeric, and fails inside
    # cov() on pandas versions that no longer drop non-numeric columns.
    inv_cov = linalg.inv(frame[feature_columns].cov().to_numpy())

    # Rows already visited; row j is always read before any i > j needs it,
    # so every row is fetched from the frame only once.
    seen = []
    for i in range(self.size):
        current = np.array([frame.at[i, col] for col in feature_columns])
        for j, earlier in enumerate(seen):
            delta = np.subtract(current, earlier)
            value = delta.dot(inv_cov).dot(delta)
            self.mahalanobis_distance[i][j] = Distance(value, i, j)
        seen.append(current)
def calculate_mahalanobis_normalize(self):
    """Fill the lower triangle of ``self.mahalanobis_distance`` from the
    trailing columns of ``self.df``.

    With ``half = floor(number_of_columns / 2)``, the computation uses the
    first ``half`` columns found from position ``half + 1`` onward
    (presumably the normalized copies of the features -- confirm against
    the code that builds ``self.df``).  For every row pair ``(i, j)`` with
    ``j < i`` the quadratic form ``d . inv(cov) . d`` is evaluated, where
    ``d`` is the difference of the two rows over those columns and ``cov``
    is the covariance matrix of the same columns.  The result is stored as
    ``Distance(value, i, j)`` in ``self.mahalanobis_distance[i][j]``.

    NOTE: no square root is applied, so the stored value is the *squared*
    Mahalanobis distance.

    Rows are addressed with ``self.df.at[i, ...]``, i.e. the integers
    ``0 .. self.size - 1`` are used as index labels.  An ``IndexError`` is
    raised when fewer than ``half`` columns follow position ``half``; any
    error raised by ``linalg.inv`` is propagated.
    """
    frame = self.df
    half = math.floor(len(frame.columns) / 2)
    # Skip the first half of the columns plus the one right after it.
    tail_labels = frame.columns[half + 1:]
    selected = [tail_labels[position] for position in range(half)]
    # Covariance and difference vectors are both derived from `selected`.
    # Picking the covariance columns by dtype and offset instead only lines
    # up when the skipped middle column is non-numeric; otherwise the
    # matrix gains an extra dimension and the products below fail.
    inv_cov = linalg.inv(frame[selected].cov().to_numpy())

    # Rows already visited; row j is always read before any i > j needs it,
    # so every row is fetched from the frame only once.
    seen = []
    for i in range(self.size):
        current = np.array([frame.at[i, col] for col in selected])
        for j, earlier in enumerate(seen):
            delta = np.subtract(current, earlier)
            value = delta.dot(inv_cov).dot(delta)
            self.mahalanobis_distance[i][j] = Distance(value, i, j)
        seen.append(current)