def assign_data(dataset, clusters):
    '''
    Assigns every point to a cluster by taking the argmin of its distances
    to the cluster centers

    Args:
        dataset: Set of points
        clusters: Centers of clusters

    Returns:
        Tensor of cluster indices produced by tf.argmin along axis 1 of the
        distance matrix (presumably one index per point, in dataset order --
        this depends on the layout returned by square_distance; confirm there)
    '''
    # Pairwise distances between the centers and the points
    pairwise = square_distance(clusters, dataset)

    # Index of the smallest distance along axis 1
    return tf.argmin(pairwise, 1)
def loss_function(mu, data):
    '''
    Loss for K-Means Clustering

        L(mu) = mean( min( square_distance(mu, data), axis 1 ) )

    NOTE: the minimum distances are averaged with tf.reduce_mean, not
    summed, so the value is a mean rather than a total.

    Args:
        mu: Data cluster centers
        data: Data set

    Returns:
        Scalar tensor holding the mean of the per-row minimum distances
    '''
    # Distances between the centers and the data, as given by square_distance
    pairwise = square_distance(mu, data)

    # Smallest distance along axis 1 (presumably the nearest center for each
    # point -- depends on square_distance's output layout)
    nearest = tf.reduce_min(pairwise, 1)

    # Average of those minima
    return tf.reduce_mean(nearest)
def log_density(x, mu, sigma):
    '''
    Calculates log P(x | mu, sigma), i.e. the log probability density
    function used for the mixture of gaussians, with a single variance
    shared by all D dimensions of a component:

        log N(x | mu, sigma) = -D * log(sqrt(2 * pi * sigma))
                               - ||x - mu||^2 / (2 * sigma)

    Args:
        x: Data points; assumed to be laid out with the feature dimension
            on the last axis, e.g. (num_points, D) -- TODO confirm with
            callers
        mu: Cluster centers
        sigma: Cluster variance (i.e. sigma^2, not the standard deviation)

    Returns:
        logp: Log probability density function of the data
    '''
    # Per-dimension normaliser: sqrt(2 * pi * sigma^2), sigma being a variance
    den = tf.sqrt(2 * pi * sigma)

    # D must be the dimensionality of a data point, so read it from the last
    # axis of x. tf.rank(x) is the number of tensor axes (2 for any matrix),
    # which only gives the right normaliser when the data happens to be 2-D.
    D = tf.to_float(tf.shape(x)[-1])

    # Normalisation term: -D * log(sqrt(2 * pi * sigma^2))
    logp = -D * tf.log(den)

    # Exponent term: -(x - mu)^2 / (2 * sigma^2)
    dists = square_distance(x, mu)
    logp -= dists / (2 * sigma)

    return logp