def main():
    """Load and plot ``data.txt``, then run every demo routine in turn.

    Each routine is announced with ``logging.info`` right before it is
    called; the routines themselves are invoked without arguments.
    """
    data = readfile('data.txt')
    plot_data(data)  # show the raw data first

    # (log message, routine) pairs, executed strictly in this order.
    steps = (
        # cluster level method
        ('metoda shlukove hladiny', cluster),
        # chain map method
        ('metoda retezove mapy', chmap),
        # maximin method
        ('metoda maximin', maximin),
        # unequal binary splitting
        ('nerovnomerne binarni deleni', unebin),
        # k-means
        ('kmeans', kmeans),
        # Bayes classifier
        ('bayesuv klasifikator', bayes),
        # minimum-distance classifier
        ('klasifikator podle minimalni vzdalenosti', mindist),
        # k-nearest-neighbour classifier
        ('klasifikator podle k-nejblizsiho souseda', nearneigh),
        # classifier with linear discriminant functions
        ('klasifikator s linearnimi diskriminacnimi funkcemi', lindisc),
    )
    for message, routine in steps:
        logging.info(message)
        routine()
def unequal_binary(data: List[tuple]):
    """Split ``data`` in two with k-means, then split one half again.

    The half that is split a second time is the one whose ``criterion``
    value is the largest.

    :param data: points handed to ``kmeans``.
    :return: a new dict combining the untouched half with the two parts
        produced by the second split.
    """
    halves = kmeans(data, 2)  # first split into two
    # plot_kmeans(halves)  # intermediate step
    logging.info('First split')

    scores = criterion(halves)
    worst_key = max(scores, key=scores.get)  # key with the largest criterion

    # Remove that half from the first result and split it once more.
    sub_halves = kmeans(halves.pop(worst_key), 2)
    logging.info('Second split')

    combined = dict(halves)
    combined.update(sub_halves)
    return combined
def test_kmeans(self):
    # Cluster the fixture data asking for three clusters and compare the
    # full mapping against the known-good result.
    # NOTE(review): three clusters are requested but only two keys are
    # expected - confirm this matches the intended kmeans contract.
    actual = kmeans.kmeans(self.data, 3)
    expected = {
        (1, 1): [(0, 1), (2, 1), (1, 3), (1, -1)],
        (1, 7): [(1, 5), (1, 9), (-1, 7), (3, 7)],
    }
    self.assertEqual(actual, expected)
def main():
    """Cluster ``data.txt`` into three groups and plot three classifier runs.

    The k-means result is passed to ``rossenblatt`` once and to
    ``constant_increment`` (with 0.5) twice; each result is plotted with
    ``plot_kmeans`` before the next one is computed.
    """
    clusters = kmeans(readfile('data.txt'), 3)

    plot_kmeans(rossenblatt(clusters))

    plot_kmeans(constant_increment(clusters, 0.5))

    # NOTE(review): this run was stored under a "modified constant
    # increment" name but calls the same function with the same arguments
    # as the run above - confirm whether a different routine was intended.
    plot_kmeans(constant_increment(clusters, 0.5))
def minimal_distance(data, classes, space_size=(-20, 20), step=1):
    """Assign generated points to the k-means key they are closest to.

    ``data`` is clustered with ``kmeans(data, classes)``.  Every point
    produced by ``generate_points(space_size[0], space_size[1], step)`` is
    appended to the list stored under the key with the smallest
    ``distanc(point, key)``; on ties the key that comes first in the dict
    wins.

    :param data: points handed to ``kmeans``.
    :param classes: second argument handed to ``kmeans``.
    :param space_size: pair whose two items are the first two arguments of
        ``generate_points``.
    :param step: third argument of ``generate_points``.
    :return: the dict returned by ``kmeans``, with the generated points
        appended to its lists.
    """
    clusters = kmeans(data, classes)
    lower, upper = space_size[0], space_size[1]
    for candidate in generate_points(lower, upper, step):
        # Key with the minimum distance to this candidate.
        closest = min(clusters, key=lambda centre: distanc(candidate, centre))
        clusters[closest].append(candidate)
    return clusters
def main():
    """Compare plain k-means (k=3) with unequal binary splitting.

    Both results are plotted, and the sums of their ``criterion`` values
    are logged side by side.
    """
    # Inline sample that can replace the file input:
    # data = [(-3, 0), (3, 2), (-2, 0), (3, 3), (2, 2), (3, -2), (4, -2), (3, -3)]
    data = readfile('data.txt')

    logging.info('k-means')
    clusters = kmeans(data, 3)
    j_kmeans = sum(criterion(clusters).values())
    plot_kmeans(clusters)

    logging.info('Unequal binary')
    splits = unequal_binary(data)
    plot_kmeans(splits)
    j_binary = sum(criterion(splits).values())

    summary = 'J kmeans: {}, J binary: {}'.format(j_kmeans, j_binary)
    logging.info(summary)
def nearest_neighbour(data, classes, space_size=(-20, 20), step=1):
    """Label generated points with the cluster of their nearest data point.

    ``data`` is clustered with ``kmeans(data, classes)``.  Every point
    produced by ``generate_points(space_size[0], space_size[1], step)`` is
    appended once to the cluster owning the clustered point with the
    smallest ``distanc`` to it; on ties the point that comes first in
    cluster order wins.

    :param data: points handed to ``kmeans``.
    :param classes: second argument handed to ``kmeans``.
    :param space_size: pair whose two items are the first two arguments of
        ``generate_points``.
    :param step: third argument of ``generate_points``.
    :return: a new dict with the keys of the k-means result; each value is
        a fresh list holding that cluster's points followed by the
        generated points assigned to it.
    """
    k_means = kmeans(data, classes)
    trypoints = generate_points(space_size[0], space_size[1], step)

    # Copy every list.  A plain dict(k_means) shares the lists, so appending
    # to the output would also grow the lists being searched - a generated
    # point equal to a data point would then match itself over and over.
    kmeans_toplot = {key: list(points) for key, points in k_means.items()}

    # Flat (point, owning key) table built once from the original clusters,
    # so the lookup never sees points that were added during labelling.
    labelled = [
        (point, key)
        for key, points in k_means.items()
        for point in points
    ]
    if not labelled:
        # Nothing to compare against; return the (empty) clusters as they are.
        return kmeans_toplot

    for trypoint in trypoints:
        # Only the single closest point is needed, so take a minimum
        # instead of sorting every point for every generated point.
        _, nearest_key = min(
            labelled, key=lambda pair: distanc(trypoint, pair[0])
        )
        kmeans_toplot[nearest_key].append(trypoint)
    return kmeans_toplot
def knearest_neighbour(data, classes, space_size=(-20, 20), step=1):
    """Label generated points by the smallest per-cluster ``average_dist``.

    ``data`` is clustered with ``kmeans(data, classes)``.  For every point
    produced by ``generate_points(space_size[0], space_size[1], step)`` each
    cluster's points are ordered by ``distanc`` to it and passed to
    ``average_dist``; the point is appended to the cluster with the
    smallest result (first cluster wins on ties).

    :param data: points handed to ``kmeans``.
    :param classes: second argument handed to ``kmeans``.
    :param space_size: pair whose two items are the first two arguments of
        ``generate_points``.
    :param step: third argument of ``generate_points``.
    :return: a new dict with the keys of the k-means result; each value is
        a fresh list holding that cluster's points (in their k-means order)
        followed by the generated points assigned to it.
    """
    k_means = kmeans(data, classes)
    trypoints = generate_points(space_size[0], space_size[1], step)

    # Copy every list.  A plain dict(k_means) shares the lists, so generated
    # points appended to the output would leak into the neighbour lists and
    # influence how later generated points are classified.
    means_toplot = {key: list(points) for key, points in k_means.items()}

    for trypoint in trypoints:
        # sorted() builds a new list, leaving the cluster lists in their
        # original order instead of re-sorting them in place every pass.
        averages = {
            key: average_dist(
                trypoint,
                sorted(points, key=lambda p: distanc(trypoint, p)),
            )
            for key, points in k_means.items()
        }
        keywithminvalue = min(averages, key=averages.get)
        means_toplot[keywithminvalue].append(trypoint)
    return means_toplot
def bayes(data, classes, space_size=(-20, 20), step=1):
    """Assign generated points to the cluster with the highest ``normal`` value.

    ``data`` is clustered with ``kmeans(data, classes)``; ``probability``,
    ``means`` and ``sigma`` are evaluated once on that result.  Every point
    produced by ``generate_points`` is then appended to the cluster whose
    ``normal(point, sigma, mean, probability)`` value is the largest (first
    cluster wins on ties).

    :param data: points handed to ``kmeans``.
    :param classes: second argument handed to ``kmeans``.
    :param space_size: pair whose two items are the first two arguments of
        ``generate_points``.
    :param float step: third argument of ``generate_points``.
    :return: the dict returned by ``kmeans``, with the generated points
        appended to its lists.
    """
    dist = kmeans(data, classes)
    trypoints = generate_points(space_size[0], space_size[1], step)

    # Per-cluster statistics, computed before any generated point is added.
    prob = probability(dist)
    class_means = means(dist)
    spreads = sigma(dist, class_means)

    for point in trypoints:
        # Decision value per cluster; keep the key with the maximum.
        winner = max(
            dist,
            key=lambda key: normal(
                point, spreads[key], class_means[key], prob[key]
            ),
        )
        dist[winner].append(point)
    return dist