def run(self):
    """Grow ``self.means`` until it holds ``self.k`` entries by repeated 2-way splits.

    The first round splits ``self.dataset``. Every round runs a ``Kmeans``
    with ``k=2`` on the current target, adds the means of its best solution
    to ``self.means`` and then selects the mean with the largest
    ``getMeanSquaredError()``. While more means are still required, that
    mean is taken out of ``self.means`` so that the dataset it covers is
    split in the following round. ``self.setMeanSquaredError()`` is called
    once the loop has finished.
    """
    target = self.dataset
    while len(self.means) < self.k:
        # A Dataset is split directly; a Mean is split through the dataset
        # it covers. There is no fallback branch: for any other type
        # ``subset`` keeps its previous value (or is unbound on round one).
        if isinstance(target, Dataset):
            subset = target
        elif isinstance(target, Mean):
            subset = target.coveredDataset

        # NOTE(review): maxRounds receives self.trials here -- confirm this
        # is intended rather than a separate round limit.
        splitter = Kmeans(
            dataset=subset,
            k=2,
            trials=self.trials,
            maxRounds=self.trials,
            key=self.key,
        )
        splitter.run()
        best = splitter.getBestSolution()
        self.means += best.means

        target = max(self.means, key=lambda mean: mean.getMeanSquaredError())

        # Still short of k means: drop the worst cluster so it is replaced
        # by the result of bisecting it in the next iteration.
        if len(self.means) < self.k:
            self.means.remove(target)

    self.setMeanSquaredError()
def get_colors_from_image(filepath, number_of_colors):
    """Run ``Kmeans`` on the image stored at *filepath* and return its result.

    Args:
        filepath: Path passed to ``Image.open``.
        number_of_colors: First positional argument of ``Kmeans``
            (presumably the number of clusters -- confirm against Kmeans).

    Returns:
        Whatever ``kmeans.run(im)`` returns for the opened image.
    """
    # The original author annotated the trailing arguments as
    # "default 6,5,200"; their meaning is defined by the Kmeans class.
    kmeans = Kmeans(number_of_colors, 6, 5, 200)
    im = Image.open(filepath)
    try:
        return kmeans.run(im)
    finally:
        # Release the image even when clustering raises.
        im.close()
def get_color_from_image(filepath):
    """Run ``Kmeans(1)`` on the image at *filepath* and return its first result.

    Args:
        filepath: Path passed to ``Image.open``.

    Returns:
        Element ``[0]`` of what ``kmeans.run(im)`` returns.
    """
    kmeans = Kmeans(1)
    im = Image.open(filepath)
    try:
        return kmeans.run(im)[0]
    finally:
        # Release the image even when clustering or indexing raises.
        im.close()
def main():
    """Create the schema, load every ``Music`` row, split them and commit.

    The session is always closed before returning, including when the
    query, the split or the commit raises.
    """
    # 2 - generate database schema
    Base.metadata.create_all(engine)

    # 3 - create a new session
    session = Session()
    try:
        musics = session.query(Music).all()
        # The returned pair is not used afterwards; the unpacking is kept so
        # an unexpected return shape still fails loudly. Presumably split()
        # updates the loaded rows, which the commit then persists -- confirm.
        musics, _distances = Kmeans.split(musics)
        session.commit()
    finally:
        session.close()
def run(self):
    """Build and run one ``BisectingKmeans`` and one ``Kmeans`` per k.

    For every value in ``self.krange`` a model of each kind is created with
    the shared dataset, trials and maxRounds settings; the models are stored
    in ``self.bisecting`` and ``self.normal``. They are then run pairwise:
    the bisecting model for a given k first, followed by the plain one.
    """
    self.bisecting = [
        BisectingKmeans(
            dataset=self.dataset,
            k=clusters,
            trials=self.trials,
            maxRounds=self.maxRounds,
        )
        for clusters in self.krange
    ]
    self.normal = [
        Kmeans(
            dataset=self.dataset,
            k=clusters,
            trials=self.trials,
            maxRounds=self.maxRounds,
        )
        for clusters in self.krange
    ]

    for bisecting_model, normal_model in zip(self.bisecting, self.normal):
        bisecting_model.run()
        normal_model.run()
def index(request):
    """Render ``index.html`` and process the forms submitted from it.

    On POST, every section whose submit-button name is present in
    ``request.POST`` is handled, in this order:

    * ``crawl-btn``: validates ``CrawlerForm`` and runs a ``Scheduler`` crawl
      from the comma-separated starting URLs.
    * ``index-btn``: validates ``IndexForm`` and calls
      ``searcher.index(es, direction)``.
    * ``cluster-btn``: validates ``ClusterForm``, reads the ``"title"`` of
      every ``.json`` file in the given directory, vectorises the titles and
      plots the value returned by the k-means object for each k from 1 to
      the number of titles.
    * ``page-rank-btn`` / ``query-btn``: validate their forms and read the
      cleaned values; no further action is taken yet.

    An invalid form returns immediately with a context holding only that
    bound form. In every other case (GET, or a POST whose forms were all
    valid) the page is rendered with fresh, unbound instances of all five
    forms.

    Args:
        request: The incoming Django request.

    Returns:
        The response produced by ``render`` for ``index.html``.
    """
    template_name = "index.html"

    if request.method == "POST":
        if "crawl-btn" in request.POST:
            crawler_form = CrawlerForm(request.POST)
            if not crawler_form.is_valid():
                return render(request, template_name, {"crawler_form": crawler_form})
            n_docs = crawler_form.cleaned_data["n_docs"]
            in_degree = crawler_form.cleaned_data["in_degree"]
            out_degree = crawler_form.cleaned_data["out_degree"]
            starting_url = crawler_form.cleaned_data["starting_url"]
            urls = [x.strip() for x in starting_url.split(",")]
            # NOTE(review): the keyword is spelled "in_degre" -- confirm it
            # matches the Scheduler signature.
            crawler = Scheduler(
                starting_url=urls,
                num=n_docs,
                in_degre=in_degree,
                out_degree=out_degree,
            )
            crawler.crawl()

        if "index-btn" in request.POST:
            index_form = IndexForm(request.POST)
            if not index_form.is_valid():
                # The context must be a dict; this used to be a set literal.
                return render(request, template_name, {"index_form": index_form})
            direction = index_form.cleaned_data["direction"]
            print(direction)
            searcher.index(es, direction)

        if "cluster-btn" in request.POST:
            cluster_form = ClusterForm(request.POST)
            if not cluster_form.is_valid():
                return render(request, template_name, {"cluster_form": cluster_form})
            path_to_json = cluster_form.cleaned_data["direction"]
            json_files = [
                pos_json
                for pos_json in os.listdir(path_to_json)
                if pos_json.endswith(".json")
            ]
            titles = []
            for js in json_files:
                # The with-block closes the file; no explicit close() needed.
                with open(os.path.join(path_to_json, js)) as json_file:
                    data = json.load(json_file)
                    titles.append(data["title"])
            vectorizer = CountVectorizer()
            vectors = vectorizer.fit_transform(titles).todense().tolist()
            kmeans = Kmeans(vectors)
            # One run per k = 1 .. number of vectors.
            # NOTE(review): the method is spelled "kmenas" -- confirm it
            # matches the Kmeans class.
            k_points = list(range(1, len(vectors) + 1))
            j_points = [kmeans.kmenas(k) for k in k_points]
            plt.plot(j_points, k_points)
            plt.show()

        if "page-rank-btn" in request.POST:
            page_rank_form = PageRankForm(request.POST)
            if not page_rank_form.is_valid():
                return render(request, template_name, {"page_rank_form": page_rank_form})
            # Read but not used yet: no page-rank action is triggered here.
            alpha = page_rank_form.cleaned_data["alpha"]
            threshold = page_rank_form.cleaned_data["threshold"]

        if "query-btn" in request.POST:
            query_form = QueryForm(request.POST)
            if not query_form.is_valid():
                return render(request, template_name, {"query_form": query_form})
            # Read but not used yet: no query action is triggered here.
            cluster = query_form.cleaned_data["cluster"]
            pagerank = query_form.cleaned_data["pagerank"]
            query = query_form.cleaned_data["query"]

    # Build the unbound forms unconditionally so that a successfully handled
    # POST also gets a complete page instead of hitting unbound form names.
    query_form = QueryForm()
    page_rank_form = PageRankForm()
    cluster_form = ClusterForm()
    crawler_form = CrawlerForm()
    index_form = IndexForm()
    return render(
        request,
        template_name,
        {
            "query_form": query_form,
            "page_rank_form": page_rank_form,
            "cluster_form": cluster_form,
            "crawler_form": crawler_form,
            "index_form": index_form,
        },
    )