def _search_and_cluster(self, query, query_md5):
    """Run a search for ``query``, cluster the hits and store the outcome.

    Args:
        query: Search string handed to ``bossapiv2.search``; it is decoded
            as UTF-8 before being stored on the model.
        query_md5: Value stored unchanged on the model as ``query_md5``
            (presumably a digest of the query -- confirm with the caller).

    Returns:
        The ``ClusterModel`` instance after ``put()`` has been called on it,
        or ``None`` when the search produced no results (in which case the
        handler has already redirected to ``"/"``).
    """
    # The second argument (100) is presumably the number of hits requested
    # -- TODO confirm against bossapiv2.search.
    hits = bossapiv2.search(query, 100)
    if len(hits) == 0:
        # Nothing to cluster: send the client back to the root page.
        self.redirect("/")
        return None

    vocabulary, vectors = bosstextproc.textprocess(hits)
    ordered_clusters = cluster.sortBySmallestId(
        cluster.hcluster(
            rows=vectors,
            distance=distance.pearson,
            threshold=1.03,
        )
    )

    # Results and clusters are stored as pickled blobs; the word list is
    # stored as-is.
    record = ClusterModel()
    record.query = query.decode('utf-8')
    record.query_md5 = query_md5
    record.results = pickle.dumps(hits)
    record.wordlist = vocabulary
    record.clusts = pickle.dumps(ordered_clusters)
    record.put()
    return record
def _view(self, query, query_md5, cluster_id, sub_cluster_id, page, cluster_model):
    """Render one page of stored search results, optionally narrowed down.

    The results shown are either all stored results, the members of one
    cluster, or the members of one sub-cluster of that cluster.

    Args:
        query: Ignored; it is overwritten with ``cluster_model.query``.
        query_md5: Passed through to the template unchanged.
        cluster_id: ``-1`` to show all results, otherwise an index into the
            stored cluster list.
        sub_cluster_id: ``-1`` to show the whole cluster, otherwise an index
            into the sub-clusters derived from the selected cluster. Only
            consulted when ``cluster_id`` is not ``-1``.
        page: 1-based page number; each page holds 10 results.
        cluster_model: Object exposing ``query``, ``wordlist`` and the
            pickled ``results`` and ``clusts`` attributes.

    Returns:
        None. The output is produced by ``self._render('search.html', ...)``.
    """
    page_size = 10

    # The stored query wins over the argument.
    query = cluster_model.query
    wordlist = cluster_model.wordlist
    # NOTE(review): pickle.loads must only ever see trusted data; this
    # assumes the blobs were written by this application -- confirm.
    results = pickle.loads(cluster_model.results)
    clusts = pickle.loads(cluster_model.clusts)
    labels = self._labels(clusts, wordlist, cluster_id)

    # Sub-clusters only exist once a concrete cluster has been chosen.
    if cluster_id >= 0:
        sub_clusts = cluster.sortBySmallestId(
            cluster.divide(clusts[cluster_id], 0.85)
        )
        sub_labels = self._labels(sub_clusts, wordlist, sub_cluster_id)
    else:
        sub_labels = []

    # Exactly one of these three flags is True.
    # NOTE(review): cluster_id values below -1 are not treated specially
    # (they fall through to the cluster/sub-cluster path, as before).
    selected_all = cluster_id == -1
    selected_cluster = not selected_all and sub_cluster_id == -1
    selected_sub_cluster = not selected_all and sub_cluster_id != -1

    if selected_all:
        display_results = results
        total = len(results)
    else:
        if selected_cluster:
            clust = clusts[cluster_id]
        else:
            clust = sub_clusts[sub_cluster_id]
        # Map the ids reported for the chosen (sub-)cluster to result entries.
        member_ids = cluster.topNIds(clust, clust.size)
        display_results = [results[member_id] for member_id in member_ids]
        total = clust.size

    pages = self._pages(total, page)
    start = (page - 1) * page_size
    end = page * page_size

    template_values = {
        'selected_all': selected_all,
        'selected_cluster': selected_cluster,
        # Previously hard-coded to False, so a selected sub-cluster was
        # never reported to the template.
        'selected_subcluster': selected_sub_cluster,
        'cluster_id': cluster_id,
        'sub_cluster_id': sub_cluster_id,
        'query': query,
        'query_md5': query_md5,
        'hits': len(results),
        'pages': pages,
        'paging': len(pages) > 1,
        'labels': labels,
        'sublabels': sub_labels,
        'results': display_results[start:end],
    }
    self._render('search.html', template_values)