Example #1
0
 def _search_and_cluster(self, query, query_md5):
   """Search for `query`, cluster the hits and store them in a ClusterModel.

   When the search returns no hits the handler redirects to "/" and None is
   returned. Otherwise the hits and their clusters are pickled onto a new
   ClusterModel, `put()` is called on it, and the model is returned.
   """
   hits = bossapiv2.search(query, 100)
   if len(hits) == 0:
     # Nothing to cluster: send the user back to the root path.
     self.redirect("/")
     return None

   wordlist, vectors = bosstextproc.textprocess(hits)
   groups = cluster.sortBySmallestId(
     cluster.hcluster(rows=vectors, distance=distance.pearson, threshold=1.03)
   )

   model = ClusterModel()
   model.query = query.decode('utf-8')
   model.query_md5 = query_md5
   model.results = pickle.dumps(hits)
   model.wordlist = wordlist
   model.clusts = pickle.dumps(groups)
   model.put()
   return model
Example #2
0
  def _view(self, query, query_md5, cluster_id, sub_cluster_id, page, cluster_model):
    """Render 'search.html' for the selected cluster, sub cluster and page.

    Args:
      query: ignored; it is overwritten with `cluster_model.query`.
      query_md5: passed through to the template unchanged.
      cluster_id: index into the stored clusters, or -1 to show all results.
      sub_cluster_id: index into the sub clusters of the selected cluster,
        or -1 to show the whole cluster.
      page: 1-based page number; each page shows 10 results.
      cluster_model: object providing `query`, `wordlist` and the pickled
        `results` and `clusts` attributes.
    """
    query = cluster_model.query
    wordlist = cluster_model.wordlist
    # NOTE(review): pickle.loads must only ever see data this application
    # stored itself -- confirm cluster_model cannot carry external payloads.
    results = pickle.loads(cluster_model.results)
    clusts = pickle.loads(cluster_model.clusts)
    labels = self._labels(clusts, wordlist, cluster_id)

    # Sub clusters only exist once a concrete cluster has been chosen.
    if cluster_id >= 0:
      sub_clusts = cluster.sortBySmallestId(cluster.divide(clusts[cluster_id], 0.85))
      sub_labels = self._labels(sub_clusts, wordlist, sub_cluster_id)
    else:
      sub_labels = []

    # Exactly one of these three flags is true.
    selected_all = cluster_id == -1
    selected_cluster = not selected_all and sub_cluster_id == -1
    selected_sub_cluster = not selected_all and not selected_cluster

    if selected_all:
      display_results = results
      total = len(results)
    else:
      if selected_cluster:
        clust = clusts[cluster_id]
      else:
        clust = sub_clusts[sub_cluster_id]
      # Collect the search results belonging to the chosen (sub) cluster once.
      display_results = [results[i] for i in cluster.topNIds(clust, clust.size)]
      total = clust.size

    # Paging: 10 results per page, pages are numbered from 1.
    pages = self._pages(total, page)
    start = (page - 1) * 10
    end = page * 10
    paging = len(pages) > 1

    template_values = {
      'selected_all': selected_all,
      'selected_cluster': selected_cluster,
      # Pass the computed flag so the template can tell when a sub cluster
      # is the active selection.
      'selected_subcluster': selected_sub_cluster,
      'cluster_id': cluster_id,
      'sub_cluster_id': sub_cluster_id,
      'query': query,
      'query_md5': query_md5,
      'hits': len(results),
      'pages': pages,
      'paging': paging,
      'labels': labels,
      'sublabels': sub_labels,
      'results': display_results[start:end],
    }
    self._render('search.html', template_values)