def test_get_cluster_queries(self):
    """Check that get_cluster_queries yields an acceptable query per cluster."""
    article_ids_per_query = {
        SearchQuery("a"): [1, 2, 3],
        SearchQuery("b"): [1, 4],
        SearchQuery("c"): [1],
    }
    cluster_keys = get_clusters(article_ids_per_query).keys()
    generated = set(get_cluster_queries(cluster_keys))

    # get_cluster_queries generates queries non-deterministically, so every
    # expected cluster is listed with all of its acceptable spellings.
    all_three = (
        '((a) AND (b) AND (c))',
        '((a) AND (c) AND (b))',
        '((b) AND (a) AND (c))',
        '((b) AND (c) AND (a))',
        '((c) AND (a) AND (b))',
        '((c) AND (b) AND (a))',
    )
    only_a = ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))')
    only_b = ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')

    # One matching spelling per cluster is enough.
    for spellings in (all_three, only_a, only_b):
        self.assertTrue(any(candidate in generated for candidate in spellings))
def run(self, form):
    """Build the clustermap output selected by ``form.cleaned_data["output_type"]``.

    Supported output types:

    * ``application/json+clustermap`` -- JSON object with the image-map
      ``coords``, the base64-encoded ``image`` and, per cluster, its
      ``query`` and ``articles``.
    * ``application/spss-sav`` -- the result of ``table2sav`` applied to
      the clustermap table.
    * ``application/json+clustermap+table`` -- JSON object holding the
      table as ``csv`` text plus a ``queries`` mapping (label -> query).
    * ``text/csv+tab`` -- the table as tab-delimited CSV text.
    * anything else -- the table as comma-delimited CSV text.

    Raises:
        ValueError: if the clustermap is requested but ``get_clusters``
            produced no clusters to draw.
    """
    selection = SelectionSearch(form)
    queries = selection.get_article_ids_per_query()
    output_type = form.cleaned_data["output_type"]

    if output_type == "application/json+clustermap":
        cluster_map = get_clusters(queries)
        try:
            # zip(*items) transposes (cluster, articles) pairs into two
            # parallel tuples; with no pairs there is nothing to unpack.
            clusters, articles = zip(*cluster_map.items())
        except ValueError as e:
            raise ValueError(
                "Cannot build clustermap of empty query result."
            ) from e

        cluster_queries = get_cluster_queries(clusters)
        image, html = get_clustermap_image(queries)
        coords = tuple(clustermap_html_to_coords(html))

        return json.dumps({
            "coords": coords,
            "image": b64encode(image).decode("ascii"),
            "clusters": [
                {"query": q, "articles": tuple(a)}
                for q, a in zip(cluster_queries, articles)
            ],
        })

    headers, rows = get_clustermap_table(queries)

    if output_type == "application/spss-sav":
        # cellfunc is handed a column name while the rows are indexed by
        # position, so translate header names to their column index.
        _headers = {str(h): i for i, h in enumerate(headers)}

        return table2sav(Table(
            rows=list(rows),
            columns=list(map(str, headers)),
            columnTypes=[int] * len(headers),
            cellfunc=lambda row, col: row[_headers[col]],
        ))

    dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

    result = StringIO()
    csvf = csv.writer(result, dialect=dialect)
    csvf.writerow(list(map(str, headers)))
    csvf.writerows(sorted(rows))

    if output_type == "application/json+clustermap+table":
        return json.dumps({
            "csv": result.getvalue(),
            "queries": {q.label: q.query for q in queries},
        })

    return result.getvalue()
def run(self, form):
    """Build the clustermap output selected by ``form.cleaned_data["output_type"]``.

    Supported output types:

    * ``application/json+clustermap`` -- JSON object with the image-map
      ``coords``, the base64-encoded ``image`` and, per cluster, its
      ``query`` and ``articles``.
    * ``application/spss-sav`` -- the result of ``table2sav`` applied to
      the clustermap table.
    * ``application/json+clustermap+table`` -- JSON object holding the
      table as ``csv`` text plus a ``queries`` mapping (label -> query).
    * ``text/csv+tab`` -- the table as tab-delimited CSV text.
    * anything else -- the table as comma-delimited CSV text.

    Raises:
        ValueError: if the clustermap is requested but ``get_clusters``
            produced no clusters to draw.
    """
    selection = SelectionSearch.get_instance(form)
    queries = selection.get_article_ids_per_query()
    output_type = form.cleaned_data["output_type"]

    if output_type == "application/json+clustermap":
        # Computed outside the try block so that a ValueError raised by
        # get_clusters itself is not mislabelled as an empty result.
        cluster_map = get_clusters(queries)
        try:
            # zip(*items) transposes (cluster, articles) pairs into two
            # parallel tuples; with no pairs there is nothing to unpack.
            clusters, articles = zip(*cluster_map.items())
        except ValueError as e:
            raise ValueError(
                "Cannot build clustermap of empty query result."
            ) from e

        cluster_queries = get_cluster_queries(clusters)
        image, html = get_clustermap_image(queries)
        coords = tuple(clustermap_html_to_coords(html))

        return json.dumps({
            "coords": coords,
            "image": b64encode(image).decode("ascii"),
            "clusters": [
                {"query": q, "articles": tuple(a)}
                for q, a in zip(cluster_queries, articles)
            ],
        })

    headers, rows = get_clustermap_table(queries)

    if output_type == "application/spss-sav":
        # cellfunc is handed a column name while the rows are indexed by
        # position, so translate header names to their column index.
        _headers = {str(h): i for i, h in enumerate(headers)}

        return table2sav(Table(
            rows=list(rows),
            columns=list(map(str, headers)),
            columnTypes=[int] * len(headers),
            cellfunc=lambda row, col: row[_headers[col]],
        ))

    dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

    result = StringIO()
    csvf = csv.writer(result, dialect=dialect)
    csvf.writerow(list(map(str, headers)))
    csvf.writerows(sorted(rows))

    if output_type == "application/json+clustermap+table":
        return json.dumps({
            "csv": result.getvalue(),
            "queries": {q.label: q.query for q in queries},
        })

    return result.getvalue()
def test_get_clusters(self):
    """Check which query combinations get_clusters reports, and with which ids."""
    ids_per_query = {"a": [1, 2, 3], "b": [1, 4], "c": [1]}
    found = dict(get_clusters(ids_per_query))

    abc = frozenset(("a", "b", "c"))
    just_a = frozenset(("a",))
    just_b = frozenset(("b",))

    # Assert clusters: only these combinations are expected as keys.
    for expected_key in (abc, just_a, just_b):
        self.assertIn(expected_key, found)

    unexpected_keys = (
        frozenset(("c",)),
        frozenset(("a", "b")),
        frozenset(("c", "a")),
        frozenset(("c", "b")),
    )
    for unexpected_key in unexpected_keys:
        self.assertNotIn(unexpected_key, found)

    # Assert clustervalues: the ids stored under each expected key.
    self.assertEqual(found[abc], {1})
    self.assertEqual(found[just_b], {4})
    self.assertEqual(found[just_a], {2, 3})
def test_get_table(self): queries = {"a": [1, 2, 3], "b": [1, 4], "c": [1]} clusters = dict(get_clusters(queries))