Example #1
0
    def test_get_cluster_queries(self):
        """Every expected cluster query must be generated, in any term order."""
        ids_per_query = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        cluster_keys = get_clusters(ids_per_query).keys()
        generated = set(get_cluster_queries(cluster_keys))

        # get_cluster_queries generates queries non-deterministically, so each
        # entry below lists all accepted spellings of one expected query.
        accepted_spellings = [
            # All three terms combined.
            (
                '((a) AND (b) AND (c))',
                '((a) AND (c) AND (b))',
                '((b) AND (a) AND (c))',
                '((b) AND (c) AND (a))',
                '((c) AND (a) AND (b))',
                '((c) AND (b) AND (a))',
            ),
            # "a" without the other two terms.
            ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))'),
            # "b" without the other two terms.
            ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')
        ]

        for spellings in accepted_spellings:
            # At least one spelling of this query has to be present.
            self.assertTrue(not generated.isdisjoint(spellings))
Example #2
0
    def run(self, form):
        """Produce the clustermap output for ``form`` in the requested format.

        The article ids per query are obtained from a ``SelectionSearch``
        built from ``form``; ``form.cleaned_data["output_type"]`` selects the
        representation:

        * ``application/json+clustermap``: JSON string with ``coords``,
          a base64 encoded ``image`` and a ``clusters`` list of
          ``{"query", "articles"}`` entries.
        * ``application/spss-sav``: whatever ``table2sav`` returns for the
          clustermap table.
        * ``application/json+clustermap+table``: JSON string holding the
          table as ``csv`` plus a ``queries`` mapping of label to query.
        * ``text/csv+tab``: the table as tab separated text.
        * anything else: the table as CSV text (``excel`` dialect).

        Raises:
            ValueError: if a clustermap is requested but the queries
                produced no clusters at all.
        """
        selection = SelectionSearch(form)
        queries = selection.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            cluster_items = get_clusters(queries).items()
            try:
                clusters, articles = zip(*cluster_items)
            except ValueError:
                # With zero clusters zip() yields nothing, so the two-name
                # unpacking fails; report that cause instead of the opaque
                # "not enough values to unpack" message.
                raise ValueError(
                    "Cannot build clustermap of empty query result."
                ) from None

            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps({
                "coords": coords,
                "image": b64encode(image).decode("ascii"),
                "clusters": [
                    {"query": q, "articles": tuple(a)}
                    for q, a in zip(cluster_queries, articles)
                ],
            })

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Table asks for cells by (row, column name); map each column
            # name back to its position within a row.
            column_index = {str(h): i for i, h in enumerate(headers)}

            return table2sav(
                Table(rows=list(rows),
                      columns=list(map(str, headers)),
                      columnTypes=[int] * len(headers),
                      cellfunc=lambda row, col: row[column_index[col]]))

        dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries},
            })

        return result.getvalue()
Example #3
0
    def run(self, form):
        """Produce the clustermap output for ``form`` in the requested format.

        Article ids per query come from ``SelectionSearch.get_instance(form)``.
        The value of ``form.cleaned_data["output_type"]`` decides what is
        returned:

        * ``application/json+clustermap``: JSON string with ``coords``,
          a base64 encoded ``image`` and a ``clusters`` list of
          ``{"query", "articles"}`` entries.
        * ``application/spss-sav``: the result of ``table2sav`` for the
          clustermap table.
        * ``application/json+clustermap+table``: JSON string with the table
          as ``csv`` and a ``queries`` mapping of label to query.
        * ``text/csv+tab``: the table as tab separated text.
        * any other value: the table as CSV text (``excel`` dialect).

        Raises:
            ValueError: if a clustermap is requested but there are no
                clusters to draw.
        """
        selection = SelectionSearch.get_instance(form)
        queries = selection.get_article_ids_per_query()
        output_type = form.cleaned_data["output_type"]

        if output_type == "application/json+clustermap":
            # Compute the clusters outside the try block so that a ValueError
            # raised by get_clusters itself is not mislabelled as "empty".
            cluster_items = get_clusters(queries).items()
            try:
                clusters, articles = zip(*cluster_items)
            except ValueError:
                # zip() over zero clusters yields nothing to unpack.
                raise ValueError(
                    "Cannot build clustermap of empty query result."
                ) from None

            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            cluster_entries = [
                {"query": q, "articles": tuple(a)}
                for q, a in zip(cluster_queries, articles)
            ]
            return json.dumps({
                "coords": coords,
                "image": b64encode(image).decode("ascii"),
                "clusters": cluster_entries,
            })

        headers, rows = get_clustermap_table(queries)

        if output_type == "application/spss-sav":
            # Table looks cells up by column name; translate the name into
            # the matching position within a row.
            position_of = {str(h): i for i, h in enumerate(headers)}

            return table2sav(Table(
                rows=list(rows),
                columns=list(map(str, headers)),
                columnTypes=[int] * len(headers),
                cellfunc=lambda row, col: row[position_of[col]]
            ))

        dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if output_type == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries}
            })

        return result.getvalue()
Example #4
0
    def test_get_cluster_queries(self):
        """Each expected cluster query appears in at least one accepted form."""
        fixture = {
            SearchQuery("a"): [1, 2, 3],
            SearchQuery("b"): [1, 4],
            SearchQuery("c"): [1]
        }

        produced = set(get_cluster_queries(get_clusters(fixture).keys()))

        # get_cluster_queries generates queries non-deterministically, hence
        # every expected query is listed with all of its term orderings.
        all_terms = (
            '((a) AND (b) AND (c))', '((a) AND (c) AND (b))',
            '((b) AND (a) AND (c))', '((b) AND (c) AND (a))',
            '((c) AND (a) AND (b))', '((c) AND (b) AND (a))',
        )
        only_a = ('((a)) NOT ((b) OR (c))', '((a)) NOT ((c) OR (b))')
        only_b = ('((b)) NOT ((c) OR (a))', '((b)) NOT ((a) OR (c))')

        for variants in (all_terms, only_a, only_b):
            matched = produced.intersection(variants)
            self.assertTrue(bool(matched))