Ejemplo n.º 1
0
    def run(self, form):
        selection = SelectionSearch(form)
        queries = selection.get_article_ids_per_query()

        if form.cleaned_data["output_type"] == "application/json+clustermap":
            clusters, articles = zip(*get_clusters(queries).items())
            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps({
                "coords":
                coords,
                "image":
                b64encode(image).decode("ascii"),
                "clusters": [{
                    "query": q,
                    "articles": tuple(a)
                } for q, a in zip(cluster_queries, articles)]
            })

        headers, rows = get_clustermap_table(queries)

        if form.cleaned_data["output_type"] == "application/spss-sav":
            # *sigh*.. this code is fugly.
            _headers = {str(h): i for i, h in enumerate(headers)}

            return table2sav(
                Table(rows=list(rows),
                      columns=list(map(str, headers)),
                      columnTypes=[int] * len(headers),
                      cellfunc=lambda row, col: row[_headers[col]]))

        dialect = 'excel'
        if form.cleaned_data["output_type"] == "text/csv+tab":
            dialect = 'excel-tab'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if form.cleaned_data[
                "output_type"] == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query
                            for q in queries}
            })

        return result.getvalue()
Ejemplo n.º 2
0
    def run(self, form):
        selection = SelectionSearch(form)
        queries = selection.get_article_ids_per_query()

        if form.cleaned_data["output_type"] == "application/json+clustermap":
            clusters, articles = zip(*get_clusters(queries).items())
            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps(
                {"coords": coords, "image": b64encode(image).decode("ascii"),
                 "clusters": [
                     {"query": q, "articles": tuple(a)}
                     for q, a in zip(cluster_queries, articles)
                 ]}
            )

        headers, rows = get_clustermap_table(queries)

        if form.cleaned_data["output_type"] == "application/spss-sav":
            # *sigh*.. this code is fugly.
            _headers = {str(h): i for i, h in enumerate(headers)}

            return table2sav(Table(
                rows=list(rows),
                columns=list(map(str, headers)),
                columnTypes=[int]*len(headers),
                cellfunc=lambda row, col: row[_headers[col]]
            ))

        dialect = 'excel'
        if form.cleaned_data["output_type"] == "text/csv+tab":
            dialect = 'excel-tab'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if form.cleaned_data["output_type"] == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries}
            })

        return result.getvalue()
Ejemplo n.º 3
0
    def run(self, form):
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch(form)
        try:
            aggregation, primary, secondary, categories, values = self.get_cache(
            )
        except NotInCacheError:
            narticles = selection.get_count()
            self.monitor.update(
                10,
                "Found {narticles} articles. Aggregating..".format(**locals()))

            # Get aggregation
            codingjobs = form.cleaned_data["codingjobs"]
            primary = form.cleaned_data['primary']
            secondary = form.cleaned_data['secondary']
            value1 = form.cleaned_data['value1']
            value2 = form.cleaned_data['value2']

            article_ids = selection.get_article_ids()

            # This should probably happen in SelectionForm?
            coded_articles = CodedArticle.objects.all()
            coded_articles = coded_articles.filter(article__id__in=article_ids)
            coded_articles = coded_articles.filter(
                codingjob__id__in=codingjobs)

            coded_article_ids = set(coded_articles.values_list("id",
                                                               flat=True))
            for field_name in ("1", "2", "3"):
                if not coded_article_ids:
                    break

                schemafield = form.cleaned_data["codingschemafield_{}".format(
                    field_name)]
                schemafield_values = form.cleaned_data[
                    "codingschemafield_value_{}".format(field_name)]
                schemafield_include_descendants = form.cleaned_data[
                    "codingschemafield_include_descendants_{}".format(
                        field_name)]

                if schemafield and schemafield_values:
                    code_ids = get_code_filter(
                        schemafield.codebook, schemafield_values,
                        schemafield_include_descendants)
                    coding_values = CodingValue.objects.filter(
                        coding__coded_article__id__in=coded_article_ids)
                    coding_values = coding_values.filter(
                        field__id=schemafield.id)
                    coding_values = coding_values.filter(intval__in=code_ids)
                    coded_article_ids &= set(
                        coding_values.values_list("coding__coded_article__id",
                                                  flat=True))

            codings = Coding.objects.filter(
                coded_article__id__in=coded_article_ids)

            terms = selection.get_article_ids_per_query()
            orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
            categories = list(filter(None, [primary, secondary]))
            values = list(filter(None, [value1, value2]))
            aggregation = orm_aggregate.get_aggregate(categories, values)
            aggregation = sorted(aggregation, key=to_sortable_tuple)

            self.set_cache(
                [aggregation, primary, secondary, categories, values])
        else:
            self.monitor.update(
                10, "Found in cache. Rendering..".format(**locals()))

        if form.cleaned_data.get("primary_fill_zeroes") and hasattr(
                primary, 'interval'):
            aggregation = list(
                aggregate_es.fill_zeroes(aggregation, primary, secondary))
        # Matrices are very annoying to construct in javascript due to missing hashtables. If
        # the user requests a table, we thus first convert it to a different format which should
        # be easier to render.
        if form.cleaned_data["output_type"] == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if form.cleaned_data["output_type"] == "text/csv":
            return aggregation_to_csv(aggregation, categories, values)

        self.monitor.update(60, "Serialising..".format(**locals()))
        return json.dumps(aggregation,
                          cls=AggregationEncoder,
                          check_circular=False)