def run(self, form):
    """Render the clustermap for the queries selected in *form*.

    The form's ``output_type`` selects the serialisation: an interactive
    JSON clustermap, an SPSS .sav table, a JSON-wrapped CSV table, or a
    plain CSV / tab-separated table.
    """
    output_type = form.cleaned_data["output_type"]
    search = SelectionSearch(form)
    queries = search.get_article_ids_per_query()

    if output_type == "application/json+clustermap":
        # Interactive clustermap: PNG image plus clickable coordinates.
        clusters, articles = zip(*get_clusters(queries).items())
        cluster_queries = get_cluster_queries(clusters)
        image, html = get_clustermap_image(queries)
        cluster_info = [
            {"query": cluster_query, "articles": tuple(article_ids)}
            for cluster_query, article_ids in zip(cluster_queries, articles)
        ]
        return json.dumps({
            "coords": tuple(clustermap_html_to_coords(html)),
            "image": b64encode(image).decode("ascii"),
            "clusters": cluster_info,
        })

    headers, rows = get_clustermap_table(queries)

    if output_type == "application/spss-sav":
        # *sigh*.. this code is fugly. Map column name -> position so the
        # cellfunc can resolve values by column name.
        column_index = {str(header): position for position, header in enumerate(headers)}
        table = Table(
            rows=list(rows),
            columns=[str(header) for header in headers],
            columnTypes=[int] * len(headers),
            cellfunc=lambda row, col: row[column_index[col]],
        )
        return table2sav(table)

    dialect = 'excel-tab' if output_type == "text/csv+tab" else 'excel'
    buffer = StringIO()
    writer = csv.writer(buffer, dialect=dialect)
    writer.writerow([str(header) for header in headers])
    writer.writerows(sorted(rows))

    if output_type == "application/json+clustermap+table":
        # Wrap the CSV in JSON together with the query definitions.
        return json.dumps({
            "csv": buffer.getvalue(),
            "queries": {q.label: q.query for q in queries},
        })

    return buffer.getvalue()
def run(self, form):
    """Run the clustermap query and serialise the result.

    Output format is chosen by the form's ``output_type`` field: JSON
    clustermap, SPSS (.sav), JSON-wrapped CSV, or raw (tab-separated) CSV.
    """
    wants = lambda mimetype: form.cleaned_data["output_type"] == mimetype
    queries = SelectionSearch(form).get_article_ids_per_query()

    if wants("application/json+clustermap"):
        clusters, articles = zip(*get_clusters(queries).items())
        cluster_queries = get_cluster_queries(clusters)
        image, html = get_clustermap_image(queries)
        return json.dumps({
            "coords": tuple(clustermap_html_to_coords(html)),
            "image": b64encode(image).decode("ascii"),
            "clusters": [
                {"query": cluster_query, "articles": tuple(cluster_articles)}
                for cluster_query, cluster_articles in zip(cluster_queries, articles)
            ],
        })

    headers, rows = get_clustermap_table(queries)
    column_names = list(map(str, headers))

    if wants("application/spss-sav"):
        # *sigh*.. this code is fugly. The cellfunc looks values up by
        # column name, so build a name -> position mapping first.
        positions = dict(zip(column_names, range(len(headers))))
        return table2sav(Table(
            rows=list(rows),
            columns=column_names,
            columnTypes=[int] * len(headers),
            cellfunc=lambda row, col: row[positions[col]],
        ))

    out = StringIO()
    writer = csv.writer(out, dialect='excel-tab' if wants("text/csv+tab") else 'excel')
    writer.writerow(column_names)
    writer.writerows(sorted(rows))

    if wants("application/json+clustermap+table"):
        return json.dumps({
            "csv": out.getvalue(),
            "queries": {q.label: q.query for q in queries},
        })

    return out.getvalue()
def run(self, form):
    """Aggregate coding values for the selected articles and serialise.

    Returns CSV when ``output_type`` is "text/csv"; otherwise a JSON
    document (matrix-shaped for "text/json+aggregation+table", flat
    otherwise). Aggregation results are memoised through
    ``self.get_cache()`` / ``self.set_cache()``.
    """
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch(form)

    try:
        aggregation, primary, secondary, categories, values = self.get_cache()
    except NotInCacheError:
        narticles = selection.get_count()
        # Make the substitution explicit instead of `.format(**locals())`,
        # which pulled in every local and hid what the template needs.
        self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(narticles=narticles))

        # Get aggregation parameters from the form.
        codingjobs = form.cleaned_data["codingjobs"]
        primary = form.cleaned_data['primary']
        secondary = form.cleaned_data['secondary']
        value1 = form.cleaned_data['value1']
        value2 = form.cleaned_data['value2']

        article_ids = selection.get_article_ids()

        # Restrict coded articles to the selected articles and codingjobs.
        # This should probably happen in SelectionForm?
        coded_articles = CodedArticle.objects.all()
        coded_articles = coded_articles.filter(article__id__in=article_ids)
        coded_articles = coded_articles.filter(codingjob__id__in=codingjobs)
        coded_article_ids = set(coded_articles.values_list("id", flat=True))

        # Apply up to three schemafield filters. Each filter intersects the
        # surviving coded-article ids, so we can stop early once empty.
        for field_name in ("1", "2", "3"):
            if not coded_article_ids:
                break

            schemafield = form.cleaned_data["codingschemafield_{}".format(field_name)]
            schemafield_values = form.cleaned_data["codingschemafield_value_{}".format(field_name)]
            schemafield_include_descendants = form.cleaned_data["codingschemafield_include_descendants_{}".format(field_name)]

            if schemafield and schemafield_values:
                code_ids = get_code_filter(schemafield.codebook, schemafield_values, schemafield_include_descendants)
                coding_values = CodingValue.objects.filter(coding__coded_article__id__in=coded_article_ids)
                coding_values = coding_values.filter(field__id=schemafield.id)
                coding_values = coding_values.filter(intval__in=code_ids)
                coded_article_ids &= set(coding_values.values_list("coding__coded_article__id", flat=True))

        codings = Coding.objects.filter(coded_article__id__in=coded_article_ids)

        terms = selection.get_article_ids_per_query()
        orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
        categories = list(filter(None, [primary, secondary]))
        values = list(filter(None, [value1, value2]))
        aggregation = orm_aggregate.get_aggregate(categories, values)
        aggregation = sorted(aggregation, key=to_sortable_tuple)

        self.set_cache([aggregation, primary, secondary, categories, values])
    else:
        # Fixed: dropped a no-op `.format(**locals())` on this literal,
        # which had no placeholders (wasted work, brace-escaping hazard).
        self.monitor.update(10, "Found in cache. Rendering..")

    if form.cleaned_data.get("primary_fill_zeroes") and hasattr(primary, 'interval'):
        aggregation = list(aggregate_es.fill_zeroes(aggregation, primary, secondary))

    # Matrices are very annoying to construct in javascript due to missing
    # hashtables. If the user requests a table, we thus first convert it to
    # a different format which should be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, values)

    # Fixed: same no-op `.format(**locals())` removed here.
    self.monitor.update(60, "Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)