def run(self, form):
    """Aggregate the articles matching *form* and serialise the result.

    Results of the expensive count/aggregate phase are cached; on a cache
    hit those steps are skipped entirely.

    @param form: cleaned query form providing primary/secondary categories,
        order_by and output_type.
    @return: CSV text when output_type is "text/csv", otherwise a JSON
        string encoded with AggregationEncoder.
    """
    selection = SelectionSearch(form)

    try:
        # Try to retrieve cache values
        primary, secondary, categories, aggregation = self.get_cache()
    except NotInCacheError:
        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()
        # NOTE: message relies on the local name 'narticles' via locals()
        self.monitor.update(message="Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        order_by = form.cleaned_data["order_by"]
        primary = form.cleaned_data["primary"]
        secondary = form.cleaned_data["secondary"]
        categories = list(filter(None, [primary, secondary]))
        aggregation = list(selection.get_aggregate(categories, flat=False))
        aggregation = sorted_aggregation(*order_by, aggregation)
        self.set_cache([primary, secondary, categories, aggregation])
    else:
        self.monitor.update(2)

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

    # Dropped a no-op .format(**locals()) — the message has no placeholders.
    self.monitor.update(message="Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
def run(self, form):
    """Render a summary page (articles, mediums, optional aggregations) for *form*.

    NOTE(review): local variable names are load-bearing here — locals() is
    passed into the template Context at the end, and progress messages are
    formatted with .format(**locals()). Do not rename locals casually.
    """
    # Raw (multi-valued) form data serialised for the template
    form_data = json.dumps(dict(form.data._iterlists()))

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    show_aggregation = form.cleaned_data['aggregations']

    # 'timer' is also exposed to the template via locals()
    with Timer() as timer:
        selection = SelectionSearch(form)
        self.monitor.update(1, "Executing query..")
        narticles = selection.get_count()
        self.monitor.update(39, "Fetching mediums..".format(**locals()))
        mediums = selection.get_mediums()
        self.monitor.update(59, "Fetching articles..".format(**locals()))
        articles = selection.get_articles(size=size, offset=offset)

        if show_aggregation:
            self.monitor.update(69, "Aggregating..".format(**locals()))
            # Per-day totals and per-medium/date aggregations for the summary graphs
            date_aggr = selection.get_aggregate(x_axis="date", y_axis="total", interval="day")
            medium_aggr = selection.get_aggregate(x_axis="medium", y_axis="date", interval="day")

        self.monitor.update(79, "Rendering results..".format(**locals()))

    # Every local (articles, mediums, aggregations, timer, ...) becomes a
    # template variable, plus the current project and user.
    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project, "user": self.user
    })))
def run(self, form):
    """Render a summary (articles, mediums, optional aggregations) for *form*.

    NOTE(review): locals() is handed to the template Context below and used
    for message formatting, so the exact local variable names are part of
    this method's behavior.
    """
    # Serialised raw form data, exposed to the template
    form_data = json.dumps(dict(form.data._iterlists()))

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    show_aggregation = form.cleaned_data['aggregations']

    # Timing information ('timer') also reaches the template via locals()
    with Timer() as timer:
        selection = SelectionSearch(form)
        self.monitor.update(1, "Executing query..")
        narticles = selection.get_count()
        self.monitor.update(39, "Fetching mediums..".format(**locals()))
        mediums = selection.get_mediums()
        self.monitor.update(59, "Fetching articles..".format(**locals()))
        articles = selection.get_articles(size=size, offset=offset)

        if show_aggregation:
            self.monitor.update(69, "Aggregating..".format(**locals()))
            # Day-level and medium-level aggregations used by the summary charts
            date_aggr = selection.get_aggregate(x_axis="date", y_axis="total", interval="day")
            medium_aggr = selection.get_aggregate(x_axis="medium", y_axis="date", interval="day")

        self.monitor.update(79, "Rendering results..".format(**locals()))

    # All locals plus project/user become template variables
    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project, "user": self.user
    })))
def run(self, form):
    """Aggregate the articles matching *form* and serialise the result.

    Like the ordered variant elsewhere in this file, but without an
    order_by step. Count/aggregate results are cached.

    @param form: cleaned query form providing primary/secondary categories
        and output_type.
    @return: CSV text when output_type is "text/csv", otherwise a JSON
        string encoded with AggregationEncoder.
    """
    selection = SelectionSearch(form)

    try:
        # Try to retrieve cache values
        primary, secondary, categories, aggregation = self.get_cache()
    except NotInCacheError:
        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()
        # NOTE: message relies on the local name 'narticles' via locals()
        self.monitor.update(message="Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        primary = form.cleaned_data["primary"]
        secondary = form.cleaned_data["secondary"]
        categories = list(filter(None, [primary, secondary]))
        aggregation = list(selection.get_aggregate(categories, flat=False))
        self.set_cache([primary, secondary, categories, aggregation])
    else:
        self.monitor.update(2)

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

    # Dropped a no-op .format(**locals()) — the message has no placeholders.
    self.monitor.update(message="Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
def run(self, form):
    """Render highlighted article fragments (and optional date aggregation) for *form*.

    NOTE(review): locals() is passed into the template Context at the end
    and used for message formatting — local names are part of the contract.
    """
    form_data = dict(form.data.lists())
    # Drop [None] placeholder values before serialising the form data
    for value in form_data.values():
        if value == [None]:
            value.pop()
    form_data = json.dumps(form_data, indent=4)

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    number_of_fragments = form.cleaned_data['number_of_fragments']
    fragment_size = form.cleaned_data['fragment_size']
    show_fields = sorted(form.cleaned_data['show_fields'])
    show_aggregation = form.cleaned_data['aggregations']

    # 'timer' is exposed to the template via locals()
    with Timer() as timer:
        selection = SelectionSearch(form)
        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()
        self.monitor.update(message="Fetching articles..".format(**locals()))
        articles = selection.get_articles(size=size, offset=offset).as_dicts()
        # Replace article bodies by query-highlighted fragments
        articles = get_fragments(selection.get_query(), [a["id"] for a in articles], fragment_size, number_of_fragments)

        if show_aggregation:
            self.monitor.update(message="Aggregating..".format(**locals()))
            statistics = selection.get_statistics()
            # Pick the coarsest interval that keeps the number of date groups
            # under MAX_DATE_GROUPS; fall back to "day" when no interval fits
            # or the date range is missing (TypeError on None dates).
            try:
                delta_start_end = statistics.end_date - statistics.start_date
                interval = next(interval for (interval, delta) in TIMEDELTAS if MAX_DATE_GROUPS * delta > delta_start_end)
            except (StopIteration, TypeError):
                interval = "day"
            date_aggr = selection.get_aggregate([IntervalCategory(interval)], objects=False)
        else:
            # Increase progress without doing anything (because we don't have to aggregate)
            self.monitor.update()

        self.monitor.update(message="Rendering results..".format(**locals()))

    # All locals plus project/user become template variables
    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project, "user": self.user
    })))
def _run_query(self, form_data, expected_indices=None, expected_count=None, msg=None):
    """Build a SelectionForm from *form_data*, run it, and assert the results.

    @param form_data: raw data for SelectionForm.
    @param expected_indices: indices into self.articles expected in the
        result (compared as a set); None skips this check.
    @param expected_count: expected number of matching articles; None skips
        this check.
    @param msg: optional assertion message passed through to the asserts.
    """
    self._setUp()
    sets = ArticleSet.objects.filter(pk=self.articleset.pk)
    form = SelectionForm(articlesets=sets, project=self.articleset.project, data=form_data)
    form.full_clean()
    self.assertFalse(form.errors, "Form contains errors")
    search = SelectionSearch(form)

    # Compare against None explicitly: the previous truthiness checks
    # silently skipped the assertions for expected_indices=[] and
    # expected_count=0, which are perfectly valid expectations.
    if expected_indices is not None:
        article_ids = search.get_article_ids()
        articles = Article.objects.filter(id__in=article_ids)
        expected = [self.articles[i] for i in expected_indices]
        self.assertSetEqual(set(articles), set(expected), msg=msg)

    if expected_count is not None:
        self.assertEqual(search.get_count(), expected_count, msg=msg)
def run(self, form):
    """Aggregate matching articles along the requested x/y axes.

    @param form: cleaned form providing x_axis, y_axis, interval and an
        optional relative_to column.
    @return: JSON-encoded list of aggregation rows (AggregationEncoder);
        values are made relative when relative_to is set.
    """
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch(form)
    narticles = selection.get_count()
    # NOTE: message relies on the local name 'narticles' via locals()
    self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

    # Get aggregation
    aggregation = selection.get_aggregate(
        form.cleaned_data['x_axis'],
        form.cleaned_data['y_axis'],
        form.cleaned_data['interval']
    )

    # Convert absolute values to values relative to the selected column
    # (removed a commented-out progress update that was dead code).
    column = form.cleaned_data['relative_to']
    if column is not None:
        aggregation = list(get_relative(aggregation, column))

    # Dropped a no-op .format(**locals()) — the message has no placeholders.
    self.monitor.update(60, "Serialising..")
    return json.dumps(list(aggregation), cls=AggregationEncoder, check_circular=False)
def run(self, form):
    """Aggregate matching articles along the requested x/y axes.

    @param form: cleaned form providing x_axis, y_axis, interval and an
        optional relative_to column.
    @return: JSON-encoded list of aggregation rows (AggregationEncoder);
        values are made relative when relative_to is set.
    """
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch(form)
    narticles = selection.get_count()
    # NOTE: message relies on the local name 'narticles' via locals()
    self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

    # Get aggregation
    aggregation = selection.get_aggregate(
        form.cleaned_data['x_axis'],
        form.cleaned_data['y_axis'],
        form.cleaned_data['interval']
    )

    # Convert absolute values to values relative to the selected column
    # (removed a commented-out progress update that was dead code).
    column = form.cleaned_data['relative_to']
    if column is not None:
        aggregation = list(get_relative(aggregation, column))

    # Dropped a no-op .format(**locals()) — the message has no placeholders.
    self.monitor.update(60, "Serialising..")
    return json.dumps(list(aggregation), cls=AggregationEncoder, check_circular=False)
def run(self, form):
    """Render a summary with escaped articles and zero-filled date aggregation.

    NOTE(review): locals() is passed into the template Context below and
    used for message formatting — local variable names are load-bearing.
    """
    # Serialised raw form data, exposed to the template
    form_data = json.dumps(dict(form.data.lists()))

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    show_aggregation = form.cleaned_data['aggregations']

    # 'timer' also reaches the template via locals()
    with Timer() as timer:
        selection = SelectionSearch(form)
        self.monitor.update(1, "Executing query..")
        narticles = selection.get_count()
        self.monitor.update(39, "Fetching mediums..".format(**locals()))
        mediums = selection.get_mediums()
        self.monitor.update(59, "Fetching articles..".format(**locals()))
        # Escape each fetched article for safe rendering
        articles = [escape_article_result(art) for art in selection.get_articles(size=size, offset=offset)]

        if show_aggregation:
            self.monitor.update(69, "Aggregating..".format(**locals()))
            statistics = selection.get_statistics()
            # Pick the coarsest interval keeping date groups under
            # MAX_DATE_GROUPS; fall back to "day" when nothing fits or the
            # date range is missing (TypeError on None dates).
            try:
                delta_start_end = statistics.end_date - statistics.start_date
                interval = next(interval for (interval, delta) in TIMEDELTAS if MAX_DATE_GROUPS * delta > delta_start_end)
            except (StopIteration, TypeError):
                interval = "day"
            date_aggr = selection.get_nested_aggregate([IntervalCategory(interval)])
            # Reshape (date, value) pairs into the nested tuple format
            # fill_zeroes expects, inserting zero rows for missing dates.
            date_aggr = fill_zeroes((((date,), (value,)) for date, value in date_aggr), IntervalCategory(interval))
            medium_aggr = selection.get_nested_aggregate([MediumCategory()])

        self.monitor.update(79, "Rendering results..".format(**locals()))

    # All locals plus project/user become template variables
    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project, "user": self.user
    })))
def run(self, form):
    """Aggregate codings of the articles matching *form* and serialise the result.

    Articles are first matched by the selection query, then narrowed to
    coded articles in the selected codingjobs, then further narrowed by up
    to three schemafield/value filters. The surviving codings are
    aggregated with ORMAggregate. Results are cached.

    @return: CSV text when output_type is "text/csv", otherwise a JSON
        string encoded with AggregationEncoder.
    """
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch(form)

    try:
        # Try to retrieve cache values from a previous identical query
        aggregation, primary, secondary, categories, values = self.get_cache()
    except NotInCacheError:
        narticles = selection.get_count()
        # NOTE: message relies on the local name 'narticles' via locals()
        self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        codingjobs = form.cleaned_data["codingjobs"]
        primary = form.cleaned_data['primary']
        secondary = form.cleaned_data['secondary']
        value1 = form.cleaned_data['value1']
        value2 = form.cleaned_data['value2']

        article_ids = selection.get_article_ids()

        # This should probably happen in SelectionForm?
        # Narrow to coded articles belonging to the matched articles and
        # the selected codingjobs.
        coded_articles = CodedArticle.objects.all()
        coded_articles = coded_articles.filter(article__id__in=article_ids)
        coded_articles = coded_articles.filter(codingjob__id__in=codingjobs)
        coded_article_ids = set(coded_articles.values_list("id", flat=True))

        # Apply up to three schemafield filters, intersecting the surviving
        # coded article ids each round; stop early once nothing is left.
        for field_name in ("1", "2", "3"):
            if not coded_article_ids:
                break

            schemafield = form.cleaned_data["codingschemafield_{}".format(field_name)]
            schemafield_values = form.cleaned_data["codingschemafield_value_{}".format(field_name)]
            schemafield_include_descendants = form.cleaned_data["codingschemafield_include_descendants_{}".format(field_name)]

            if schemafield and schemafield_values:
                # Codes to match, optionally including codebook descendants
                code_ids = get_code_filter(schemafield.codebook, schemafield_values, schemafield_include_descendants)
                coding_values = CodingValue.objects.filter(coding__coded_article__id__in=coded_article_ids)
                coding_values = coding_values.filter(field__id=schemafield.id)
                coding_values = coding_values.filter(intval__in=code_ids)
                coded_article_ids &= set(coding_values.values_list("coding__coded_article__id", flat=True))

        codings = Coding.objects.filter(coded_article__id__in=coded_article_ids)

        # Aggregate the surviving codings per category/value
        terms = selection.get_article_ids_per_query()
        orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
        categories = list(filter(None, [primary, secondary]))
        values = list(filter(None, [value1, value2]))
        aggregation = orm_aggregate.get_aggregate(categories, values)
        aggregation = sorted(aggregation, key=to_sortable_tuple)

        self.set_cache([aggregation, primary, secondary, categories, values])
    else:
        self.monitor.update(10, "Found in cache. Rendering..".format(**locals()))

    # Optionally insert zero rows for missing intervals on the primary axis
    if form.cleaned_data.get("primary_fill_zeroes") and hasattr(primary, 'interval'):
        aggregation = list(aggregate_es.fill_zeroes(aggregation, primary, secondary))

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, values)

    self.monitor.update(60, "Serialising..".format(**locals()))
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)