예제 #1
0
파일: aggregation.py 프로젝트: amcat/amcat
    def run(self, form):
        """Aggregate the articles selected by ``form`` and serialise the result.

        The tuple ``(primary, secondary, categories, aggregation)`` is taken
        from ``self.get_cache()`` when available; on ``NotInCacheError`` it is
        computed from the selection, ordered with ``sorted_aggregation`` and
        stored through ``self.set_cache``.

        Returns the result of ``aggregation_to_csv`` when the requested
        ``output_type`` is ``text/csv``, otherwise a JSON string.
        """
        selection = SelectionSearch(form)

        try:
            primary, secondary, categories, aggregation = self.get_cache()
        except NotInCacheError:
            # Cache miss: query and aggregate from scratch.
            self.monitor.update(message="Executing query..")
            narticles = selection.get_count()
            self.monitor.update(
                message="Found {narticles} articles. Aggregating..".format(narticles=narticles)
            )

            cleaned = form.cleaned_data
            order_by = cleaned["order_by"]
            primary = cleaned["primary"]
            secondary = cleaned["secondary"]
            # Only the categories the user actually selected take part.
            categories = [category for category in (primary, secondary) if category]
            unsorted_rows = list(selection.get_aggregate(categories, flat=False))
            aggregation = sorted_aggregation(*order_by, unsorted_rows)

            self.set_cache([primary, secondary, categories, aggregation])
        else:
            # Cache hit. NOTE(review): presumably advances the monitor past the
            # two message updates that were skipped -- confirm.
            self.monitor.update(2)

        output_type = form.cleaned_data["output_type"]

        # Matrices are awkward to build in javascript (no hashtables), so a
        # table request is converted to a matrix here, before serialisation.
        if output_type == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)
        elif output_type == "text/csv":
            return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

        self.monitor.update(message="Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
예제 #2
0
    def run(self, form):
        """Render the summary template for the selection described by ``form``.

        Reads ``size``, ``offset`` and ``aggregations`` from
        ``form.cleaned_data``, asks a ``SelectionSearch`` for the article
        count, the mediums and the articles, and -- when aggregations are
        requested -- two aggregates with ``interval="day"``.

        Returns the output of ``TEMPLATE.render``. The template context is
        this method's local variables plus ``project`` and ``user``.
        """
        # NOTE(review): ``_iterlists`` is a private method of ``form.data``
        # (presumably a Django QueryDict) -- confirm it exists in the Django
        # version in use.
        form_data = json.dumps(dict(form.data._iterlists()))

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        show_aggregation = form.cleaned_data['aggregations']

        # ``timer`` is not used below, but it is a local and therefore ends up
        # in the template context (see the return statement).
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(1, "Executing query..")
            narticles = selection.get_count()
            # The messages below contain no placeholders, so the
            # ``.format(**locals())`` calls leave them unchanged.
            self.monitor.update(39, "Fetching mediums..".format(**locals()))
            mediums = selection.get_mediums()
            self.monitor.update(59, "Fetching articles..".format(**locals()))
            articles = selection.get_articles(size=size, offset=offset)

            # ``date_aggr`` and ``medium_aggr`` only exist (and only reach the
            # template) when aggregations were requested.
            if show_aggregation:
                self.monitor.update(69, "Aggregating..".format(**locals()))
                date_aggr = selection.get_aggregate(x_axis="date",
                                                    y_axis="total",
                                                    interval="day")
                medium_aggr = selection.get_aggregate(x_axis="medium",
                                                      y_axis="date",
                                                      interval="day")

            self.monitor.update(79, "Rendering results..".format(**locals()))

        # Every local defined above (including ``self`` and ``form``) is passed
        # to the template under its own name, so renaming or removing a local
        # changes the template context.
        return TEMPLATE.render(
            Context(
                dict(locals(), **{
                    "project": self.project,
                    "user": self.user
                })))
예제 #3
0
파일: summary.py 프로젝트: CJStuart/amcat
    def run(self, form):
        """Render the summary template for the selection described by ``form``.

        Reads ``size``, ``offset`` and ``aggregations`` from
        ``form.cleaned_data``, asks a ``SelectionSearch`` for the article
        count, the mediums and the articles, and -- when aggregations are
        requested -- two aggregates with ``interval="day"``.

        Returns the output of ``TEMPLATE.render``. The template context is
        this method's local variables plus ``project`` and ``user``.
        """
        # NOTE(review): ``_iterlists`` is a private method of ``form.data``
        # (presumably a Django QueryDict) -- confirm it exists in the Django
        # version in use.
        form_data = json.dumps(dict(form.data._iterlists()))

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        show_aggregation = form.cleaned_data['aggregations']

        # ``timer`` is not used below, but it is a local and therefore ends up
        # in the template context (see the return statement).
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(1, "Executing query..")
            narticles = selection.get_count()
            # The messages below contain no placeholders, so the
            # ``.format(**locals())`` calls leave them unchanged.
            self.monitor.update(39, "Fetching mediums..".format(**locals()))
            mediums = selection.get_mediums()
            self.monitor.update(59, "Fetching articles..".format(**locals()))
            articles = selection.get_articles(size=size, offset=offset)

            # ``date_aggr`` and ``medium_aggr`` only exist (and only reach the
            # template) when aggregations were requested.
            if show_aggregation:
                self.monitor.update(69, "Aggregating..".format(**locals()))
                date_aggr = selection.get_aggregate(x_axis="date", y_axis="total", interval="day")
                medium_aggr = selection.get_aggregate(x_axis="medium", y_axis="date", interval="day")

            self.monitor.update(79, "Rendering results..".format(**locals()))


        # Every local defined above (including ``self`` and ``form``) is passed
        # to the template under its own name, so renaming or removing a local
        # changes the template context.
        return TEMPLATE.render(Context(dict(locals(), **{
            "project": self.project, "user": self.user
        })))
예제 #4
0
    def run(self, form):
        """Aggregate the articles selected by ``form`` and serialise the result.

        The tuple ``(primary, secondary, categories, aggregation)`` is taken
        from ``self.get_cache()`` when available; on ``NotInCacheError`` it is
        computed from the selection and stored through ``self.set_cache``.

        Returns the result of ``aggregation_to_csv`` when the requested
        ``output_type`` is ``text/csv``, otherwise a JSON string.
        """
        selection = SelectionSearch(form)

        try:
            primary, secondary, categories, aggregation = self.get_cache()
        except NotInCacheError:
            # Cache miss: query and aggregate from scratch.
            self.monitor.update(message="Executing query..")
            narticles = selection.get_count()
            progress_message = "Found {narticles} articles. Aggregating..".format(
                narticles=narticles
            )
            self.monitor.update(message=progress_message)

            primary = form.cleaned_data["primary"]
            secondary = form.cleaned_data["secondary"]
            # Only the categories the user actually selected take part.
            categories = [category for category in (primary, secondary) if category]
            aggregation = list(selection.get_aggregate(categories, flat=False))

            self.set_cache([primary, secondary, categories, aggregation])
        else:
            # Cache hit. NOTE(review): presumably advances the monitor past the
            # two message updates that were skipped -- confirm.
            self.monitor.update(2)

        requested_type = form.cleaned_data["output_type"]

        # Matrices are awkward to build in javascript (no hashtables), so a
        # table request is converted to a matrix here, before serialisation.
        if requested_type == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if requested_type == "text/csv":
            return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

        self.monitor.update(message="Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)
예제 #5
0
파일: summary.py 프로젝트: isususi/amcat
    def run(self, form):
        """Render the summary template for the selection described by ``form``.

        Fetches the selected articles through ``get_fragments`` and, when
        aggregations are requested, a date aggregate whose interval is chosen
        from ``TIMEDELTAS`` based on the selection's date range.

        Returns the output of ``TEMPLATE.render``. The template context is
        this method's local variables plus ``project`` and ``user``.
        """
        # Serialise the raw form data. Value lists equal to ``[None]`` are
        # emptied in place first, so they are dumped as ``[]``.
        form_data = dict(form.data.lists())
        for value in form_data.values():
            if value == [None]:
                value.pop()
        form_data = json.dumps(form_data, indent=4)

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        number_of_fragments = form.cleaned_data['number_of_fragments']
        fragment_size = form.cleaned_data['fragment_size']
        # Not used below; it reaches the template through ``locals()``.
        show_fields = sorted(form.cleaned_data['show_fields'])
        show_aggregation = form.cleaned_data['aggregations']

        # ``timer`` is not used below either, but as a local it also ends up
        # in the template context.
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(message="Executing query..")
            narticles = selection.get_count()
            # The messages below contain no placeholders, so the
            # ``.format(**locals())`` calls leave them unchanged.
            self.monitor.update(message="Fetching articles..".format(
                **locals()))

            # ``articles`` is rebound: first the article dicts, then the result
            # of ``get_fragments`` for the ids of those articles.
            articles = selection.get_articles(size=size,
                                              offset=offset).as_dicts()
            articles = get_fragments(selection.get_query(),
                                     [a["id"] for a in articles],
                                     fragment_size, number_of_fragments)

            if show_aggregation:
                self.monitor.update(message="Aggregating..".format(**locals()))

                statistics = selection.get_statistics()
                # Pick the first interval in TIMEDELTAS for which
                # MAX_DATE_GROUPS * delta exceeds the selection's date range.
                # Fall back to "day" when no entry qualifies (StopIteration)
                # or the subtraction/comparison raises TypeError (presumably
                # when a date is None -- confirm).
                try:
                    delta_start_end = statistics.end_date - statistics.start_date
                    interval = next(
                        interval for (interval, delta) in TIMEDELTAS
                        if MAX_DATE_GROUPS * delta > delta_start_end)
                except (StopIteration, TypeError):
                    interval = "day"

                date_aggr = selection.get_aggregate(
                    [IntervalCategory(interval)], objects=False)
            else:
                # Increase progress without doing anything (because we don't have to aggregate)
                self.monitor.update()

            self.monitor.update(message="Rendering results..".format(
                **locals()))

        # Every local defined above (including ``self`` and ``form``) is passed
        # to the template under its own name, so renaming or removing a local
        # changes the template context. ``statistics``, ``interval`` and
        # ``date_aggr`` are only present when aggregations were requested.
        return TEMPLATE.render(
            Context(
                dict(locals(), **{
                    "project": self.project,
                    "user": self.user
                })))
예제 #6
0
    def _run_query(self, form_data, expected_indices=None, expected_count=None, msg=None):
        """Run a selection query built from ``form_data`` and check its results.

        A ``SelectionForm`` is built for ``self.articleset`` and must validate
        without errors. The resulting ``SelectionSearch`` is then checked
        against whichever expectations were supplied.

        :param form_data: data passed to ``SelectionForm``.
        :param expected_indices: indices into ``self.articles`` of the articles
            the query must return, or ``None`` to skip this check. An empty
            sequence asserts that no article is returned.
        :param expected_count: the count the query must report, or ``None`` to
            skip this check. ``0`` asserts an empty result.
        :param msg: message forwarded to the result assertions.
        """
        self._setUp()
        sets = ArticleSet.objects.filter(pk=self.articleset.pk)
        form = SelectionForm(articlesets=sets, project=self.articleset.project, data=form_data)
        form.full_clean()
        self.assertFalse(form.errors, "Form contains errors")

        search = SelectionSearch(form)

        # Compare against None rather than truthiness: ``[]`` and ``0`` are
        # legitimate expectations ("no hits") and must not silently skip the
        # assertion.
        if expected_indices is not None:
            article_ids = search.get_article_ids()
            articles = Article.objects.filter(id__in=article_ids)
            expected = [self.articles[i] for i in expected_indices]
            self.assertSetEqual(set(articles), set(expected), msg=msg)

        if expected_count is not None:
            self.assertEqual(search.get_count(), expected_count, msg=msg)
예제 #7
0
    def run(self, form):
        """Aggregate the articles selected by ``form`` and return them as JSON.

        The aggregate is requested for the ``x_axis``, ``y_axis`` and
        ``interval`` values of ``form.cleaned_data``. When ``relative_to`` is
        not ``None`` the aggregate is passed through ``get_relative`` first.

        Returns a JSON string encoded with ``AggregationEncoder``.
        """
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch(form)
        narticles = selection.get_count()
        found_message = "Found {narticles} articles. Aggregating..".format(narticles=narticles)
        self.monitor.update(10, found_message)

        cleaned = form.cleaned_data
        aggregation = selection.get_aggregate(
            cleaned['x_axis'], cleaned['y_axis'], cleaned['interval']
        )

        # This progress message is reported even when no relative column is set.
        self.monitor.update(20, "Calculating relative values..")
        relative_column = cleaned['relative_to']
        if relative_column is not None:
            # Materialised here, i.e. before the "Serialising.." update.
            aggregation = list(get_relative(aggregation, relative_column))

        self.monitor.update(60, "Serialising..")
        rows = list(aggregation)
        return json.dumps(rows, cls=AggregationEncoder, check_circular=False)
예제 #8
0
    def run(self, form):
        """Aggregate the articles selected by ``form`` and return them as JSON.

        The aggregate is requested for the ``x_axis``, ``y_axis`` and
        ``interval`` values of ``form.cleaned_data``. When ``relative_to`` is
        not ``None`` the aggregate is passed through ``get_relative`` first.

        Returns a JSON string encoded with ``AggregationEncoder``.
        """
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch(form)
        narticles = selection.get_count()
        self.monitor.update(
            10, "Found {narticles} articles. Aggregating..".format(narticles=narticles))

        # Axes and interval of the requested aggregate.
        x_axis = form.cleaned_data['x_axis']
        y_axis = form.cleaned_data['y_axis']
        interval = form.cleaned_data['interval']
        aggregation = selection.get_aggregate(x_axis, y_axis, interval)

        # This progress message is reported even when no relative column is set.
        self.monitor.update(20, "Calculating relative values..")
        column = form.cleaned_data['relative_to']
        has_relative_column = column is not None
        if has_relative_column:
            # Materialised here, i.e. before the "Serialising.." update.
            aggregation = list(get_relative(aggregation, column))

        self.monitor.update(60, "Serialising..")
        serialised = json.dumps(list(aggregation), cls=AggregationEncoder, check_circular=False)
        return serialised
예제 #9
0
파일: summary.py 프로젝트: BBie/amcat
    def run(self, form):
        """Render the summary template for the selection described by ``form``.

        Reads ``size``, ``offset`` and ``aggregations`` from
        ``form.cleaned_data``, asks a ``SelectionSearch`` for the article
        count, the mediums and the articles, and -- when aggregations are
        requested -- a date aggregate and a medium aggregate.

        Returns the output of ``TEMPLATE.render``. The template context is
        this method's local variables plus ``project`` and ``user``.
        """
        form_data = json.dumps(dict(form.data.lists()))

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        show_aggregation = form.cleaned_data['aggregations']

        # ``timer`` is not used below, but it is a local and therefore ends up
        # in the template context (see the return statement).
        with Timer() as timer:
            selection = SelectionSearch(form)
            self.monitor.update(1, "Executing query..")
            narticles = selection.get_count()
            # The messages below contain no placeholders, so the
            # ``.format(**locals())`` calls leave them unchanged.
            self.monitor.update(39, "Fetching mediums..".format(**locals()))
            mediums = selection.get_mediums()
            self.monitor.update(59, "Fetching articles..".format(**locals()))
            # Each fetched article is passed through ``escape_article_result``.
            articles = [escape_article_result(art) for art in selection.get_articles(size=size, offset=offset)]

            if show_aggregation:
                self.monitor.update(69, "Aggregating..".format(**locals()))

                statistics = selection.get_statistics()
                # Pick the first interval in TIMEDELTAS for which
                # MAX_DATE_GROUPS * delta exceeds the selection's date range.
                # Fall back to "day" when no entry qualifies (StopIteration)
                # or the subtraction/comparison raises TypeError (presumably
                # when a date is None -- confirm).
                try:
                    delta_start_end = statistics.end_date - statistics.start_date
                    interval = next(interval for (interval, delta) in TIMEDELTAS
                                    if MAX_DATE_GROUPS * delta > delta_start_end)
                except (StopIteration, TypeError):
                    interval = "day"

                date_aggr = selection.get_nested_aggregate([IntervalCategory(interval)])
                # Each (date, value) pair is wrapped into one-element tuples
                # before being handed to ``fill_zeroes``.
                date_aggr = fill_zeroes((((date,),(value,)) for date,value in date_aggr), IntervalCategory(interval))
                medium_aggr = selection.get_nested_aggregate([MediumCategory()])

            self.monitor.update(79, "Rendering results..".format(**locals()))

        # Every local defined above (including ``self`` and ``form``) is passed
        # to the template under its own name, so renaming or removing a local
        # changes the template context. The aggregation locals are only
        # present when aggregations were requested.
        return TEMPLATE.render(Context(dict(locals(), **{
            "project": self.project, "user": self.user
        })))
예제 #10
0
    def run(self, form):
        """Aggregate codings for the articles selected by ``form``.

        The list ``[aggregation, primary, secondary, categories, values]`` is
        taken from ``self.get_cache()`` when available. On ``NotInCacheError``
        the coded articles of the selection are narrowed by coding job and by
        up to three coding schema field filters, aggregated with
        ``ORMAggregate`` and the sorted result is stored via ``self.set_cache``.

        Returns the result of ``aggregation_to_csv`` when the requested
        ``output_type`` is ``text/csv``, otherwise a JSON string.
        """
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch(form)

        try:
            aggregation, primary, secondary, categories, values = self.get_cache()
        except NotInCacheError:
            narticles = selection.get_count()
            self.monitor.update(
                10, "Found {narticles} articles. Aggregating..".format(narticles=narticles))

            cleaned = form.cleaned_data
            codingjobs = cleaned["codingjobs"]
            primary = cleaned['primary']
            secondary = cleaned['secondary']
            value1 = cleaned['value1']
            value2 = cleaned['value2']

            article_ids = selection.get_article_ids()

            # This should probably happen in SelectionForm?
            coded_articles = (
                CodedArticle.objects.all()
                .filter(article__id__in=article_ids)
                .filter(codingjob__id__in=codingjobs)
            )
            coded_article_ids = set(coded_articles.values_list("id", flat=True))

            # Each configured schema field filter narrows the remaining set of
            # coded articles; stop as soon as nothing is left.
            for suffix in ("1", "2", "3"):
                if not coded_article_ids:
                    break

                schemafield = cleaned["codingschemafield_{}".format(suffix)]
                schemafield_values = cleaned["codingschemafield_value_{}".format(suffix)]
                include_descendants = cleaned[
                    "codingschemafield_include_descendants_{}".format(suffix)]

                if not (schemafield and schemafield_values):
                    continue

                code_ids = get_code_filter(
                    schemafield.codebook, schemafield_values, include_descendants)
                matching_values = (
                    CodingValue.objects
                    .filter(coding__coded_article__id__in=coded_article_ids)
                    .filter(field__id=schemafield.id)
                    .filter(intval__in=code_ids)
                )
                matching_ids = set(
                    matching_values.values_list("coding__coded_article__id", flat=True))
                coded_article_ids &= matching_ids

            codings = Coding.objects.filter(coded_article__id__in=coded_article_ids)

            terms = selection.get_article_ids_per_query()
            orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
            categories = [category for category in (primary, secondary) if category]
            values = [value for value in (value1, value2) if value]
            aggregation = sorted(
                orm_aggregate.get_aggregate(categories, values), key=to_sortable_tuple)

            self.set_cache([aggregation, primary, secondary, categories, values])
        else:
            self.monitor.update(10, "Found in cache. Rendering..")

        # Zero-filling is only applied when requested and when the primary
        # category has an ``interval`` attribute.
        wants_zero_fill = form.cleaned_data.get("primary_fill_zeroes")
        if wants_zero_fill and hasattr(primary, 'interval'):
            aggregation = list(aggregate_es.fill_zeroes(aggregation, primary, secondary))

        output_type = form.cleaned_data["output_type"]

        # Matrices are awkward to build in javascript (no hashtables), so a
        # table request is converted to a matrix here, before serialisation.
        if output_type == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if output_type == "text/csv":
            return aggregation_to_csv(aggregation, categories, values)

        self.monitor.update(60, "Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)