Example no. 1
0
    def run(self, form):
        """Create or update codebook codes from the selection's queries.

        Each query label becomes (or updates) a root code in the target
        codebook; the query string is stored as the code's label in the
        chosen indicator language.
        """
        new_name = form.cleaned_data["new_codebook"]
        if new_name:
            # The user asked for a brand new codebook in this project.
            codebook = Codebook(name=new_name, project=self.project)
            codebook.save()
        else:
            # Reuse an existing codebook; cache() primes it for fast lookups.
            codebook = form.cleaned_data["existing_codebook"]
            codebook.cache()

        language = form.cleaned_data["indicator_language"]
        roots_by_label = {root.label: root for root in codebook.get_roots()}
        # Deduplicate queries by label (later queries win), as before.
        queries_by_label = {q.label: q for q in SelectionSearch.get_instance(form).get_queries()}

        n_updated = 0
        n_created = 0
        for label, query in queries_by_label.items():
            root = roots_by_label.get(label)
            if root is not None:
                # Known label: overwrite its indicator in the given language.
                root.add_label(language, query.query, replace=True)
                n_updated += 1
            else:
                # Unknown label: create a fresh code and attach it.
                code = Code(label=label)
                code.save()
                code.add_label(language, query.query, replace=True)
                codebook.add_code(code)
                n_created += 1

        return "Updated {} code(s), added {} new code(s).".format(n_updated, n_created)
Example no. 2
0
    def run(self, form):
        """Create one or more codingjobs covering the selected articles.

        Builds a template CodingJob from the form, then splits the matching
        articles into batches of ``job_size`` articles each (0 = one job for
        everything), creating one codingjob per batch.
        """
        # Provenance is currently not read from the form; keep None until the
        # field is reinstated. TODO: is this correct?
        provenance = None  # form.cleaned_data["provenance"]
        job_size = form.cleaned_data["job_size"]

        self.monitor.update(10, "Executing query..")
        article_ids = list(SelectionSearch.get_instance(form).get_article_ids())

        # Template job; per-batch jobs are derived from it by the helper below.
        cj = CodingJob()
        cj.project = self.project
        cj.name = form.cleaned_data["name"]
        cj.unitschema = form.cleaned_data["unitschema"]
        cj.articleschema = form.cleaned_data["articleschema"]
        cj.coder = form.cleaned_data["coder"]
        cj.insertuser = self.user

        self.monitor.update(50, "Creating codingjobs..")

        # A job size of 0 means "put all articles in a single job".
        if job_size == 0:
            job_size = len(article_ids)

        # Ceiling division: an extra batch for any remainder.
        n_batches = len(article_ids) // job_size
        n_batches += 1 if len(article_ids) % job_size else 0

        for i, cid in enumerate(_create_codingjob_batches(cj, article_ids, job_size)):
            # Batch creation occupies the second half (50-100%) of the bar.
            # (A stray debug print of the progress value was removed here.)
            progress = int((i / float(n_batches)) * (100 // 2))
            msg = "Creating codingjob {} of {}..".format(i+1, n_batches)
            self.monitor.update(50 + progress, msg)

            if provenance:
                cj = CodingJob.objects.get(id=cid)
                cj.provenance = provenance
                cj.save()

        return "Codingjob(s) created."
Example no. 3
0
    def run(self, form):
        """Render an article Datatable for the current selection.

        Builds the API query string from the request data (mapped through
        API_KEYWORD_MAP), configures a Datatable against /api/v4/search and
        renders it together with the form into TABLE_TEMPLATE.
        """
        assert isinstance(self.data, QueryDict), "Class should have been instantiated with a django QueryDict as 'data'"

        selection = SelectionSearch.get_instance(form)

        # Translate incoming parameter names to their API equivalents.
        data = {API_KEYWORD_MAP.get(key, key): values for key, values in self.data.lists()}
        data["q"] = ["{}#{}".format(query.label, query.query) for query in selection.get_queries()]
        data["ids"] = data.get("ids", selection.get_filters().get("ids", []))

        url = urlencode(data, doseq=True)
        project_url = reverse("navigator:project-details", args=[self.project.id])
        rowlink = ARTICLE_ROWLINK.format(project_url, "{id}")

        table = Datatable(
            SearchResource,
            url="/api/v4/search",
            rowlink=rowlink,
            rowlink_open_in="new",
            checkboxes=True,
            allow_export_via_post=True,
            allow_html_export=True
        )
        table = table.add_arguments(minimal="1")
        table = table.add_arguments(project=str(self.project.id))

        # Forward every query parameter, one value at a time.
        for key, values in data.items():
            for value in values:
                table = table.add_arguments(**{key: value})

        return TABLE_TEMPLATE.render({"form": form, "url": url, "table": table})
Example no. 4
0
    def run(self, form):
        """Return a JSON summary of the selection: queries, sets, jobs, stats."""
        selection = SelectionSearch.get_instance(form)
        queries = selection.get_queries()
        articlesets = form.cleaned_data["articlesets"]
        codingjobs = form.cleaned_data["codingjobs"]

        statistics = selection.get_statistics()

        # Statistics over an empty result set carry no date range.
        if hasattr(statistics, "start_date"):
            start_date, end_date = statistics.start_date, statistics.end_date
        else:
            start_date = end_date = None

        summary = {
            "queries": {query.label: query.query for query in queries},
            "articlesets": {aset.id: aset.name for aset in articlesets},
            "codingjobs": {job.id: job.name for job in codingjobs},
            "codes_used": list(get_used_code_ids(codingjobs)),
            "statistics": {
                "start_date": start_date,
                "end_date": end_date,
                "narticles": statistics.n
            }
        }
        return json.dumps(summary, cls=DjangoJSONEncoder)
Example no. 5
0
    def get_association(self, form):
        """Build an Association over the selection's queries and filters.

        Honours the form's 'weigh' and 'interval' options.
        """
        selection = SelectionSearch.get_instance(form)
        filters = selection.get_filters()
        queries = selection.get_queries()

        options = {
            "weighted": form.cleaned_data["weigh"],
            "interval": form.cleaned_data["interval"],
        }
        return Association(queries, filters, **options)
Example no. 6
0
    def run(self, form):
        """Add every article matching the selection to the chosen article set."""
        self.monitor.update(10, "Executing query..")
        article_ids = list(SelectionSearch.get_instance(form).get_article_ids())
        # Refuse to copy articles the user may not read.
        _check_read_access(self.user, article_ids)

        self.monitor.update(60, "Saving to set..")
        target_set = form.cleaned_data["articleset"]
        target_set.add_articles(article_ids)

        context = {
            "project": self.project,
            "aset": target_set,
            "len": len(article_ids),
        }
        return OK_TEMPLATE.render(context)
Example no. 7
0
    def run(self, form):
        """Fetch and render a page of articles for the current selection.

        Optionally computes a date-interval aggregation for the results graph
        when the user requested aggregations. TEMPLATE is rendered with
        ``locals()``, so the local variable names below are part of the
        template contract and must not be renamed.
        """
        form_data = dict(form.data.lists())
        # Drop placeholder [None] values so they don't show up in the dump.
        for value in form_data.values():
            if value == [None]:
                value.pop()
        form_data = json.dumps(form_data, indent=4)

        size = form.cleaned_data['size']
        offset = form.cleaned_data['offset']
        number_of_fragments = form.cleaned_data['number_of_fragments']
        fragment_size = form.cleaned_data['fragment_size']
        show_fields = sorted(form.cleaned_data['show_fields'])
        show_aggregation = form.cleaned_data['aggregations']
        sort_by = form.cleaned_data.get('sort_by')
        sort_desc = "desc" if form.cleaned_data.get('sort_descending', False) else "asc"

        if sort_by:
            sort = [":".join([sort_by, sort_desc])]
        else:
            sort = []

        with Timer() as timer:
            selection = SelectionSearch.get_instance(form)
            self.monitor.update(message="Executing query..")
            narticles = selection.get_count()
            # NOTE: these messages previously went through a pointless
            # .format(**locals()) with no replacement fields; removed.
            self.monitor.update(message="Fetching articles..")
            articles = selection.get_articles(size=size, offset=offset, sort=sort).as_dicts()
            articles = get_fragments(selection.get_query(), [a["id"] for a in articles], fragment_size, number_of_fragments)

            if show_aggregation:
                self.monitor.update(message="Aggregating..")

                statistics = selection.get_statistics()
                try:
                    # Choose the smallest interval that keeps the number of
                    # date groups under MAX_DATE_GROUPS for this date range.
                    delta_start_end = statistics.end_date - statistics.start_date
                    interval = next(interval for (interval, delta) in TIMEDELTAS
                                    if MAX_DATE_GROUPS * delta > delta_start_end)
                except TypeError:
                    # Missing dates (None) make the subtraction fail.
                    interval = "day"
                except StopIteration:
                    # Date range too large for any configured interval.
                    interval = "year"

                date_aggr = selection.get_aggregate([IntervalCategory(interval)], objects=False)
            else:
                # Increase progress without doing anything (because we don't have to aggregate)
                self.monitor.update()

            self.monitor.update(message="Rendering results..")

        return TEMPLATE.render(dict(locals(), **{
            "project": self.project, "user": self.user
        }))
Example no. 8
0
    def run(self, form):
        """Export the selection's clustermap in the requested output format.

        Supported output types: interactive clustermap JSON
        (application/json+clustermap), SPSS (application/spss-sav),
        CSV/TSV tables, and a CSV-plus-queries JSON bundle.
        """
        selection = SelectionSearch.get_instance(form)
        queries = selection.get_article_ids_per_query()

        if form.cleaned_data["output_type"] == "application/json+clustermap":
            try:
                clusters, articles = zip(*get_clusters(queries).items())
            except ValueError as e:
                # zip(*) over an empty mapping yields nothing to unpack;
                # chain the cause so the original traceback is preserved.
                raise ValueError("Cannot build clustermap of empty query result.") from e

            cluster_queries = get_cluster_queries(clusters)
            image, html = get_clustermap_image(queries)
            coords = tuple(clustermap_html_to_coords(html))

            return json.dumps(
                {"coords": coords, "image": b64encode(image).decode("ascii"),
                 "clusters": [
                     {"query": q, "articles": tuple(a)}
                     for q, a in zip(cluster_queries, articles)
                 ]}
            )

        headers, rows = get_clustermap_table(queries)

        if form.cleaned_data["output_type"] == "application/spss-sav":
            # *sigh*.. this code is fugly.
            # Map column name -> index so cellfunc can address cells by name.
            _headers = {str(h): i for i, h in enumerate(headers)}

            return table2sav(Table(
                rows=list(rows),
                columns=list(map(str, headers)),
                columnTypes=[int]*len(headers),
                cellfunc=lambda row, col: row[_headers[col]]
            ))

        # Plain table export: comma-separated by default, tab-separated on request.
        dialect = 'excel'
        if form.cleaned_data["output_type"] == "text/csv+tab":
            dialect = 'excel-tab'

        result = StringIO()
        csvf = csv.writer(result, dialect=dialect)
        csvf.writerow(list(map(str, headers)))
        csvf.writerows(sorted(rows))

        if form.cleaned_data["output_type"] == "application/json+clustermap+table":
            return json.dumps({
                "csv": result.getvalue(),
                "queries": {q.label: q.query for q in queries}
            })

        return result.getvalue()
Example no. 9
0
    def run(self, form):
        """Aggregate completed codings over the selection and serialise them.

        Results are cached; on a cache miss the aggregation is computed from
        the completed codings in the selected codingjobs, ordered per the
        form's 'order_by'. Output is JSON, a JS-friendly matrix, or CSV
        depending on the requested output type.
        """
        self.monitor.update(1, "Executing query..")
        selection = SelectionSearch.get_instance(form)
        try:
            aggregation, primary, secondary, categories, values = self.get_cache()
        except NotInCacheError:
            narticles = selection.get_count()
            self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

            # Get aggregation options from the form. (An unused local that
            # read cleaned_data["codingjobs"] was removed; the job filter
            # below uses selection.data.codingjobs instead.)
            primary = form.cleaned_data['primary']
            secondary = form.cleaned_data['secondary']
            value1 = form.cleaned_data['value1']
            value2 = form.cleaned_data['value2']
            order_by = form.cleaned_data["order_by"]

            article_ids = list(selection.get_article_ids())

            # Only codings of completed articles in the selected jobs count.
            codings = Coding.objects.filter(coded_article__article__id__in=article_ids,
                                            coded_article__codingjob__id__in=selection.data.codingjobs,
                                            coded_article__status=STATUS_COMPLETE)

            terms = selection.get_article_ids_per_query()
            orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
            # Drop unset axes/values (None) before aggregating.
            categories = list(filter(None, [primary, secondary]))
            values = list(filter(None, [value1, value2]))
            aggregation = orm_aggregate.get_aggregate(categories, values)
            aggregation = sorted_aggregation(*order_by, aggregation)

            self.set_cache([aggregation, primary, secondary, categories, values])
        else:
            # Cache hit; the no-op .format(**locals()) was removed here.
            self.monitor.update(10, "Found in cache. Rendering..")

        # Matrices are very annoying to construct in javascript due to missing hashtables. If
        # the user requests a table, we thus first convert it to a different format which should
        # be easier to render.
        if form.cleaned_data["output_type"] == "text/json+aggregation+table":
            aggregation = aggregation_to_matrix(aggregation, categories)

        if form.cleaned_data["output_type"] == "text/csv":
            return aggregation_to_csv(aggregation, categories, values)

        self.monitor.update(60, "Serialising..")
        return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)