def run(self, form):
    # Get codebook object
    new_codebook = form.cleaned_data["new_codebook"]
    if new_codebook:
        codebook = Codebook(name=new_codebook, project=self.project)
        codebook.save()
    else:
        codebook = form.cleaned_data["existing_codebook"]
    codebook.cache()

    # Get queries and their labels
    indicator_language = form.cleaned_data["indicator_language"]
    roots = {r.label: r for r in codebook.get_roots()}
    queries = {q.label: q for q in SelectionSearch.get_instance(form).get_queries()}

    updated, new = 0, 0
    for label, query in queries.items():
        if label in roots:
            # Update existing code
            roots[label].add_label(indicator_language, query.query, replace=True)
            updated += 1
        else:
            # Create new code
            code = Code(label=label)
            code.save()
            code.add_label(indicator_language, query.query, replace=True)
            codebook.add_code(code)
            new += 1

    return "Updated {} code(s), added {} new code(s).".format(updated, new)
def run(self, form):
    provenance = None  # form.cleaned_data["provenance"]  # TODO: is this correct?
    job_size = form.cleaned_data["job_size"]

    self.monitor.update(10, "Executing query..")
    article_ids = list(SelectionSearch.get_instance(form).get_article_ids())

    # Template codingjob used by _create_codingjob_batches() below
    cj = CodingJob()
    cj.project = self.project
    cj.name = form.cleaned_data["name"]
    cj.unitschema = form.cleaned_data["unitschema"]
    cj.articleschema = form.cleaned_data["articleschema"]
    cj.coder = form.cleaned_data["coder"]
    cj.insertuser = self.user

    self.monitor.update(50, "Creating codingjobs..")

    # A job size of 0 means: put all articles in a single job
    if job_size == 0:
        job_size = len(article_ids)

    n_batches = len(article_ids) // job_size
    n_batches += 1 if len(article_ids) % job_size else 0

    for i, cid in enumerate(_create_codingjob_batches(cj, article_ids, job_size)):
        # Batch creation accounts for the second half (50-100%) of the progress bar
        progress = int((i / float(n_batches)) * 50)
        msg = "Creating codingjob {} of {}..".format(i + 1, n_batches)
        self.monitor.update(50 + progress, msg)

        if provenance:
            cj = CodingJob.objects.get(id=cid)
            cj.provenance = provenance
            cj.save()

    return "Codingjob(s) created."
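# The _create_codingjob_batches() helper used above is defined elsewhere; the
# sketch below is only an illustration of the contract run() relies on (split
# article_ids into chunks of job_size, save one codingjob plus articleset per
# chunk, and yield each new codingjob's id). Names such as ArticleSet.create_set
# are assumptions here, not the actual implementation.
def _create_codingjob_batches_sketch(template_job, article_ids, job_size):
    for n, offset in enumerate(range(0, len(article_ids), job_size), start=1):
        batch = article_ids[offset:offset + job_size]
        name = "{} - batch {}".format(template_job.name, n)
        job = CodingJob(
            project=template_job.project, name=name,
            unitschema=template_job.unitschema, articleschema=template_job.articleschema,
            coder=template_job.coder, insertuser=template_job.insertuser,
            # Hypothetical: an articleset holding just this batch's articles
            articleset=ArticleSet.create_set(project=template_job.project, name=name, articles=batch),
        )
        job.save()
        yield job.id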
def run(self, form):
    assert isinstance(self.data, QueryDict), "Class should have been instantiated with a django QueryDict as 'data'"

    selection = SelectionSearch.get_instance(form)

    # Translate incoming form fields to API keyword arguments
    data = {API_KEYWORD_MAP.get(k, k): v for k, v in self.data.lists()}
    data["q"] = ["{}#{}".format(q.label, q.query) for q in selection.get_queries()]
    data["ids"] = data.get("ids", selection.get_filters().get("ids", []))
    url = urlencode(data, doseq=True)

    rowlink = ARTICLE_ROWLINK.format(reverse("navigator:project-details", args=[self.project.id]), "{id}")

    table = Datatable(
        SearchResource, url="/api/v4/search",
        rowlink=rowlink, rowlink_open_in="new",
        checkboxes=True, allow_export_via_post=True,
        allow_html_export=True
    )

    table = table.add_arguments(minimal="1")
    table = table.add_arguments(project=str(self.project.id))
    for k, vs in data.items():
        for v in vs:
            table = table.add_arguments(**{k: v})

    return TABLE_TEMPLATE.render({"form": form, "url": url, "table": table})
def run(self, form):
    selection = SelectionSearch.get_instance(form)
    queries = selection.get_queries()
    articlesets = form.cleaned_data["articlesets"]
    codingjobs = form.cleaned_data["codingjobs"]

    statistics = selection.get_statistics()
    if hasattr(statistics, "start_date"):
        start_date = statistics.start_date
        end_date = statistics.end_date
    else:
        start_date = None
        end_date = None

    return json.dumps({
        "queries": {q.label: q.query for q in queries},
        "articlesets": {a.id: a.name for a in articlesets},
        "codingjobs": {cj.id: cj.name for cj in codingjobs},
        "codes_used": list(get_used_code_ids(codingjobs)),
        "statistics": {
            "start_date": start_date,
            "end_date": end_date,
            "narticles": statistics.n
        }
    }, cls=DjangoJSONEncoder)
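# The summary returned above is a JSON document roughly shaped like the example
# below (values are made up for illustration); DjangoJSONEncoder takes care of
# serialising the date objects.
#
# {
#     "queries": {"economy": "econom*"},
#     "articlesets": {12: "Newspapers 2015"},
#     "codingjobs": {34: "Coding job week 1"},
#     "codes_used": [101, 102],
#     "statistics": {"start_date": "2015-01-01", "end_date": "2015-12-31", "narticles": 250}
# }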
def get_association(self, form):
    selection = SelectionSearch.get_instance(form)
    filters = selection.get_filters()
    queries = selection.get_queries()
    weighted = form.cleaned_data["weigh"]
    interval = form.cleaned_data["interval"]
    return Association(queries, filters, weighted=weighted, interval=interval)
def run(self, form):
    self.monitor.update(10, "Executing query..")
    article_ids = list(SelectionSearch.get_instance(form).get_article_ids())

    _check_read_access(self.user, article_ids)

    self.monitor.update(60, "Saving to set..")
    form.cleaned_data["articleset"].add_articles(article_ids)

    return OK_TEMPLATE.render({
        "project": self.project,
        "aset": form.cleaned_data["articleset"],
        "len": len(article_ids)
    })
def run(self, form):
    form_data = dict(form.data.lists())
    for value in form_data.values():
        if value == [None]:
            value.pop()
    form_data = json.dumps(form_data, indent=4)

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    number_of_fragments = form.cleaned_data['number_of_fragments']
    fragment_size = form.cleaned_data['fragment_size']
    show_fields = sorted(form.cleaned_data['show_fields'])
    show_aggregation = form.cleaned_data['aggregations']

    sort_by = form.cleaned_data.get('sort_by')
    sort_desc = "desc" if form.cleaned_data.get('sort_descending', False) else "asc"
    if sort_by:
        sort = [":".join([sort_by, sort_desc])]
    else:
        sort = []

    with Timer() as timer:
        selection = SelectionSearch.get_instance(form)

        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()

        self.monitor.update(message="Fetching articles..")
        articles = selection.get_articles(size=size, offset=offset, sort=sort).as_dicts()
        articles = get_fragments(selection.get_query(), [a["id"] for a in articles], fragment_size, number_of_fragments)

        if show_aggregation:
            self.monitor.update(message="Aggregating..")
            statistics = selection.get_statistics()

            try:
                delta_start_end = statistics.end_date - statistics.start_date
                interval = next(interval for (interval, delta) in TIMEDELTAS if MAX_DATE_GROUPS * delta > delta_start_end)
            except TypeError:
                # Missing start or end date: fall back to the finest interval
                interval = "day"
            except StopIteration:
                # Range too large for any candidate interval: fall back to the coarsest
                interval = "year"

            date_aggr = selection.get_aggregate([IntervalCategory(interval)], objects=False)
        else:
            # Increase progress without doing anything (because we don't have to aggregate)
            self.monitor.update()

    self.monitor.update(message="Rendering results..")
    return TEMPLATE.render(dict(locals(), **{
        "project": self.project,
        "user": self.user
    }))
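# The interval heuristic above expects two module-level constants: TIMEDELTAS,
# an ordered list of (interval name, timedelta) pairs running from fine to
# coarse, and MAX_DATE_GROUPS, the maximum number of date buckets the
# aggregation may produce. The values below are assumptions for illustration,
# not the module's actual definitions.
#
# MAX_DATE_GROUPS = 500
# TIMEDELTAS = [
#     ("day", datetime.timedelta(days=1)),
#     ("week", datetime.timedelta(weeks=1)),
#     ("month", datetime.timedelta(days=30)),
#     ("quarter", datetime.timedelta(days=91)),
#     ("year", datetime.timedelta(days=365)),
# ]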
def run(self, form): selection = SelectionSearch.get_instance(form) queries = selection.get_article_ids_per_query() if form.cleaned_data["output_type"] == "application/json+clustermap": try: clusters, articles = zip(*get_clusters(queries).items()) except ValueError as e: raise ValueError("Cannot build clustermap of empty query result.") cluster_queries = get_cluster_queries(clusters) image, html = get_clustermap_image(queries) coords = tuple(clustermap_html_to_coords(html)) return json.dumps( {"coords": coords, "image": b64encode(image).decode("ascii"), "clusters": [ {"query": q, "articles": tuple(a)} for q, a in zip(cluster_queries, articles) ]} ) headers, rows = get_clustermap_table(queries) if form.cleaned_data["output_type"] == "application/spss-sav": # *sigh*.. this code is fugly. _headers = {str(h): i for i, h in enumerate(headers)} return table2sav(Table( rows=list(rows), columns=list(map(str, headers)), columnTypes=[int]*len(headers), cellfunc=lambda row, col: row[_headers[col]] )) dialect = 'excel' if form.cleaned_data["output_type"] == "text/csv+tab": dialect = 'excel-tab' result = StringIO() csvf = csv.writer(result, dialect=dialect) csvf.writerow(list(map(str, headers))) csvf.writerows(sorted(rows)) if form.cleaned_data["output_type"] == "application/json+clustermap+table": return json.dumps({ "csv": result.getvalue(), "queries": {q.label: q.query for q in queries} }) return result.getvalue()
def run(self, form):
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch.get_instance(form)

    try:
        aggregation, primary, secondary, categories, values = self.get_cache()
    except NotInCacheError:
        narticles = selection.get_count()
        self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        codingjobs = form.cleaned_data["codingjobs"]
        primary = form.cleaned_data['primary']
        secondary = form.cleaned_data['secondary']
        value1 = form.cleaned_data['value1']
        value2 = form.cleaned_data['value2']
        order_by = form.cleaned_data["order_by"]

        article_ids = list(selection.get_article_ids())
        codings = Coding.objects.filter(coded_article__article__id__in=article_ids,
                                        coded_article__codingjob__id__in=selection.data.codingjobs,
                                        coded_article__status=STATUS_COMPLETE)

        terms = selection.get_article_ids_per_query()
        orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
        categories = list(filter(None, [primary, secondary]))
        values = list(filter(None, [value1, value2]))
        aggregation = orm_aggregate.get_aggregate(categories, values)
        aggregation = sorted_aggregation(*order_by, aggregation)

        self.set_cache([aggregation, primary, secondary, categories, values])
    else:
        self.monitor.update(10, "Found in cache. Rendering..")

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, values)

    self.monitor.update(60, "Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)