def run(self, form):
    selection = SelectionSearch(form)
    queries = selection.get_queries()
    articlesets = form.cleaned_data["articlesets"]
    mediums = form.cleaned_data["mediums"]

    statistics = selection.get_statistics()
    if hasattr(statistics, "start_date"):
        start_date = statistics.start_date
        end_date = statistics.end_date
    else:
        start_date = None
        end_date = None

    return json.dumps({
        "queries": {q.label: q.query for q in queries},
        "mediums": {m.id: m.name for m in mediums},
        "articlesets": {a.id: a.name for a in articlesets},
        "statistics": {
            "start_date": start_date,
            "end_date": end_date,
            "narticles": statistics.n
        }
    }, cls=DjangoJSONEncoder)

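# For illustration, the JSON produced above looks roughly like this (all values are
# invented for the example; integer dict keys become strings in JSON, and dates are
# serialised by DjangoJSONEncoder as ISO strings):
#
# {
#     "queries": {"economy": "economy OR finance"},
#     "mediums": {"7": "De Volkskrant"},
#     "articlesets": {"12": "My article set"},
#     "statistics": {"start_date": "2014-01-01", "end_date": "2014-12-31", "narticles": 42}
# }
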
def run(self, form):
    selection = SelectionSearch(form)

    try:
        # Try to retrieve cache values
        primary, secondary, categories, aggregation = self.get_cache()
    except NotInCacheError:
        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()
        self.monitor.update(message="Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        primary = form.cleaned_data["primary"]
        secondary = form.cleaned_data["secondary"]
        categories = list(filter(None, [primary, secondary]))
        aggregation = list(selection.get_aggregate(categories, flat=False))

        self.set_cache([primary, secondary, categories, aggregation])
    else:
        self.monitor.update(2)

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

    self.monitor.update(message="Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)

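# Hedged sketch of the matrix conversion mentioned above. aggregation_to_matrix() is
# defined elsewhere in the codebase; assuming the aggregation is a list of
# ((primary, secondary), value) pairs, a table-friendly conversion could look like
# this (the function name and layout are illustrative, not the real implementation):
def _aggregation_to_matrix_sketch(aggregation):
    rows = sorted({p for (p, _), _ in aggregation})
    cols = sorted({s for (_, s), _ in aggregation})
    cells = dict(aggregation)
    # First a header row, then one row per primary value with one cell per secondary.
    return [[None] + cols] + [[r] + [cells.get((r, c)) for c in cols] for r in rows]
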
def run(self, form):
    assert isinstance(self.data, QueryDict), \
        "Class should have been instantiated with a django QueryDict as 'data'"

    selection = SelectionSearch(form)

    data = {API_KEYWORD_MAP.get(k, k): v for k, v in self.data.lists()}
    data["q"] = ["{}#{}".format(q.label, q.query) for q in selection.get_queries()]
    data["ids"] = data.get("ids", selection.get_filters().get("ids", []))
    url = urlencode(data, doseq=True)

    rowlink = ARTICLE_ROWLINK.format(reverse("navigator:project-details", args=[self.project.id]), "{id}")

    table = Datatable(SearchResource, url="/api/v4/search",
                      rowlink=rowlink, rowlink_open_in="new",
                      checkboxes=True, allow_export_via_post=True,
                      allow_html_export=True)
    table = table.add_arguments(minimal="1")
    table = table.add_arguments(project=str(self.project.id))

    for k, vs in data.items():
        for v in vs:
            table = table.add_arguments(**{k: v})

    return TABLE_TEMPLATE.render(Context({"form": form, "url": url, "table": table}))

def run(self, form):
    selection = SelectionSearch(form)

    try:
        # Try to retrieve cache values
        primary, secondary, categories, aggregation = self.get_cache()
    except NotInCacheError:
        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()
        self.monitor.update(message="Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        order_by = form.cleaned_data["order_by"]
        primary = form.cleaned_data["primary"]
        secondary = form.cleaned_data["secondary"]
        categories = list(filter(None, [primary, secondary]))
        aggregation = list(selection.get_aggregate(categories, flat=False))
        aggregation = sorted_aggregation(*order_by, aggregation)

        self.set_cache([primary, secondary, categories, aggregation])
    else:
        self.monitor.update(2)

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, [CountArticlesValue()])

    self.monitor.update(message="Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)

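# Hedged sketch: sorted_aggregation(*order_by, aggregation) is assumed to order the
# aggregation rows on a column taken from the form's "order_by" value; the real
# signature may differ. A minimal stand-in under that assumption, for rows shaped
# as (groups, values) tuples:
def _sorted_aggregation_sketch(column_index, descending, aggregation):
    return sorted(aggregation, key=lambda row: row[1][column_index], reverse=descending)
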
def get_association(self, form):
    selection = SelectionSearch(form)
    filters = selection.get_filters()
    queries = selection.get_queries()
    weighted = form.cleaned_data["weigh"]
    interval = form.cleaned_data["interval"]
    return Association(queries, filters, weighted=weighted, interval=interval)

def run(self, form):
    selection = SelectionSearch(form)
    queries = selection.get_article_ids_per_query()

    if form.cleaned_data["output_type"] == "application/json+clustermap":
        clusters, articles = zip(*get_clusters(queries).items())
        cluster_queries = get_cluster_queries(clusters)
        image, html = get_clustermap_image(queries)
        coords = tuple(clustermap_html_to_coords(html))
        return json.dumps({
            "coords": coords,
            "image": b64encode(image).decode("ascii"),
            "clusters": [
                {"query": q, "articles": tuple(a)}
                for q, a in zip(cluster_queries, articles)
            ]
        })

    headers, rows = get_clustermap_table(queries)

    if form.cleaned_data["output_type"] == "application/spss-sav":
        # *sigh*.. this code is fugly.
        _headers = {str(h): i for i, h in enumerate(headers)}
        return table2sav(Table(
            rows=list(rows),
            columns=list(map(str, headers)),
            columnTypes=[int] * len(headers),
            cellfunc=lambda row, col: row[_headers[col]]
        ))

    dialect = 'excel'
    if form.cleaned_data["output_type"] == "text/csv+tab":
        dialect = 'excel-tab'

    result = StringIO()
    csvf = csv.writer(result, dialect=dialect)
    csvf.writerow(list(map(str, headers)))
    csvf.writerows(sorted(rows))

    if form.cleaned_data["output_type"] == "application/json+clustermap+table":
        return json.dumps({
            "csv": result.getvalue(),
            "queries": {q.label: q.query for q in queries}
        })

    return result.getvalue()

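# Hedged sketch: clustermap_html_to_coords() presumably extracts the clickable
# regions from the <area> tags of the HTML image map that accompanies the clustermap
# image. A minimal stand-in under that assumption (not the real implementation):
import re

def _clustermap_html_to_coords_sketch(html):
    for match in re.finditer(r'<area[^>]*\bcoords="([\d,\s]+)"', html):
        yield tuple(int(n) for n in match.group(1).replace(" ", "").split(","))
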
def run(self, form):
    selection = SelectionSearch(form)

    data = {API_KEYWORD_MAP.get(k, k): v for k, v in self.data.iterlists()}
    data["q"] = ["{}#{}".format(q.label, q.query) for q in selection.get_queries()]
    url = urllib.urlencode(data, doseq=True)

    table = Datatable(SearchResource, url="/api/v4/search")
    table = table.add_arguments(minimal="1")
    table = table.add_arguments(project=str(self.project.id))

    for k, vs in data.items():
        for v in vs:
            table = table.add_arguments(**{k: v})

    return TABLE_TEMPLATE.render(Context({"form": form, "url": url, "table": table}))

def run(self, form):
    # Get codebook object
    new_codebook = form.cleaned_data["new_codebook"]
    if new_codebook:
        codebook = Codebook(name=new_codebook, project=self.project)
        codebook.save()
    else:
        codebook = form.cleaned_data["existing_codebook"]
    codebook.cache()

    # Get queries and their labels
    indicator_language = form.cleaned_data["indicator_language"]
    roots = {r.label: r for r in codebook.get_roots()}
    queries = {q.label: q for q in SelectionSearch.get_instance(form).get_queries()}

    updated, new = 0, 0
    for label, query in queries.items():
        if label in roots:
            # Update existing code
            roots[label].add_label(indicator_language, query.query, replace=True)
            updated += 1
        else:
            # Create new code
            code = Code(label=label)
            code.save()
            code.add_label(indicator_language, query.query, replace=True)
            codebook.add_code(code)
            new += 1

    return "Updated {} code(s), added {} new code(s).".format(updated, new)

def _run_query(self, form_data, expected_indices=None, expected_count=None, msg=None):
    self._setUp()
    sets = ArticleSet.objects.filter(pk=self.articleset.pk)
    form = SelectionForm(articlesets=sets, project=self.articleset.project, data=form_data)
    form.full_clean()
    self.assertFalse(form.errors, "Form contains errors")
    search = SelectionSearch(form)

    if expected_indices:
        article_ids = search.get_article_ids()
        articles = Article.objects.filter(id__in=article_ids)
        expected = [self.articles[i] for i in expected_indices]
        self.assertSetEqual(set(articles), set(expected), msg=msg)

    if expected_count:
        self.assertEqual(search.get_count(), expected_count, msg=msg)

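# Hedged usage sketch for the helper above. The form field name in form_data is an
# assumption about SelectionForm's definition, and the indices refer to self.articles
# as seeded by _setUp():
def test_simple_query(self):
    self._run_query(
        {"query": "economy"},      # hypothetical query field and value
        expected_indices=[0, 2],   # assumed seed articles matching the term
        expected_count=2,
        msg="a single-term query should match exactly the seeded articles"
    )
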
def run(self, form):
    provenance = None  # form.cleaned_data["provenance"]  # TODO: is this correct?
    job_size = form.cleaned_data["job_size"]

    self.monitor.update(10, "Executing query..")
    article_ids = list(SelectionSearch.get_instance(form).get_article_ids())

    cj = CodingJob()
    cj.project = self.project
    cj.name = form.cleaned_data["name"]
    cj.unitschema = form.cleaned_data["unitschema"]
    cj.articleschema = form.cleaned_data["articleschema"]
    cj.coder = form.cleaned_data["coder"]
    cj.insertuser = self.user

    self.monitor.update(50, "Creating codingjobs..")
    if job_size == 0:
        job_size = len(article_ids)

    n_batches = len(article_ids) // job_size
    n_batches += 1 if len(article_ids) % job_size else 0

    for i, cid in enumerate(_create_codingjob_batches(cj, article_ids, job_size)):
        progress = int((i / float(n_batches)) * (100 // 2))
        msg = "Creating codingjob {} of {}..".format(i + 1, n_batches)
        self.monitor.update(50 + progress, msg)

        if provenance:
            cj = CodingJob.objects.get(id=cid)
            cj.provenance = provenance
            cj.save()

    return "Codingjob(s) created."

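# Hedged sketch: _create_codingjob_batches() is defined elsewhere; the batch split it
# implies (consistent with the n_batches arithmetic above) amounts to simple chunking
# of the article ids. The function name below is illustrative:
def _batches_sketch(article_ids, job_size):
    for start in range(0, len(article_ids), job_size):
        yield article_ids[start:start + job_size]
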
def run(self, form):
    assert isinstance(self.data, QueryDict), \
        "Class should have been instantiated with a django QueryDict as 'data'"

    selection = SelectionSearch.get_instance(form)

    data = {API_KEYWORD_MAP.get(k, k): v for k, v in self.data.lists()}
    data["q"] = ["{}#{}".format(q.label, q.query) for q in selection.get_queries()]
    data["ids"] = data.get("ids", selection.get_filters().get("ids", []))
    url = urlencode(data, doseq=True)

    rowlink = ARTICLE_ROWLINK.format(reverse("navigator:project-details", args=[self.project.id]), "{id}")

    table = Datatable(
        SearchResource, url="/api/v4/search",
        rowlink=rowlink, rowlink_open_in="new",
        checkboxes=True, allow_export_via_post=True, allow_html_export=True
    )
    table = table.add_arguments(minimal="1")
    table = table.add_arguments(project=str(self.project.id))

    for k, vs in data.items():
        for v in vs:
            table = table.add_arguments(**{k: v})

    return TABLE_TEMPLATE.render({"form": form, "url": url, "table": table})

def run(self, form):
    selection = SelectionSearch.get_instance(form)
    queries = selection.get_queries()
    articlesets = form.cleaned_data["articlesets"]
    codingjobs = form.cleaned_data["codingjobs"]

    statistics = selection.get_statistics()
    if hasattr(statistics, "start_date"):
        start_date = statistics.start_date
        end_date = statistics.end_date
    else:
        start_date = None
        end_date = None

    return json.dumps({
        "queries": {q.label: q.query for q in queries},
        "articlesets": {a.id: a.name for a in articlesets},
        "codingjobs": {cj.id: cj.name for cj in codingjobs},
        "codes_used": list(get_used_code_ids(codingjobs)),
        "statistics": {
            "start_date": start_date,
            "end_date": end_date,
            "narticles": statistics.n
        }
    }, cls=DjangoJSONEncoder)

def clean_relative_to(self):
    column = self.cleaned_data['relative_to']
    y_axis = self.cleaned_data['y_axis']

    if not column:
        return None

    if y_axis == "medium":
        if int(column) not in (m.id for m in self.cleaned_data["mediums"]):
            raise ValidationError(MEDIUM_ERR.format(column=column))
        return Medium.objects.get(id=int(column))

    if y_axis == "term":
        queries = SelectionSearch(self).get_queries()
        queries = {q.label: q for q in queries}
        if column not in queries:
            raise ValidationError("Term '{column}' not found in search terms.".format(column=column))
        return queries[column]

    if y_axis == "set":
        if int(column) not in (aset.id for aset in self.articlesets):
            raise ValidationError("Set '{column}' not available.".format(column=column))
        return ArticleSet.objects.get(id=int(column))

    raise ValidationError("Not a valid column name.")

def run(self, form):
    # Get codebook object
    new_codebook = form.cleaned_data["new_codebook"]
    if new_codebook:
        codebook = Codebook(name=new_codebook, project=self.project)
        codebook.save()
    else:
        codebook = form.cleaned_data["existing_codebook"]
    codebook.cache()

    # Get queries and their labels
    indicator_language = form.cleaned_data["indicator_language"]
    roots = {r.label: r for r in codebook.get_roots()}
    queries = {q.label: q for q in SelectionSearch(form).get_queries()}

    updated, new = 0, 0
    for label, query in queries.items():
        if label in roots:
            # Update existing code
            roots[label].add_label(indicator_language, query.query, replace=True)
            updated += 1
        else:
            # Create new code
            code = Code(label=label)
            code.save()
            code.add_label(indicator_language, query.query, replace=True)
            codebook.add_code(code)
            new += 1

    return "Updated {} code(s), added {} new code(s).".format(updated, new)

def run(self, form):
    form_data = json.dumps(dict(form.data._iterlists()))

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    show_aggregation = form.cleaned_data['aggregations']

    with Timer() as timer:
        selection = SelectionSearch(form)

        self.monitor.update(1, "Executing query..")
        narticles = selection.get_count()

        self.monitor.update(39, "Fetching mediums..")
        mediums = selection.get_mediums()

        self.monitor.update(59, "Fetching articles..")
        articles = selection.get_articles(size=size, offset=offset)

        if show_aggregation:
            self.monitor.update(69, "Aggregating..")
            date_aggr = selection.get_aggregate(x_axis="date", y_axis="total", interval="day")
            medium_aggr = selection.get_aggregate(x_axis="medium", y_axis="date", interval="day")

        self.monitor.update(79, "Rendering results..")

    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project,
        "user": self.user
    })))

def run(self, form):
    self.monitor.update(10, "Executing query..")
    article_ids = list(SelectionSearch.get_instance(form).get_article_ids())

    _check_read_access(self.user, article_ids)

    self.monitor.update(60, "Saving to set..")
    form.cleaned_data["articleset"].add_articles(article_ids)

    return OK_TEMPLATE.render({
        "project": self.project,
        "aset": form.cleaned_data["articleset"],
        "len": len(article_ids)
    })

def run(self, form):
    self.monitor.update(10, "Executing query..")
    article_ids = list(SelectionSearch(form).get_article_ids())

    self.monitor.update(60, "Saving to set..")
    form.cleaned_data["articleset"].add_articles(article_ids)

    return OK_TEMPLATE.render(Context({
        "project": self.project,
        "aset": form.cleaned_data["articleset"],
        "len": len(article_ids)
    }))

def run(self, form):
    form_data = dict(form.data.lists())
    for value in form_data.values():
        if value == [None]:
            value.pop()
    form_data = json.dumps(form_data, indent=4)

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    number_of_fragments = form.cleaned_data['number_of_fragments']
    fragment_size = form.cleaned_data['fragment_size']
    show_fields = sorted(form.cleaned_data['show_fields'])
    show_aggregation = form.cleaned_data['aggregations']

    sort_by = form.cleaned_data.get('sort_by')
    sort_desc = "desc" if form.cleaned_data.get('sort_descending', False) else "asc"
    if sort_by:
        sort = [":".join([sort_by, sort_desc])]
    else:
        sort = []

    with Timer() as timer:
        selection = SelectionSearch.get_instance(form)

        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()

        self.monitor.update(message="Fetching articles..")
        articles = selection.get_articles(size=size, offset=offset, sort=sort).as_dicts()
        articles = get_fragments(selection.get_query(), [a["id"] for a in articles],
                                 fragment_size, number_of_fragments)

        if show_aggregation:
            self.monitor.update(message="Aggregating..")
            statistics = selection.get_statistics()

            try:
                delta_start_end = statistics.end_date - statistics.start_date
                interval = next(interval for (interval, delta) in TIMEDELTAS
                                if MAX_DATE_GROUPS * delta > delta_start_end)
            except TypeError:
                interval = "day"
            except StopIteration:
                interval = "year"

            date_aggr = selection.get_aggregate([IntervalCategory(interval)], objects=False)
        else:
            # Increase progress without doing anything (because we don't have to aggregate)
            self.monitor.update()

        self.monitor.update(message="Rendering results..")

    return TEMPLATE.render(dict(locals(), **{
        "project": self.project,
        "user": self.user
    }))

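# Hedged sketch of the interval lookup used above: TIMEDELTAS is assumed to be an
# ascending list of (interval_name, timedelta) pairs, so next() picks the smallest
# interval that keeps the number of date groups under MAX_DATE_GROUPS. All values
# below are illustrative:
from datetime import timedelta

MAX_DATE_GROUPS_SKETCH = 500
TIMEDELTAS_SKETCH = [
    ("day", timedelta(days=1)),
    ("week", timedelta(days=7)),
    ("month", timedelta(days=30)),
    ("quarter", timedelta(days=120)),
    ("year", timedelta(days=365)),
]

def pick_interval_sketch(delta_start_end):
    # Mirrors the try/except above: fall back to "year" when even the coarsest
    # interval would produce too many date groups.
    return next((name for name, delta in TIMEDELTAS_SKETCH
                 if MAX_DATE_GROUPS_SKETCH * delta > delta_start_end), "year")
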
def run(self, form):
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch(form)
    narticles = selection.get_count()
    self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

    # Get aggregation
    aggregation = selection.get_aggregate(
        form.cleaned_data['x_axis'],
        form.cleaned_data['y_axis'],
        form.cleaned_data['interval']
    )

    # self.monitor.update(20, "Calculating relative values..".format(**locals()))
    column = form.cleaned_data['relative_to']
    if column is not None:
        aggregation = list(get_relative(aggregation, column))

    self.monitor.update(60, "Serialising..")
    return json.dumps(list(aggregation), cls=AggregationEncoder, check_circular=False)

def run(self, form):
    form_data = dict(form.data.lists())
    for value in form_data.values():
        if value == [None]:
            value.pop()
    form_data = json.dumps(form_data, indent=4)

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    number_of_fragments = form.cleaned_data['number_of_fragments']
    fragment_size = form.cleaned_data['fragment_size']
    show_fields = sorted(form.cleaned_data['show_fields'])
    show_aggregation = form.cleaned_data['aggregations']

    with Timer() as timer:
        selection = SelectionSearch(form)

        self.monitor.update(message="Executing query..")
        narticles = selection.get_count()

        self.monitor.update(message="Fetching articles..")
        articles = selection.get_articles(size=size, offset=offset).as_dicts()
        articles = get_fragments(selection.get_query(), [a["id"] for a in articles],
                                 fragment_size, number_of_fragments)

        if show_aggregation:
            self.monitor.update(message="Aggregating..")
            statistics = selection.get_statistics()

            try:
                delta_start_end = statistics.end_date - statistics.start_date
                interval = next(interval for (interval, delta) in TIMEDELTAS
                                if MAX_DATE_GROUPS * delta > delta_start_end)
            except (StopIteration, TypeError):
                interval = "day"

            date_aggr = selection.get_aggregate([IntervalCategory(interval)], objects=False)
        else:
            # Increase progress without doing anything (because we don't have to aggregate)
            self.monitor.update()

        self.monitor.update(message="Rendering results..")

    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project,
        "user": self.user
    })))

def run(self, form):
    name = form.cleaned_data["name"]
    # provenance = form.cleaned_data["provenance"]
    project = form.cleaned_data["project"]
    aset = ArticleSet.objects.create(name=name, project=project)

    self.monitor.update(10, "Executing query..")
    article_ids = list(SelectionSearch(form).get_article_ids())

    self.monitor.update(60, "Saving to set..")
    aset.add_articles(article_ids)

    return OK_TEMPLATE.render(Context({
        "project": project,
        "aset": aset,
        "len": len(article_ids)
    }))

def run(self, form):
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch.get_instance(form)

    try:
        aggregation, primary, secondary, categories, values = self.get_cache()
    except NotInCacheError:
        narticles = selection.get_count()
        self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        codingjobs = form.cleaned_data["codingjobs"]
        primary = form.cleaned_data['primary']
        secondary = form.cleaned_data['secondary']
        value1 = form.cleaned_data['value1']
        value2 = form.cleaned_data['value2']
        order_by = form.cleaned_data["order_by"]

        article_ids = list(selection.get_article_ids())
        codings = Coding.objects.filter(
            coded_article__article__id__in=article_ids,
            coded_article__codingjob__id__in=selection.data.codingjobs,
            coded_article__status=STATUS_COMPLETE
        )

        terms = selection.get_article_ids_per_query()
        orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
        categories = list(filter(None, [primary, secondary]))
        values = list(filter(None, [value1, value2]))
        aggregation = orm_aggregate.get_aggregate(categories, values)
        aggregation = sorted_aggregation(*order_by, aggregation)

        self.set_cache([aggregation, primary, secondary, categories, values])
    else:
        self.monitor.update(10, "Found in cache. Rendering..")

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, values)

    self.monitor.update(60, "Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)

def _clean_aggregation(self, field_name):
    field_value = self.cleaned_data[field_name]

    if not field_value:
        return None

    if field_value == "articleset":
        return aggregate_es.ArticlesetCategory(self.articlesets)

    if field_value == "term":
        terms = SelectionSearch(self).get_queries()
        return aggregate_es.TermCategory(terms)

    if field_value.endswith(INTERVALS):
        fieldname, interval = field_value.rsplit("_", 1)
        return aggregate_es.IntervalCategory(field=fieldname, interval=interval,
                                             fill_zeros=self.cleaned_data["fill_zeroes"])

    if field_value.endswith("_str"):
        # _str is added to disambiguate between fields and intervals
        field_value, _ = field_value.rsplit("_", 1)

    return FieldCategory.from_fieldname(field_value)

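# Hedged sketch: INTERVALS is assumed to be a tuple of interval-name suffixes, which
# str.endswith() accepts directly, e.g.:
INTERVALS_SKETCH = ("day", "week", "month", "quarter", "year")
# so a value like "date_month" matches, and rsplit("_", 1) above then yields the
# field "date" and the interval "month".
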
def run(self, form):
    form_data = json.dumps(dict(form.data.lists()))

    size = form.cleaned_data['size']
    offset = form.cleaned_data['offset']
    show_aggregation = form.cleaned_data['aggregations']

    with Timer() as timer:
        selection = SelectionSearch(form)

        self.monitor.update(1, "Executing query..")
        narticles = selection.get_count()

        self.monitor.update(39, "Fetching mediums..")
        mediums = selection.get_mediums()

        self.monitor.update(59, "Fetching articles..")
        articles = [escape_article_result(art)
                    for art in selection.get_articles(size=size, offset=offset)]

        if show_aggregation:
            self.monitor.update(69, "Aggregating..")
            statistics = selection.get_statistics()

            try:
                delta_start_end = statistics.end_date - statistics.start_date
                interval = next(interval for (interval, delta) in TIMEDELTAS
                                if MAX_DATE_GROUPS * delta > delta_start_end)
            except (StopIteration, TypeError):
                interval = "day"

            date_aggr = selection.get_nested_aggregate([IntervalCategory(interval)])
            date_aggr = fill_zeroes((((date,), (value,)) for date, value in date_aggr),
                                    IntervalCategory(interval))
            medium_aggr = selection.get_nested_aggregate([MediumCategory()])

        self.monitor.update(79, "Rendering results..")

    return TEMPLATE.render(Context(dict(locals(), **{
        "project": self.project,
        "user": self.user
    })))

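# Hedged sketch: fill_zeroes() is assumed to insert zero-valued rows for interval
# steps missing between consecutive dates, so charts render gaps as zeros rather than
# skipping them. A minimal day-interval stand-in over the date-sorted
# ((date,), (value,)) rows built above:
from datetime import timedelta

def _fill_zeroes_sketch(rows):
    prev = None
    for (date,), (value,) in rows:
        while prev is not None and date - prev > timedelta(days=1):
            prev += timedelta(days=1)
            yield (prev,), (0,)
        yield (date,), (value,)
        prev = date
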
def clean(self):
    # This is a bit of a hack. We need all the other fields to be correctly validated
    # in order to validate the query field.
    SelectionSearch(self).get_query()
    return self.cleaned_data

def run(self, form):
    self.monitor.update(1, "Executing query..")
    selection = SelectionSearch(form)

    try:
        aggregation, primary, secondary, categories, values = self.get_cache()
    except NotInCacheError:
        narticles = selection.get_count()
        self.monitor.update(10, "Found {narticles} articles. Aggregating..".format(**locals()))

        # Get aggregation
        codingjobs = form.cleaned_data["codingjobs"]
        primary = form.cleaned_data['primary']
        secondary = form.cleaned_data['secondary']
        value1 = form.cleaned_data['value1']
        value2 = form.cleaned_data['value2']

        article_ids = selection.get_article_ids()

        # This should probably happen in SelectionForm?
        coded_articles = CodedArticle.objects.all()
        coded_articles = coded_articles.filter(article__id__in=article_ids)
        coded_articles = coded_articles.filter(codingjob__id__in=codingjobs)
        coded_article_ids = set(coded_articles.values_list("id", flat=True))

        for field_name in ("1", "2", "3"):
            if not coded_article_ids:
                break

            schemafield = form.cleaned_data["codingschemafield_{}".format(field_name)]
            schemafield_values = form.cleaned_data["codingschemafield_value_{}".format(field_name)]
            schemafield_include_descendants = form.cleaned_data["codingschemafield_include_descendants_{}".format(field_name)]

            if schemafield and schemafield_values:
                code_ids = get_code_filter(schemafield.codebook, schemafield_values,
                                           schemafield_include_descendants)
                coding_values = CodingValue.objects.filter(coding__coded_article__id__in=coded_article_ids)
                coding_values = coding_values.filter(field__id=schemafield.id)
                coding_values = coding_values.filter(intval__in=code_ids)
                coded_article_ids &= set(coding_values.values_list("coding__coded_article__id", flat=True))

        codings = Coding.objects.filter(coded_article__id__in=coded_article_ids)

        terms = selection.get_article_ids_per_query()
        orm_aggregate = ORMAggregate(codings, flat=False, terms=terms)
        categories = list(filter(None, [primary, secondary]))
        values = list(filter(None, [value1, value2]))
        aggregation = orm_aggregate.get_aggregate(categories, values)
        aggregation = sorted(aggregation, key=to_sortable_tuple)

        self.set_cache([aggregation, primary, secondary, categories, values])
    else:
        self.monitor.update(10, "Found in cache. Rendering..")

    if form.cleaned_data.get("primary_fill_zeroes") and hasattr(primary, 'interval'):
        aggregation = list(aggregate_es.fill_zeroes(aggregation, primary, secondary))

    # Matrices are very annoying to construct in javascript due to missing hashtables. If
    # the user requests a table, we thus first convert it to a different format which should
    # be easier to render.
    if form.cleaned_data["output_type"] == "text/json+aggregation+table":
        aggregation = aggregation_to_matrix(aggregation, categories)

    if form.cleaned_data["output_type"] == "text/csv":
        return aggregation_to_csv(aggregation, categories, values)

    self.monitor.update(60, "Serialising..")
    return json.dumps(aggregation, cls=AggregationEncoder, check_circular=False)

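# Hedged sketch: get_code_filter() presumably expands the selected codes with their
# codebook descendants when include_descendants is set. A stand-in over an
# illustrative {code_id: [child_id, ...]} mapping (the real function takes a
# Codebook object):
def _get_code_filter_sketch(children, code_ids, include_descendants):
    selected = set(code_ids)
    if include_descendants:
        stack = list(code_ids)
        while stack:
            for child in children.get(stack.pop(), ()):
                if child not in selected:
                    selected.add(child)
                    stack.append(child)
    return selected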