def run(self, form):
    """Store the form's search queries as labels on codes of a codebook.

    The target codebook is either created from the ``new_codebook`` name
    in the cleaned form data or taken from ``existing_codebook``. Every
    query returned by ``SelectionSearch(form).get_queries()`` is matched by
    label against the codebook's root codes: a matching root gets the query
    text as its label in ``indicator_language``; otherwise a new code is
    created, labelled, and added to the codebook.

    Returns a human-readable summary string with the number of updated and
    newly added codes.
    """
    cleaned = form.cleaned_data

    # Resolve the target codebook
    codebook_name = cleaned["new_codebook"]
    if not codebook_name:
        codebook = cleaned["existing_codebook"]
    else:
        codebook = Codebook(name=codebook_name, project=self.project)
        codebook.save()
    codebook.cache()

    # Index root codes and queries by label
    language = cleaned["indicator_language"]
    roots_by_label = {root.label: root for root in codebook.get_roots()}
    queries_by_label = {
        found.label: found for found in SelectionSearch(form).get_queries()
    }

    n_updated = 0
    n_new = 0
    for label, query in queries_by_label.items():
        if label not in roots_by_label:
            # No root carries this label yet: create a new code for it
            fresh = Code(label=label)
            fresh.save()
            fresh.add_label(language, query.query, replace=True)
            codebook.add_code(fresh)
            n_new += 1
            continue

        # Root already exists: overwrite its indicator label
        roots_by_label[label].add_label(language, query.query, replace=True)
        n_updated += 1

    summary = "Updated {} code(s), added {} new code(s)."
    return summary.format(n_updated, n_new)
def _run(self, file, project, codebook_name, default_language, codebook, **kargs):
    """Import codes, their hierarchy and their labels from the bound CSV.

    The CSV is read column-wise from ``self.bound_form.get_reader()``. If it
    has a ``parent`` column, (code, parent) pairs come from the ``code`` and
    ``parent`` columns; otherwise they are derived from indented columns.
    An optional ``uuid`` column identifies existing codes.

    If ``codebook`` is falsy a new codebook named ``codebook_name`` is
    created in ``project``; otherwise the given codebook is updated: codes
    not yet in it are added and existing codes are re-parented if needed.
    Each code gets its own name as label in ``default_language``; columns
    whose name starts with ``LABEL_PREFIX`` supply labels in the language
    named (or numbered) by the rest of the column name.

    ``file`` and ``**kargs`` are accepted but not used here.

    Returns the created or updated codebook.
    """
    data = csv_as_columns(self.bound_form.get_reader())

    # Build (code, parent) pairs. They are measured with len() and iterated
    # several times below, so they must be a real list: on Python 3 a bare
    # zip() is a one-shot iterator without len().
    if "parent" in data:
        parents = list(zip(data["code"], data["parent"]))
    else:
        cols = get_indented_columns(data)
        parents = list(get_parents_from_columns(cols))
    uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

    # create codebook
    if not codebook:
        codebook = Codebook.objects.create(project=project, name=codebook_name)
        log.info("Created codebook {codebook.id} : {codebook}".format(
            codebook=codebook))
    else:
        codebook.cache_labels()
        log.info("Updating {codebook.id} : {codebook}".format(
            codebook=codebook))

    # create/retrieve codes (an empty uuid cell is treated as "no uuid")
    codes = {
        code: Code.get_or_create(uuid=uuid or None)
        for ((code, _parent), uuid) in zip(parents, uuids)
    }

    def _id_or_none(obj):
        # Compare parents by id so that None (no parent) is handled too
        return None if obj is None else obj.id

    to_add = []
    for code, parent in parents:
        instance = codes[code]
        parent_instance = codes[parent] if parent else None
        instance.add_label(default_language, code)

        cbc = codebook.get_codebookcode(instance)
        if cbc is None:
            # Not in the codebook yet: add in bulk after the loop
            to_add.append((instance, parent_instance))
        elif _id_or_none(cbc.parent) != _id_or_none(parent_instance):
            # Already present but under a different parent: move it
            cbc.parent = parent_instance
            cbc.save()
    codebook.add_codes(to_add)

    # Extra label columns: "<LABEL_PREFIX>[-]<language id or label>"
    for col in data:
        if not col.startswith(LABEL_PREFIX):
            continue
        lang = col[len(LABEL_PREFIX):].strip()
        if lang.startswith('-'):
            lang = lang[1:].strip()
        try:
            lang = int(lang)
        except ValueError:
            # Not a numeric id, so treat it as a language label
            lang = Language.get_or_create(label=lang).id
        for (code, _parent), label in zip(parents, data[col]):
            if label:
                codes[code].add_label(lang, label)
    return codebook
def _run(self, file, project, codebook_name, default_language, codebook, **kargs):
    """Import codes, their hierarchy and their labels from the bound CSV.

    The CSV is read column-wise from ``self.bound_form.get_reader()``. If it
    has a ``parent`` column, (code, parent) pairs come from the ``code`` and
    ``parent`` columns; otherwise they are derived from indented columns.
    An optional ``uuid`` column identifies existing codes.

    If ``codebook`` is falsy a new codebook named ``codebook_name`` is
    created in ``project``; otherwise the given codebook is updated: codes
    not yet in it are added and existing codes are re-parented if needed.
    Each code gets its own name as label in ``default_language``; columns
    whose name starts with ``LABEL_PREFIX`` supply labels in the language
    named (or numbered) by the rest of the column name.

    ``file`` and ``**kargs`` are accepted but not used here.

    Returns the created or updated codebook.
    """
    data = csv_as_columns(self.bound_form.get_reader())

    # Build (code, parent) pairs. They are measured with len() and iterated
    # several times below, so they must be a real list: on Python 3 a bare
    # zip() is a one-shot iterator without len().
    if "parent" in data:
        parents = list(zip(data["code"], data["parent"]))
    else:
        cols = get_indented_columns(data)
        parents = list(get_parents_from_columns(cols))
    uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

    # create codebook
    if not codebook:
        codebook = Codebook.objects.create(project=project, name=codebook_name)
        log.info("Created codebook {codebook.id} : {codebook}".format(
            codebook=codebook))
    else:
        codebook.cache_labels()
        log.info("Updating {codebook.id} : {codebook}".format(
            codebook=codebook))

    # create/retrieve codes (an empty uuid cell is treated as "no uuid")
    codes = {
        code: Code.get_or_create(uuid=uuid or None)
        for ((code, _parent), uuid) in zip(parents, uuids)
    }

    def _id_or_none(obj):
        # Compare parents by id so that None (no parent) is handled too
        return None if obj is None else obj.id

    to_add = []
    for code, parent in parents:
        instance = codes[code]
        parent_instance = codes[parent] if parent else None
        instance.add_label(default_language, code)

        cbc = codebook.get_codebookcode(instance)
        if cbc is None:
            # Not in the codebook yet: add in bulk after the loop
            to_add.append((instance, parent_instance))
        elif _id_or_none(cbc.parent) != _id_or_none(parent_instance):
            # Already present but under a different parent: move it
            cbc.parent = parent_instance
            cbc.save()
    codebook.add_codes(to_add)

    # Extra label columns: "<LABEL_PREFIX>[-]<language id or label>"
    for col in data:
        if not col.startswith(LABEL_PREFIX):
            continue
        lang = col[len(LABEL_PREFIX):].strip()
        if lang.startswith('-'):
            lang = lang[1:].strip()
        try:
            lang = int(lang)
        except ValueError:
            # Not a numeric id, so treat it as a language label
            lang = Language.get_or_create(label=lang).id
        for (code, _parent), label in zip(parents, data[col]):
            if label:
                codes[code].add_label(lang, label)
    return codebook
def _run(self, file, project, codebook_name, **kargs):
    """Create a new codebook from the bound CSV of codes and labels.

    The CSV is read column-wise from ``self.bound_form.get_reader()``. If it
    has a ``parent`` column, (code, parent) pairs come from the ``code`` and
    ``parent`` columns; otherwise they are derived from indented columns.
    An optional ``uuid`` column identifies existing codes.

    A codebook named ``codebook_name`` is created in ``project`` and all
    codes are added to it with their parents. A code that has no labels at
    all gets its own name as label for language 0. Columns whose name
    starts with ``LABEL_PREFIX`` supply labels for the language given by the
    rest of the column name (numeric id or language label); a label is only
    added when the code has none for that language yet.

    ``file`` and ``**kargs`` are accepted but not used here.

    Returns the newly created codebook.
    """
    data = csv_as_columns(self.bound_form.get_reader())

    # Build (code, parent) pairs. They are measured with len() and iterated
    # several times below, so they must be a real list: on Python 3 a bare
    # zip() is a one-shot iterator without len().
    if "parent" in data:
        parents = list(zip(data["code"], data["parent"]))
    else:
        cols = get_indented_columns(data)
        parents = list(get_parents_from_columns(cols))
    uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

    # create objects
    cb = Codebook.objects.create(project=project, name=codebook_name)
    log.info("Created codebook {cb.id} : {cb}".format(cb=cb))

    # An empty uuid cell is treated as "no uuid"
    codes = {
        code: Code.get_or_create(uuid=uuid or None)
        for ((code, _parent), uuid) in zip(parents, uuids)
    }

    to_add = []
    for code, parent in parents:
        instance = codes[code]
        parent_instance = codes[parent] if parent else None
        to_add.append((instance, parent_instance))
        # Only give a default label to codes that have no label at all
        if not instance.labels.all().exists():
            instance.add_label(0, code)
    cb.add_codes(to_add)

    # Extra label columns: "<LABEL_PREFIX><language id or label>"
    for col in data:
        if not col.startswith(LABEL_PREFIX):
            continue
        lang = col[len(LABEL_PREFIX):]
        try:
            lang = int(lang)
        except ValueError:
            # Not a numeric id, so look the language up by its label
            lang = Language.objects.get(label=lang).id
        for (code, _parent), label in zip(parents, data[col]):
            if not label:
                continue
            # Never overwrite a label the code already has in this language
            if not codes[code].labels.filter(language_id=lang).exists():
                codes[code].add_label(lang, label)
    return cb
def _run(self, file, project, codebook_name, **kargs):
    """Create a new codebook from the bound CSV of codes and labels.

    The CSV is read column-wise from ``self.bound_form.get_reader()``. If it
    has a ``parent`` column, (code, parent) pairs come from the ``code`` and
    ``parent`` columns; otherwise they are derived from indented columns.
    An optional ``uuid`` column identifies existing codes.

    A codebook named ``codebook_name`` is created in ``project`` and all
    codes are added to it with their parents. A code that has no labels at
    all gets its own name as label for language 0. Columns whose name
    starts with ``LABEL_PREFIX`` supply labels for the language given by the
    rest of the column name (numeric id or language label); a label is only
    added when the code has none for that language yet.

    ``file`` and ``**kargs`` are accepted but not used here.

    Returns the newly created codebook.
    """
    data = csv_as_columns(self.bound_form.get_reader())

    # Build (code, parent) pairs. They are measured with len() and iterated
    # several times below, so they must be a real list: on Python 3 a bare
    # zip() is a one-shot iterator without len().
    if "parent" in data:
        parents = list(zip(data["code"], data["parent"]))
    else:
        cols = get_indented_columns(data)
        parents = list(get_parents_from_columns(cols))
    uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

    # create objects
    cb = Codebook.objects.create(project=project, name=codebook_name)
    log.info("Created codebook {cb.id} : {cb}".format(cb=cb))

    # An empty uuid cell is treated as "no uuid"
    codes = {
        code: Code.get_or_create(uuid=uuid or None)
        for ((code, _parent), uuid) in zip(parents, uuids)
    }

    to_add = []
    for code, parent in parents:
        instance = codes[code]
        parent_instance = codes[parent] if parent else None
        to_add.append((instance, parent_instance))
        # Only give a default label to codes that have no label at all
        if not instance.labels.all().exists():
            instance.add_label(0, code)
    cb.add_codes(to_add)

    # Extra label columns: "<LABEL_PREFIX><language id or label>"
    for col in data:
        if not col.startswith(LABEL_PREFIX):
            continue
        lang = col[len(LABEL_PREFIX):]
        try:
            lang = int(lang)
        except ValueError:
            # Not a numeric id, so look the language up by its label
            lang = Language.objects.get(label=lang).id
        for (code, _parent), label in zip(parents, data[col]):
            if not label:
                continue
            # Never overwrite a label the code already has in this language
            if not codes[code].labels.filter(language_id=lang).exists():
                codes[code].add_label(lang, label)
    return cb
def post(self, request, pk, **kwargs):
    """Handle a POST for the rule set: JSON dump upload or form edit.

    With uploaded files, ``request.FILES['file']`` is parsed as JSON and
    must contain both a ``rules`` and a ``lexicon`` key. All existing rules
    and all codes of the lexicon codebook are deleted and replaced by the
    uploaded content, then the client is redirected to the ``ruleset`` URL.

    Without files, the rule set form and the rule formset are bound to
    ``request.POST``. The rule set form is saved if valid; if the formset is
    valid it is saved and the client is redirected, otherwise the page is
    rendered again with the bound forms.

    Raises ValidationError("Invalid json") if the upload lacks ``rules`` or
    ``lexicon``.
    """
    self.object = self.get_object()
    ruleset_id = self.object.id

    class _RuleFormWithRuleset(RuleForm):
        """Rule Form that inserts ruleset info"""

        def clean(self):
            # HACK! How to add ruleset info to extra fields?
            # NOTE(review): super(RuleForm, self) skips RuleForm's own
            # clean(); presumably deliberate as part of this hack - confirm.
            cleaned_data = super(RuleForm, self).clean()
            msg_req = u"This field is required."
            # If the only problem with "ruleset" is that it is missing,
            # fill it in with the rule set being edited and clear the error
            if (("ruleset" not in cleaned_data
                 and len(self._errors.get("ruleset", [])) == 1
                 and self._errors["ruleset"][0] == msg_req)):
                cleaned_data["ruleset"] = RuleSet.objects.get(pk=ruleset_id)
                del self._errors["ruleset"]
            for fld in ("insert", "remove", "where"):
                self.cleaned_data[fld] = _normalize(self.cleaned_data[fld])
            return cleaned_data

    if request.FILES:
        # upload json dump
        dump = json.load(request.FILES['file'])
        # Both keys are required. The previous test
        # ("not 'rules' in x and 'lexicon' in x") only fired when rules was
        # missing AND lexicon present, so other malformed dumps failed
        # later with a KeyError.
        if 'rules' not in dump or 'lexicon' not in dump:
            raise ValidationError("Invalid json")

        # Build everything before deleting, so a malformed rule entry
        # raises before any existing data is removed
        rules = [
            dict(
                label=rule.get('label', 'rule-{}'.format(i)),
                ruleset=self.object,
                order=int(rule.get('order', i)),
                where=rule['condition'],
                insert=rule.get('insert', ''),
                remove=rule.get('remove', ''),
                remarks=rule.get('remarks', ''),
            )
            for (i, rule) in enumerate(dump['rules'])
        ]
        lexicon = {}
        for entry in dump['lexicon']:
            lexicon[entry['lexclass']] = entry['lemma']

        # Replace existing rules and lexicon codes
        self.object.rules.all().delete()
        cb = self.object.lexicon_codebook
        Code.objects.filter(codebook_codes__codebook_id=cb.id).delete()
        cb.codebookcodes.all().delete()

        lexlang = self.object.lexicon_language
        # The lexclass name is stored under language 0 or 1, whichever
        # differs from pk 1 / is not the lexicon language
        lang = Language.objects.get(pk=(0 if lexlang.id == 1 else 1))
        # items() instead of iteritems(): works on Python 2 and 3
        for lexclass, lemmata in lexicon.items():
            c = Code.create(lexclass, lang)
            c.add_label(lexlang, ", ".join(lemmata))
            cb.add_code(c)

        for rule in rules:
            Rule.objects.create(**rule)
        return redirect(reverse("ruleset", args=(self.object.id, )))

    ruleset_form = modelform_factory(RuleSet)(request.POST,
                                              instance=self.object)
    if ruleset_form.is_valid():
        ruleset_form.save()

    formset_class = formset_factory(_RuleFormWithRuleset,
                                    formset=BaseModelFormSet,
                                    can_delete=True)
    formset_class.model = Rule
    formset = formset_class(request.POST, request.FILES,
                            queryset=self.object.rules.all())
    if formset.is_valid():
        formset.save()
        return redirect(reverse("ruleset", args=(self.object.id, )))

    # NOTE(review): the statements that rebuilt an unbound RuleForm formset
    # directly after the redirect above were read as unreachable and
    # removed - confirm against the original layout.
    ctx = self.get_context_data(formset=formset, ruleset_form=ruleset_form)
    return self.render_to_response(ctx)
def post(self, request, pk, **kwargs):
    """Handle a POST for the rule set: JSON dump upload or form edit.

    With uploaded files, ``request.FILES['file']`` is parsed as JSON and
    must contain both a ``rules`` and a ``lexicon`` key. All existing rules
    and all codes of the lexicon codebook are deleted and replaced by the
    uploaded content, then the client is redirected to the ``ruleset`` URL.

    Without files, the rule set form and the rule formset are bound to
    ``request.POST``. The rule set form is saved if valid; if the formset is
    valid it is saved and the client is redirected, otherwise the page is
    rendered again with the bound forms.

    Raises ValidationError("Invalid json") if the upload lacks ``rules`` or
    ``lexicon``.
    """
    self.object = self.get_object()
    ruleset_id = self.object.id

    class _RuleFormWithRuleset(RuleForm):
        """Rule Form that inserts ruleset info"""

        def clean(self):
            # HACK! How to add ruleset info to extra fields?
            # NOTE(review): super(RuleForm, self) skips RuleForm's own
            # clean(); presumably deliberate as part of this hack - confirm.
            cleaned_data = super(RuleForm, self).clean()
            msg_req = u"This field is required."
            # If the only problem with "ruleset" is that it is missing,
            # fill it in with the rule set being edited and clear the error
            if (("ruleset" not in cleaned_data
                 and len(self._errors.get("ruleset", [])) == 1
                 and self._errors["ruleset"][0] == msg_req)):
                cleaned_data["ruleset"] = RuleSet.objects.get(pk=ruleset_id)
                del self._errors["ruleset"]
            for fld in ("insert", "remove", "where"):
                self.cleaned_data[fld] = _normalize(self.cleaned_data[fld])
            return cleaned_data

    if request.FILES:
        # upload json dump
        dump = json.load(request.FILES['file'])
        # Both keys are required. The previous test
        # ("not 'rules' in x and 'lexicon' in x") only fired when rules was
        # missing AND lexicon present, so other malformed dumps failed
        # later with a KeyError.
        if 'rules' not in dump or 'lexicon' not in dump:
            raise ValidationError("Invalid json")

        # Build everything before deleting, so a malformed rule entry
        # raises before any existing data is removed
        rules = [
            dict(
                label=rule.get('label', 'rule-{}'.format(i)),
                ruleset=self.object,
                order=int(rule.get('order', i)),
                where=rule['condition'],
                insert=rule.get('insert', ''),
                remove=rule.get('remove', ''),
                remarks=rule.get('remarks', ''),
            )
            for (i, rule) in enumerate(dump['rules'])
        ]
        lexicon = {}
        for entry in dump['lexicon']:
            lexicon[entry['lexclass']] = entry['lemma']

        # Replace existing rules and lexicon codes
        self.object.rules.all().delete()
        cb = self.object.lexicon_codebook
        Code.objects.filter(codebook_codes__codebook_id=cb.id).delete()
        cb.codebookcodes.all().delete()

        lexlang = self.object.lexicon_language
        # The lexclass name is stored under language 0 or 1, whichever
        # differs from pk 1 / is not the lexicon language
        lang = Language.objects.get(pk=(0 if lexlang.id == 1 else 1))
        # items() instead of iteritems(): works on Python 2 and 3
        for lexclass, lemmata in lexicon.items():
            c = Code.create(lexclass, lang)
            c.add_label(lexlang, ", ".join(lemmata))
            cb.add_code(c)

        for rule in rules:
            Rule.objects.create(**rule)
        return redirect(reverse("ruleset", args=(self.object.id, )))

    ruleset_form = modelform_factory(RuleSet)(request.POST,
                                              instance=self.object)
    if ruleset_form.is_valid():
        ruleset_form.save()

    formset_class = formset_factory(_RuleFormWithRuleset,
                                    formset=BaseModelFormSet,
                                    can_delete=True)
    formset_class.model = Rule
    formset = formset_class(request.POST, request.FILES,
                            queryset=self.object.rules.all())
    if formset.is_valid():
        formset.save()
        return redirect(reverse("ruleset", args=(self.object.id, )))

    # NOTE(review): the statements that rebuilt an unbound RuleForm formset
    # directly after the redirect above were read as unreachable and
    # removed - confirm against the original layout.
    ctx = self.get_context_data(formset=formset, ruleset_form=ruleset_form)
    return self.render_to_response(ctx)