Example #1
0
    def run(self, form):
        """Store the form's search queries as root codes of a codebook.

        A new codebook named ``new_codebook`` is created in ``self.project``
        when that field is filled in; otherwise the selected
        ``existing_codebook`` is used (and cached). For every query obtained
        from ``SelectionSearch(form)``, the root code with the same label
        gets its ``indicator_language`` label replaced by the query string;
        if no such root exists, a new code is created and added.

        Returns a message with the number of updated and added codes.
        """
        cleaned = form.cleaned_data

        # Resolve the target codebook
        name_for_new = cleaned["new_codebook"]
        if not name_for_new:
            codebook = cleaned["existing_codebook"]
            codebook.cache()
        else:
            codebook = Codebook(name=name_for_new, project=self.project)
            codebook.save()

        # Index existing root codes and the queries by label
        language = cleaned["indicator_language"]
        root_by_label = dict(
            (root.label, root) for root in codebook.get_roots())
        query_by_label = dict(
            (q.label, q) for q in SelectionSearch(form).get_queries())

        n_updated = 0
        n_new = 0
        for label, query in query_by_label.items():
            if label not in root_by_label:
                # No root with this label yet: create and attach a code
                code = Code(label=label)
                code.save()
                code.add_label(language, query.query, replace=True)
                codebook.add_code(code)
                n_new += 1
                continue
            # Overwrite the indicator label of the existing root code
            root_by_label[label].add_label(language, query.query, replace=True)
            n_updated += 1

        return "Updated {} code(s), added {} new code(s).".format(
            n_updated, n_new)
Example #2
0
    def _run(self, file, project, codebook_name, default_language, codebook,
             **kargs):
        """Import codes from the form's CSV reader into a codebook.

        Reads the CSV as columns, derives (code, parent) pairs either from an
        explicit "parent" column or from the indented columns, creates or
        retrieves a Code per row (using the optional "uuid" column), and
        places the codes in the codebook. Every column whose name starts
        with LABEL_PREFIX adds labels in the language named by the remainder
        of the column name.

        :param file: not used directly; the CSV comes from ``self.bound_form``
        :param project: project in which a new codebook is created
        :param codebook_name: name for a newly created codebook
        :param default_language: language passed to ``add_label`` for the
            code name itself
        :param codebook: existing codebook to update; a new one is created
            when this is falsy
        :return: the created or updated codebook
        """
        data = csv_as_columns(self.bound_form.get_reader())

        # Build (code, parent) pairs. These must be a real list: they are
        # measured with len() and iterated several times below, which a lazy
        # zip iterator (Python 3) would not survive.
        if "parent" in data:
            parents = list(zip(data["code"], data["parent"]))
        else:
            cols = get_indented_columns(data)
            parents = list(get_parents_from_columns(cols))
        uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

        # create codebook
        if not codebook:
            codebook = Codebook.objects.create(project=project,
                                               name=codebook_name)
            log.info("Created codebook {codebook.id} : {codebook}".format(
                codebook=codebook))
        else:
            codebook.cache_labels()
            log.info("Updating {codebook.id} : {codebook}".format(
                codebook=codebook))

        # create/retrieve codes; an empty uuid cell is treated as "no uuid"
        codes = {
            code: Code.get_or_create(uuid=uuid or None)
            for ((code, _parent), uuid) in zip(parents, uuids)
        }

        def _id_or_none(obj):
            # Compare parents by id so that None (no parent) is comparable too
            return None if obj is None else obj.id

        to_add = []
        for code, parent in parents:
            instance = codes[code]
            parent_instance = codes[parent] if parent else None
            instance.add_label(default_language, code)
            cbc = codebook.get_codebookcode(instance)
            if cbc is None:
                # Not in the codebook yet: collect for one bulk add below
                to_add.append((instance, parent_instance))
            elif _id_or_none(cbc.parent) != _id_or_none(parent_instance):
                # Already present, but the parent changed
                cbc.parent = parent_instance
                cbc.save()
        codebook.add_codes(to_add)

        # Extra label columns: "<LABEL_PREFIX>[-]<language id or label>"
        for col in data:
            if not col.startswith(LABEL_PREFIX):
                continue
            lang = col[len(LABEL_PREFIX):].strip()
            if lang.startswith('-'):
                lang = lang[1:].strip()
            try:
                lang = int(lang)
            except ValueError:
                # Not numeric: treat as a language label
                lang = Language.get_or_create(label=lang).id
            for (code, _parent), label in zip(parents, data[col]):
                if label:
                    codes[code].add_label(lang, label)
        return codebook
Example #3
0
    def run(self, form):
        """Save the queries of the given form as root codes in a codebook.

        Uses a freshly created codebook (in ``self.project``) when the
        ``new_codebook`` field holds a name, and the cached
        ``existing_codebook`` otherwise. Each query from
        ``SelectionSearch(form)`` either replaces the ``indicator_language``
        label of the root code carrying the same label, or becomes a new
        code that is added to the codebook.

        Returns a summary string with the updated and added counts.
        """
        fields = form.cleaned_data

        # Pick or create the codebook to write to
        requested_name = fields["new_codebook"]
        if not requested_name:
            codebook = fields["existing_codebook"]
            codebook.cache()
        else:
            codebook = Codebook(name=requested_name, project=self.project)
            codebook.save()

        # Look-up tables keyed on label
        lang = fields["indicator_language"]
        existing_roots = dict((r.label, r) for r in codebook.get_roots())
        searches = dict(
            (q.label, q) for q in SelectionSearch(form).get_queries())

        updated_count = 0
        created_count = 0
        for label, search in searches.items():
            if label not in existing_roots:
                # Unknown label: make a new code and attach it
                fresh = Code(label=label)
                fresh.save()
                fresh.add_label(lang, search.query, replace=True)
                codebook.add_code(fresh)
                created_count += 1
                continue
            # Known label: replace the indicator label on the root code
            existing_roots[label].add_label(lang, search.query, replace=True)
            updated_count += 1

        return "Updated {} code(s), added {} new code(s).".format(
            updated_count, created_count)
    def _run(self, file, project, codebook_name, default_language, codebook, **kargs):
        """Import codes from the form's CSV reader into a codebook.

        Reads the CSV as columns, derives (code, parent) pairs either from an
        explicit "parent" column or from the indented columns, creates or
        retrieves a Code per row (using the optional "uuid" column), and
        places the codes in the codebook. Every column whose name starts
        with LABEL_PREFIX adds labels in the language named by the remainder
        of the column name.

        :param file: not used directly; the CSV comes from ``self.bound_form``
        :param project: project in which a new codebook is created
        :param codebook_name: name for a newly created codebook
        :param default_language: language passed to ``add_label`` for the
            code name itself
        :param codebook: existing codebook to update; a new one is created
            when this is falsy
        :return: the created or updated codebook
        """
        data = csv_as_columns(self.bound_form.get_reader())

        # Build (code, parent) pairs. These must be a real list: they are
        # measured with len() and iterated several times below, which a lazy
        # zip iterator (Python 3) would not survive.
        if "parent" in data:
            parents = list(zip(data["code"], data["parent"]))
        else:
            cols = get_indented_columns(data)
            parents = list(get_parents_from_columns(cols))
        uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

        # create codebook
        if not codebook:
            codebook = Codebook.objects.create(project=project, name=codebook_name)
            log.info("Created codebook {codebook.id} : {codebook}".format(codebook=codebook))
        else:
            codebook.cache_labels()
            log.info("Updating {codebook.id} : {codebook}".format(codebook=codebook))

        # create/retrieve codes; an empty uuid cell is treated as "no uuid"
        codes = {
            code: Code.get_or_create(uuid=uuid or None)
            for ((code, _parent), uuid) in zip(parents, uuids)
        }

        def _id_or_none(obj):
            # Compare parents by id so that None (no parent) is comparable too
            return None if obj is None else obj.id

        to_add = []
        for code, parent in parents:
            instance = codes[code]
            parent_instance = codes[parent] if parent else None
            instance.add_label(default_language, code)
            cbc = codebook.get_codebookcode(instance)
            if cbc is None:
                # Not in the codebook yet: collect for one bulk add below
                to_add.append((instance, parent_instance))
            elif _id_or_none(cbc.parent) != _id_or_none(parent_instance):
                # Already present, but the parent changed
                cbc.parent = parent_instance
                cbc.save()
        codebook.add_codes(to_add)

        # Extra label columns: "<LABEL_PREFIX>[-]<language id or label>"
        for col in data:
            if not col.startswith(LABEL_PREFIX):
                continue
            lang = col[len(LABEL_PREFIX):].strip()
            if lang.startswith('-'):
                lang = lang[1:].strip()
            try:
                lang = int(lang)
            except ValueError:
                # Not numeric: treat as a language label
                lang = Language.get_or_create(label=lang).id
            for (code, _parent), label in zip(parents, data[col]):
                if label:
                    codes[code].add_label(lang, label)
        return codebook
Example #5
0
    def _run(self, file, project, codebook_name, **kargs):
        """Create a new codebook from the form's CSV reader.

        Reads the CSV as columns, derives (code, parent) pairs either from an
        explicit "parent" column or from the indented columns, creates or
        retrieves a Code per row (using the optional "uuid" column) and adds
        all of them to a newly created codebook. Codes without any label get
        the code name as label for language 0. Columns whose name starts with
        LABEL_PREFIX add labels for the language given by the rest of the
        column name, but only where the code has no label in that language.

        :param file: not used directly; the CSV comes from ``self.bound_form``
        :param project: project in which the codebook is created
        :param codebook_name: name of the new codebook
        :return: the newly created codebook
        """
        data = csv_as_columns(self.bound_form.get_reader())

        # Build (code, parent) pairs. These must be a real list: they are
        # measured with len() and iterated several times below, which a lazy
        # zip iterator (Python 3) would not survive.
        if "parent" in data:
            parents = list(zip(data["code"], data["parent"]))
        else:
            cols = get_indented_columns(data)
            parents = list(get_parents_from_columns(cols))

        uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

        # create objects
        cb = Codebook.objects.create(project=project, name=codebook_name)

        log.info("Created codebook {cb.id} : {cb}".format(cb=cb))

        # An empty uuid cell is treated as "no uuid"
        codes = {
            code: Code.get_or_create(uuid=uuid or None)
            for ((code, _parent), uuid) in zip(parents, uuids)
        }

        to_add = []
        for code, parent in parents:
            instance = codes[code]
            parent_instance = codes[parent] if parent else None
            to_add.append((instance, parent_instance))
            # Only label codes that have no label at all yet
            if not instance.labels.all().exists():
                instance.add_label(0, code)
        cb.add_codes(to_add)

        # Extra label columns: "<LABEL_PREFIX><language id or label>"
        for col in data:
            if not col.startswith(LABEL_PREFIX):
                continue
            lang = col[len(LABEL_PREFIX):]
            try:
                lang = int(lang)
            except ValueError:
                # Not numeric: look the language up by its label
                lang = Language.objects.get(label=lang).id
            for (code, _parent), label in zip(parents, data[col]):
                # Never overwrite an existing label in this language
                if label and not codes[code].labels.filter(
                        language_id=lang).exists():
                    codes[code].add_label(lang, label)
        return cb
Example #6
0
    def _run(self, file, project, codebook_name, **kargs):
        """Create a new codebook from the form's CSV reader.

        Reads the CSV as columns, derives (code, parent) pairs either from an
        explicit "parent" column or from the indented columns, creates or
        retrieves a Code per row (using the optional "uuid" column) and adds
        all of them to a newly created codebook. Codes without any label get
        the code name as label for language 0. Columns whose name starts with
        LABEL_PREFIX add labels for the language given by the rest of the
        column name, but only where the code has no label in that language.

        :param file: not used directly; the CSV comes from ``self.bound_form``
        :param project: project in which the codebook is created
        :param codebook_name: name of the new codebook
        :return: the newly created codebook
        """
        data = csv_as_columns(self.bound_form.get_reader())

        # Build (code, parent) pairs. These must be a real list: they are
        # measured with len() and iterated several times below, which a lazy
        # zip iterator (Python 3) would not survive.
        if "parent" in data:
            parents = list(zip(data["code"], data["parent"]))
        else:
            cols = get_indented_columns(data)
            parents = list(get_parents_from_columns(cols))

        uuids = data["uuid"] if "uuid" in data else [None] * len(parents)

        # create objects
        cb = Codebook.objects.create(project=project, name=codebook_name)

        log.info("Created codebook {cb.id} : {cb}".format(cb=cb))

        # An empty uuid cell is treated as "no uuid"
        codes = {
            code: Code.get_or_create(uuid=uuid or None)
            for ((code, _parent), uuid) in zip(parents, uuids)
        }

        to_add = []
        for code, parent in parents:
            instance = codes[code]
            parent_instance = codes[parent] if parent else None
            to_add.append((instance, parent_instance))
            # Only label codes that have no label at all yet
            if not instance.labels.all().exists():
                instance.add_label(0, code)
        cb.add_codes(to_add)

        # Extra label columns: "<LABEL_PREFIX><language id or label>"
        for col in data:
            if not col.startswith(LABEL_PREFIX):
                continue
            lang = col[len(LABEL_PREFIX):]
            try:
                lang = int(lang)
            except ValueError:
                # Not numeric: look the language up by its label
                lang = Language.objects.get(label=lang).id
            for (code, _parent), label in zip(parents, data[col]):
                # Never overwrite an existing label in this language
                if label and not codes[code].labels.filter(language_id=lang).exists():
                    codes[code].add_label(lang, label)
        return cb
Example #7
0
    def post(self, request, pk, **kwargs):
        """Handle a POST on the ruleset page: JSON upload or form edits.

        If the request carries uploaded files, the upload under the ``file``
        key is parsed as a JSON dump with ``rules`` and ``lexicon`` entries.
        The ruleset's existing rules and lexicon codes are deleted and
        recreated from the dump, and the client is redirected to the ruleset
        page.

        Otherwise the posted RuleSet form is saved when valid, and the posted
        rule formset is validated. A valid formset is saved and the client is
        redirected; an invalid one is re-rendered together with the ruleset
        form.

        Raises:
            ValidationError: if the uploaded JSON lacks ``rules`` or
                ``lexicon``.
        """
        self.object = self.get_object()
        ruleset_id = self.object.id

        class _RuleFormWithRuleset(RuleForm):
            """Rule Form that inserts ruleset info"""
            def clean(self):
                # HACK! How to add ruleset info to extra fields?
                # NOTE(review): super(RuleForm, self) skips RuleForm.clean
                # itself; kept as in the original -- confirm this is intended.
                cleaned_data = super(RuleForm, self).clean()
                msg_req = u"This field is required."
                if (("ruleset" not in cleaned_data
                     and len(self._errors.get("ruleset", [])) == 1
                     and self._errors["ruleset"][0] == msg_req)):
                    # The only problem is the missing ruleset: fill it in
                    cleaned_data["ruleset"] = RuleSet.objects.get(
                        pk=ruleset_id)
                    del self._errors["ruleset"]
                for fld in ("insert", "remove", "where"):
                    # A field that failed validation is absent from
                    # cleaned_data; skip it so its error is reported
                    # instead of raising KeyError here.
                    if fld in self.cleaned_data:
                        self.cleaned_data[fld] = _normalize(
                            self.cleaned_data[fld])
                return cleaned_data

        if request.FILES:
            # upload json dump
            dump = json.load(request.FILES['file'])
            # Both sections are read below, so both must be present
            if 'rules' not in dump or 'lexicon' not in dump:
                raise ValidationError("Invalid json")
            rules = [
                dict(
                    label=rule.get('label', 'rule-{}'.format(i)),
                    ruleset=self.object,
                    order=int(rule.get('order', i)),
                    where=rule['condition'],
                    insert=rule.get('insert', ''),
                    remove=rule.get('remove', ''),
                    remarks=rule.get('remarks', ''),
                ) for (i, rule) in enumerate(dump['rules'])
            ]

            # lexclass -> lemmata; later entries overwrite earlier ones
            lexicon = {}
            for entry in dump['lexicon']:
                lexicon[entry['lexclass']] = entry['lemma']

            # Replace the current rules and lexicon codes wholesale
            self.object.rules.all().delete()
            cb = self.object.lexicon_codebook
            Code.objects.filter(codebook_codes__codebook_id=cb.id).delete()
            cb.codebookcodes.all().delete()
            lexlang = self.object.lexicon_language
            # Language pk 1 is used for the code name, or pk 0 when the
            # lexicon language itself is pk 1
            lang = Language.objects.get(pk=(0 if lexlang.id == 1 else 1))
            for lexclass, lemmata in lexicon.items():
                c = Code.create(lexclass, lang)
                c.add_label(lexlang, ", ".join(lemmata))
                cb.add_code(c)

            for rule in rules:
                Rule.objects.create(**rule)

            return redirect(reverse("ruleset", args=(self.object.id, )))
        else:
            ruleset_form = modelform_factory(RuleSet)(request.POST,
                                                      instance=self.object)
            if ruleset_form.is_valid():
                ruleset_form.save()

        formset = formset_factory(_RuleFormWithRuleset,
                                  formset=BaseModelFormSet,
                                  can_delete=True)
        formset.model = Rule
        formset = formset(request.POST,
                          request.FILES,
                          queryset=self.object.rules.all())
        if formset.is_valid():
            formset.save()
            return redirect(reverse("ruleset", args=(self.object.id, )))

        # Invalid formset: show the bound forms again with their errors
        ctx = self.get_context_data(formset=formset, ruleset_form=ruleset_form)
        return self.render_to_response(ctx)
Example #8
0
    def post(self, request, pk, **kwargs):
        """Handle a POST on the ruleset page: JSON upload or form edits.

        If the request carries uploaded files, the upload under the ``file``
        key is parsed as a JSON dump with ``rules`` and ``lexicon`` entries.
        The ruleset's existing rules and lexicon codes are deleted and
        recreated from the dump, and the client is redirected to the ruleset
        page.

        Otherwise the posted RuleSet form is saved when valid, and the posted
        rule formset is validated. A valid formset is saved and the client is
        redirected; an invalid one is re-rendered together with the ruleset
        form.

        Raises:
            ValidationError: if the uploaded JSON lacks ``rules`` or
                ``lexicon``.
        """
        self.object = self.get_object()
        ruleset_id = self.object.id

        class _RuleFormWithRuleset(RuleForm):

            """Rule Form that inserts ruleset info"""

            def clean(self):
                # HACK! How to add ruleset info to extra fields?
                # NOTE(review): super(RuleForm, self) skips RuleForm.clean
                # itself; kept as in the original -- confirm this is intended.
                cleaned_data = super(RuleForm, self).clean()
                msg_req = u"This field is required."
                if (("ruleset" not in cleaned_data
                     and len(self._errors.get("ruleset", [])) == 1
                     and self._errors["ruleset"][0] == msg_req)):
                    # The only problem is the missing ruleset: fill it in
                    cleaned_data["ruleset"] = RuleSet.objects.get(
                        pk=ruleset_id)
                    del self._errors["ruleset"]
                for fld in ("insert", "remove", "where"):
                    # A field that failed validation is absent from
                    # cleaned_data; skip it so its error is reported
                    # instead of raising KeyError here.
                    if fld in self.cleaned_data:
                        self.cleaned_data[fld] = _normalize(
                            self.cleaned_data[fld])
                return cleaned_data

        if request.FILES:
            # upload json dump
            dump = json.load(request.FILES['file'])
            # Both sections are read below, so both must be present
            if 'rules' not in dump or 'lexicon' not in dump:
                raise ValidationError("Invalid json")
            rules = [dict(label=rule.get('label', 'rule-{}'.format(i)),
                          ruleset=self.object,
                          order=int(rule.get('order', i)),
                          where=rule['condition'],
                          insert=rule.get('insert', ''),
                          remove=rule.get('remove', ''),
                          remarks=rule.get('remarks', ''),
                          ) for (i, rule) in enumerate(dump['rules'])]

            # lexclass -> lemmata; later entries overwrite earlier ones
            lexicon = {}
            for entry in dump['lexicon']:
                lexicon[entry['lexclass']] = entry['lemma']

            # Replace the current rules and lexicon codes wholesale
            self.object.rules.all().delete()
            cb = self.object.lexicon_codebook
            Code.objects.filter(codebook_codes__codebook_id=cb.id).delete()
            cb.codebookcodes.all().delete()
            lexlang = self.object.lexicon_language
            # Language pk 1 is used for the code name, or pk 0 when the
            # lexicon language itself is pk 1
            lang = Language.objects.get(pk=(0 if lexlang.id == 1 else 1))
            for lexclass, lemmata in lexicon.items():
                c = Code.create(lexclass, lang)
                c.add_label(lexlang, ", ".join(lemmata))
                cb.add_code(c)

            for rule in rules:
                Rule.objects.create(**rule)

            return redirect(reverse("ruleset", args=(self.object.id, )))
        else:
            ruleset_form = modelform_factory(RuleSet)(
                request.POST, instance=self.object)
            if ruleset_form.is_valid():
                ruleset_form.save()

        formset = formset_factory(_RuleFormWithRuleset,
                                  formset=BaseModelFormSet, can_delete=True)
        formset.model = Rule
        formset = formset(request.POST, request.FILES,
                          queryset=self.object.rules.all())
        if formset.is_valid():
            formset.save()
            return redirect(reverse("ruleset", args=(self.object.id, )))

        # Invalid formset: show the bound forms again with their errors
        ctx = self.get_context_data(formset=formset, ruleset_form=ruleset_form)
        return self.render_to_response(ctx)