Code example #1
0
def data_document_detail(request, pk):
    """Render the detail page for one DataDocument.

    Supplemental ("SD") documents have no detail page, so those requests
    are redirected back to the owning data group with an info message.
    """
    doc = get_object_or_404(DataDocument, pk=pk)
    group_code = doc.data_group.group_type.code
    if group_code == "SD":
        messages.info(
            request,
            f'"{doc}" has no detail page. GroupType is "{doc.data_group.group_type}"',
        )
        return redirect(reverse("data_group_detail", args=[doc.data_group_id]))
    ParentForm, _ = create_detail_formset(doc)
    Parent, Child = get_extracted_models(group_code)
    extracted = Parent.objects.filter(pk=doc.pk).first()
    child_records = Child.objects.filter(
        extracted_text__data_document=doc).prefetch_related("dsstox")
    if Child == ExtractedListPresence:
        child_records = child_records.prefetch_related("tags")
    list_presence = ExtractedListPresence.objects.filter(
        extracted_text=extracted if extracted else None).first()
    tag_form = ExtractedListPresenceTagForm()
    context = {
        "doc": doc,
        "extracted_text": extracted,
        # the child queryset doubles as the "chemicals" table source
        "chemicals": child_records,
        # unbound form when no extracted record exists yet
        "edit_text_form": ParentForm(instance=extracted),
        "list_presence_tag_form": tag_form if list_presence else None,
    }
    if group_code == "CO":
        scripted = child_records.filter(script__isnull=False).first()
        context["cleaning_script"] = scripted.script if scripted else None
    return render(
        request, "data_document/data_document_detail.html", context)
Code example #2
0
File: forms.py  Project: drmjw/factotum
def create_detail_formset(group_type, extra=0, can_delete=False):
    """Return the (parent form, child formset) pair for ``group_type``.

    Supported group-type codes: 'CO', 'UN', 'FU', 'HP', 'CP'; any other
    code yields ``None``.

    Parameters
    ----------
    group_type : str
        Data-group type code used to select the extracted models.
    extra : int
        Number of empty child forms to render for new records.
    can_delete : bool
        Whether each child form gets a delete checkbox.
    """
    parent, child = get_extracted_models(group_type)

    def make_formset(parent_model, model, fields):
        # Fix: honor the caller's can_delete flag; it was previously
        # hard-coded to False, silently ignoring the parameter.
        return forms.inlineformset_factory(parent_model=parent_model,
                                           model=model,
                                           fields=fields,
                                           extra=extra,
                                           can_delete=can_delete)

    def make_custom_formset(parent_model, model, fields, formset, form):
        return forms.inlineformset_factory(
            parent_model=parent_model,
            model=model,
            fields=fields,
            formset=formset,  # this specifies a custom formset
            form=form,
            extra=extra,
            can_delete=can_delete)

    def one():  # for chemicals or unknown
        ChemicalFormSet = make_custom_formset(parent_model=parent,
                                              model=child,
                                              fields=child.detail_fields(),
                                              formset=ExtractedChemicalFormSet,
                                              form=ExtractedChemicalForm)
        return (ExtractedTextForm, ChemicalFormSet)

    def two():  # for functional_use
        FunctionalUseFormSet = make_formset(parent, child,
                                            child.detail_fields())
        return (ExtractedTextForm, FunctionalUseFormSet)

    def three():  # for habits_and_practices
        HnPFormSet = make_formset(parent, child, child.detail_fields())
        return (ExtractedTextForm, HnPFormSet)

    def four():  # for extracted_list_presence
        ListPresenceFormSet = make_formset(parent, child,
                                           child.detail_fields())
        return (ExtractedCPCatForm, ListPresenceFormSet)

    # Dispatch table keyed by group-type code; unknown codes fall back to
    # a lambda returning None.
    dg_types = {
        'CO': one,
        'UN': one,
        'FU': two,
        'HP': three,
        'CP': four,
    }
    func = dg_types.get(group_type, lambda: None)
    return func()
Code example #3
0
 def test_every_extractedtext(self):
     """Loop through all the ExtractedText objects and confirm that the new
     create_detail_formset method returns forms based on the correct models
     """
     for et in ExtractedText.objects.all():
         dd = et.data_document
         # create_detail_formset takes the DataDocument itself and an
         # `extra` count; only the child formset class is checked here.
         ParentForm, ChildForm = create_detail_formset(dd, EXTRA)
         child_formset = ChildForm(instance=et)
         # Compare the model of the child formset's QuerySet to the model
         # of the ExtractedText object's child objects
         dd_child_model = get_extracted_models(
             dd.data_group.group_type.code)[1]
         # Reach into the formset's __dict__ to pull the queryset's model
         # without triggering any queryset evaluation.
         childform_model = child_formset.__dict__.get(
             "queryset").__dict__.get("model")
         self.assertEqual(dd_child_model, childform_model)
Code example #4
0
def create_detail_formset(document,
                          extra=1,
                          can_delete=False,
                          exclude=None,
                          hidden=None):
    """Return the (parent form, child formset) pair for a document's group type.

    Supported group-type codes: 'CO', 'CP', 'FU', 'HP', 'HH', 'UN';
    any other code yields ``None``.

    Parameters
    ----------
    document : DataDocument
        The parent DataDocument
    extra : integer
        How many empty forms should be created for new records
    can_delete : boolean
        whether a delete checkbox is included
    exclude : list
        which fields to leave out of the form
    hidden : list
        which fields to make hidden on the form
    """
    # Use None sentinels instead of mutable default arguments ([]), which
    # would be shared across every call of this function.
    exclude = [] if exclude is None else exclude
    hidden = [] if hidden is None else hidden
    group_type = document.data_group.type
    parent, child = get_extracted_models(group_type)
    # True when an ExtractedText record already exists for this document;
    # selects between the plain and "Edit" parent-form classes below.
    extracted = hasattr(document, "extractedtext")

    def make_formset(
        parent_model,
        model,
        formset=BaseInlineFormSet,
        form=forms.ModelForm,
        exclude=exclude,
        hidden=hidden,
    ):
        formset_fields = model.detail_fields()
        if exclude:
            formset_fields = [
                in_field for in_field in formset_fields
                if in_field not in exclude
            ]
        # set fields to hidden if so specified
        widgets = dict([(in_field, forms.HiddenInput())
                        for in_field in formset_fields if in_field in hidden])
        return forms.inlineformset_factory(
            parent_model=parent_model,
            model=model,
            fields=formset_fields,
            formset=formset,  # this specifies a custom formset
            form=form,
            extra=extra,
            can_delete=can_delete,
            widgets=widgets,
        )

    def one():  # for chemicals or unknown
        ChemicalFormSet = make_formset(
            parent_model=parent,
            model=child,
            formset=ExtractedChemicalFormSet,
            form=ExtractedChemicalForm,
            hidden=["component"],
        )
        return (ExtractedTextForm, ChemicalFormSet)

    def two():  # for functional_use
        FunctionalUseFormSet = make_formset(parent, child)
        return (ExtractedTextForm, FunctionalUseFormSet)

    def three():  # for habits_and_practices
        HnPFormSet = make_formset(parent, child)
        return (ExtractedTextForm, HnPFormSet)

    def four():  # for extracted_list_presence
        ListPresenceFormSet = make_formset(parent, child)
        ParentForm = ExtractedCPCatForm if extracted else ExtractedCPCatEditForm

        return (ParentForm, ListPresenceFormSet)

    def five():  # for extracted_hh_rec
        HHFormSet = make_formset(parent, child)
        ParentForm = ExtractedHHDocForm if extracted else ExtractedHHDocEditForm
        return (ParentForm, HHFormSet)

    # Dispatch table keyed by group-type code; unknown codes fall back to
    # a lambda returning None.
    dg_types = {
        "CO": one,
        "UN": one,
        "FU": two,
        "HP": three,
        "CP": four,
        "HH": five
    }
    func = dg_types.get(group_type, lambda: None)
    return func()
Code example #5
0
def chemical_delete(request, doc_pk, chem_pk):
    """Delete one extracted chemical record, then redirect to its document."""
    document = DataDocument.objects.get(pk=doc_pk)
    # The child model class depends on the document's group type.
    _, chemical_model = get_extracted_models(
        document.data_group.group_type.code)
    chemical_model.objects.get(pk=chem_pk).delete()
    return redirect(document)
Code example #6
0
 def clean(self):
     """Validate the whole bulk-upload formset and build model instances.

     Pass 1 accumulates ValidationErrors (bad script/unit/weight-fraction
     ids, unknown data_document_ids, 1:1 parent-field conflicts) and
     raises them all at once.  Pass 2 constructs or updates the
     DataDocument / Parent / Child instances per form and stores them in
     each form's cleaned_data under 'datadocument', 'parent', 'child'.
     Change tracking is stashed on each instance's _meta as
     created_fields / updated_fields dicts.
     """
     validation_errors = []
     # We're now CPU bound on this call, not SQL bound. Make for a more fun problem.
     Parent, Child = get_extracted_models(self.dg.type)
     unique_parent_ids = set(f.cleaned_data["data_document_id"] for f in self.forms)
     # Check that extraction_script is valid
     extraction_script_id = self.forms[0].cleaned_data["extraction_script_id"]
     if not Script.objects.filter(
         script_type="EX", pk=extraction_script_id
     ).exists():
         err = forms.ValidationError("Invalid extraction script selection.")
         validation_errors.append(err)
     # Check that unit_type is valid
     unit_type_ids = (
         f.cleaned_data["unit_type_id"]
         for f in self.forms
         if f.cleaned_data.get("unit_type_id") is not None
     )
     bad_ids = get_missing_ids(UnitType, unit_type_ids)
     if bad_ids:
         err_str = 'The following "unit_type"s were not found: '
         err_str += ", ".join("%d" % i for i in bad_ids)
         err = forms.ValidationError(err_str)
         validation_errors.append(err)
     # Check that weight_fraction_type is valid
     weight_fraction_type_ids = (
         f.cleaned_data["weight_fraction_type_id"]
         for f in self.forms
         if f.cleaned_data.get("weight_fraction_type_id") is not None
     )
     bad_ids = get_missing_ids(WeightFractionType, weight_fraction_type_ids)
     if bad_ids:
         err_str = 'The following "weight_fraction_type"s were not found: '
         err_str += ", ".join("%d" % i for i in bad_ids)
         err = forms.ValidationError(err_str)
         validation_errors.append(err)
     # Check that the data_document_id are all valid
     datadocument_dict = DataDocument.objects.filter(data_group=self.dg).in_bulk(
         unique_parent_ids
     )
     if len(datadocument_dict) != len(unique_parent_ids):
         bad_ids = unique_parent_ids - datadocument_dict.keys()
         err_str = (
             'The following "data_document_id"s were not found for this data group: '
         )
         err_str += ", ".join("%d" % i for i in bad_ids)
         err = forms.ValidationError(err_str)
         validation_errors.append(err)
     # Check that parent fields do not conflict (OneToOne check)
     # The OneToOne field name depends on which Parent model is in play.
     if hasattr(Parent, "cat_code"):
         oto_field = "cat_code"
     elif hasattr(Parent, "prod_name"):
         oto_field = "prod_name"
     else:
         oto_field = None
     if oto_field:
         unique_parent_oto_fields = set(
             (f.cleaned_data["data_document_id"], f.cleaned_data[oto_field])
             for f in self.forms
         )
         # More (doc_id, oto_value) pairs than doc_ids means some doc_id
         # appeared with two different values — a 1:1 violation.
         if len(unique_parent_ids) != len(unique_parent_oto_fields):
             unseen_parents = set(unique_parent_ids)
             bad_ids = []
             for i, _ in unique_parent_oto_fields:
                 if i in unseen_parents:
                     unseen_parents.remove(i)
                 else:
                     bad_ids.append(i)
             err_str = (
                 'The following "data_document_id"s got unexpected "%s"s (must be 1:1): '
                 % oto_field
             )
             err_str += ", ".join("%d" % i for i in bad_ids)
             err = forms.ValidationError(err_str)
             validation_errors.append(err)
     if validation_errors:
         raise forms.ValidationError(validation_errors)
     # Make the DataDocument, Parent, and Child objects and validate them
     parent_dict = Parent.objects.in_bulk(unique_parent_ids)
     unseen_parents = set(unique_parent_ids)
     for form in self.forms:
         data = form.cleaned_data
         pk = data["data_document_id"]
         # Parent and DataDocument
         # Only the first form for each data_document_id builds the
         # parent/datadocument; later forms for the same id get None.
         if pk in unseen_parents:
             # DataDocument updates
             datadocument = datadocument_dict[pk]
             new_raw_category = data["raw_category"]
             old_raw_category = datadocument.raw_category
             if new_raw_category != old_raw_category:
                 datadocument.raw_category = new_raw_category
                 datadocument.clean(skip_type_check=True)
                 datadocument._meta.created_fields = {}
                 datadocument._meta.updated_fields = {
                     "raw_category": {
                         "old": old_raw_category,
                         "new": new_raw_category,
                     }
                 }
             else:
                 datadocument._meta.created_fields = {}
                 datadocument._meta.updated_fields = {}
             # Parent creates
             parent_params = clean_dict(data, Parent)
             if pk not in parent_dict:
                 parent = Parent(**parent_params)
                 parent._meta.created_fields = parent_params
                 parent._meta.updated_fields = {}
             # Parent updates
             else:
                 parent = parent_dict[pk]
                 parent._meta.created_fields = {}
                 parent._meta.updated_fields = {}
                 for field, new_value in parent_params.items():
                     old_value = getattr(parent, field)
                     if new_value != old_value:
                         setattr(parent, field, new_value)
                         parent._meta.updated_fields[field] = {
                             "old_value": old_value,
                             "new_value": new_value,
                         }
             # Mark this parent as seen
             unseen_parents.remove(pk)
         else:
             parent = None
             datadocument = None
         # Child creates
         child_params = clean_dict(data, Child)
         # Only include children if relevant data is attached
         if child_params.keys() - {"extracted_text_id", "weight_fraction_type_id"}:
             child = Child(**child_params)
             child._meta.created_fields = child_params
             child._meta.updated_fields = {}
         else:
             child = None
         # Store in dictionary
         data["datadocument"] = datadocument
         data["parent"] = parent
         data["child"] = child
Code example #7
0
def data_group_detail(request, pk,
                      template_name='data_group/datagroup_detail.html'):
    """Render a DataGroup's detail page and handle its four POST actions.

    POST branches (selected by the submit button name in request.POST):
    'upload'                  — match uploaded PDFs to documents and zip them
    'extract_button'          — bulk-load extracted records from a CSV
    'bulk'                    — create stub Products for unlinked documents
    'clean_comp_data_button'  — bulk-load cleaned composition data from a CSV
    GET simply renders the paginated document listing.
    """
    dg = get_object_or_404(DataGroup, pk=pk, )
    dg_type = str(dg.type)
    dg.doc_types = DocumentType.objects.filter(group_type=dg.group_type)
    docs = dg.datadocument_set.get_queryset()#this needs to be updated after matching...
    prod_link = ProductDocument.objects.filter(document__in=docs)
    page = request.GET.get('page')
    paginator = Paginator(docs, 50) # TODO: make this dynamic someday in its own ticket
    store = settings.MEDIA_URL + str(dg.fs_id)
    ext = ExtractedText.objects.filter(data_document_id__in=docs).first()
    if ext:
        ext = ext.pull_out_cp()
    context = { 'datagroup'      : dg,
                'documents'      : paginator.page(1 if page is None else page),
                'all_documents'  : docs, # this used for template download
                'extract_fields' : dg.get_extracted_template_fieldnames(),
                'ext_err'        : {},
                'clean_comp_err'        : {},
                'extract_form'   : include_extract_form(dg),
                'clean_comp_data_form'   : include_clean_comp_data_form(dg),
                'bulk'           : len(docs) - len(prod_link),
                'msg'            : '',
                }
    if request.method == 'POST' and 'upload' in request.POST:
        # match filename to pdf name
        matched_files = [f for d in docs for f
                in request.FILES.getlist('multifiles') if f.name == d.filename]
        if not matched_files:
            context['msg'] = ('There are no matching records in the '
                                                        'selected directory.')
            return render(request, template_name, context)
        zf = zipfile.ZipFile(dg.zip_file, 'a', zipfile.ZIP_DEFLATED)
        while matched_files:
            f = matched_files.pop(0)
            doc = DataDocument.objects.get(filename=f.name,
                                            data_group=dg.pk)
            # already-matched documents are skipped (not re-saved/re-zipped)
            if doc.matched:
                continue
            doc.matched = True
            doc.save()
            fs = FileSystemStorage(store + '/pdf')
            afn = doc.get_abstract_filename()
            fs.save(afn, f)
            zf.write(store + '/pdf/' + afn, afn)
        zf.close()
        form = include_extract_form(dg)
        # update docs so it appears in the template table w/ "matched" docs
        context['all_documents'] = dg.datadocument_set.get_queryset()
        context['extract_form'] = form
        context['msg'] = 'Matching records uploaded successfully.'
    if request.method == 'POST' and 'extract_button' in request.POST:
        extract_form = ExtractionScriptForm(request.POST,
                                                request.FILES,dg_type=dg.type)
        if extract_form.is_valid():
            csv_file = request.FILES.get('extract_file')
            script_pk = int(request.POST['script_selection'])
            script = Script.objects.get(pk=script_pk)
            info = [x.decode('ascii','ignore') for x in csv_file.readlines()]
            table = csv.DictReader(info)
            missing =  list(set(dg.get_extracted_template_fieldnames())-
                                                        set(table.fieldnames))
            if missing: #column names are NOT a match, send back to user
                context['msg'] = ('The following columns need to be added or '
                                            f'renamed in the csv: {missing}')
                return render(request, template_name, context)
            good_records = []
            ext_parent, ext_child = get_extracted_models(dg_type)
            for i, row in enumerate(csv.DictReader(info)):
                d = docs.get(pk=int(row['data_document_id']))
                d.raw_category = row.pop('raw_category')
                wft = request.POST.get('weight_fraction_type', None)
                if wft: # this signifies 'Composition' type
                    w = 'weight_fraction_type'
                    row[w] = WeightFractionType.objects.get(pk=int(wft))
                    unit_type_id = int(row['unit_type'])
                    row['unit_type'] = UnitType.objects.get(pk=unit_type_id)
                    rank = row['ingredient_rank']
                    row['ingredient_rank'] = None if rank == '' else rank
                ext, created = ext_parent.objects.get_or_create(data_document=d,
                                                    extraction_script=script)
                if not created and ext.prod_name != row['prod_name']:
                    # check that there is a 1:1 relation w/ prod_name
                    err_msg = ['must be 1:1 with "data_document_id".']
                    context['ext_err'][i+1] = {'prod_name': err_msg}
                if created:
                    update_fields(row, ext)
                row['extracted_text'] = ext
                if (ext_child == ExtractedListPresence):
                    row['extracted_cpcat'] = ext
                row = clean_dict(row, ext_child)
                try:
                    ext.full_clean()
                    ext.save()
                    record = ext_child(**row)
                    record.full_clean()
                except ValidationError as e:
                    context['ext_err'][i+1] = e.message_dict
                # NOTE(review): if full_clean() above raised before `record`
                # was assigned, this appends a stale record from a previous
                # iteration (or raises NameError on the first row) — worth
                # confirming and guarding upstream.
                good_records.append((d,ext,record))
            if context['ext_err']: # if errors, send back with errors
                return render(request, template_name, context)
            if not context['ext_err']:  # no saving until all errors are removed
                for doc,text,record in good_records:
                    doc.extracted = True
                    doc.save()
                    text.save()
                    record.save()
                fs = FileSystemStorage(store)
                fs.save(str(dg)+'_extracted.csv', csv_file)
                context['msg'] = (f'{len(good_records)} extracted records '
                                                    'uploaded successfully.')
                context['extract_form'] = include_extract_form(dg)
    if request.method == 'POST' and 'bulk' in request.POST:
        # get the set of documents that have not been matched
        a = set(docs.values_list('pk',flat=True))
        b = set(prod_link.values_list('document_id',flat=True))
        # DataDocs to make products for...
        docs_needing_products = DataDocument.objects.filter(pk__in=list(a-b))
        stub = Product.objects.all().count() + 1
        for doc in docs_needing_products:
            # Try to name the new product from the ExtractedText record's prod_name
            try:
                ext = ExtractedText.objects.get(data_document_id=doc.id)
                if ext:
                    ext = ext.pull_out_cp()
                    if ext.prod_name:
                        new_prod_title = ext.prod_name
                    else:
                        new_prod_title = None
            except ExtractedText.DoesNotExist:
                new_prod_title = None
            # If the ExtractedText record can't provide a title, use the DataDocument's title
            if not new_prod_title:
                if doc.title:
                    new_prod_title = '%s stub' % doc.title
                else:
                    new_prod_title = 'unknown'
            product = Product.objects.create(
                                    title=new_prod_title,
                                    upc=f'stub_{stub}',
                                    data_source_id=doc.data_group.data_source_id
                                    )
            ProductDocument.objects.create(product=product, document=doc)
            stub += 1
        context['bulk'] = 0
    if request.method == 'POST' and 'clean_comp_data_button' in request.POST:
        clean_comp_data_form = CleanCompDataForm(request.POST, request.FILES)
        if clean_comp_data_form.is_valid():
            script_pk = int(request.POST['script_selection'])
            script = Script.objects.get(pk=script_pk)
            csv_file = request.FILES.get('clean_comp_data_file')
            info = [x.decode('ascii','ignore') for x in csv_file.readlines()]
            table = csv.DictReader(info)
            missing =  list(set(dg.get_clean_comp_data_fieldnames())-
                                                        set(table.fieldnames))
            if missing: #column names are NOT a match, send back to user
                context['clean_comp_data_form'].collapsed = False
                context['msg'] = ('The following columns need to be added or '
                                            f'renamed in the csv: {missing}')
                return render(request, template_name, context)

            good_records = []
            for i, row in enumerate(csv.DictReader(info)):
                try:
                    extracted_chemical = ExtractedChemical.objects.get(pk=int(row['id']))
                except ExtractedChemical.DoesNotExist as e:
                    extracted_chemical = None
                    context['clean_comp_err'][i + 1] = {'id': ['No ExtractedChemical matches id ' + row['id'], ]}
                try:
                    ingredient = Ingredient.objects.get(extracted_chemical=extracted_chemical)
                except Ingredient.DoesNotExist as e:
                    # no existing Ingredient for this chemical — build a new one
                    ingredient = Ingredient(extracted_chemical=extracted_chemical)
                ingredient.lower_wf_analysis = row['lower_wf_analysis']
                ingredient.central_wf_analysis = row['central_wf_analysis']
                ingredient.upper_wf_analysis = row['upper_wf_analysis']
                ingredient.script = script
                try:
                    ingredient.full_clean()
                except ValidationError as e:
                    context['clean_comp_err'][i+1] = e.message_dict
                good_records.append(ingredient)
            if context['clean_comp_err']: # if errors, send back with errors
                context['clean_comp_data_form'].collapsed = False
                return render(request, template_name, context)
            if not context['clean_comp_err']:  # no saving until all errors are removed
                for ingredient in good_records:
                    ingredient.save()
                context['msg'] = (f'{len(good_records)} clean composition data records '
                                                    'uploaded successfully.')
                context['clean_comp_data_form'] = include_clean_comp_data_form(dg)
        else:
            context['clean_comp_data_form'].collapsed = False

    return render(request, template_name, context)