Example #1
0
    def test_chem_upload(self):
        """Post chemical-extraction CSVs to data group 6 via the
        data_group_detail view.

        First an invalid CSV must surface a "must be 1:1" error and add
        no ExtractedText rows; then a valid CSV must report 2 uploaded
        records, create the ExtractedText rows, and populate
        DataDocument.raw_category.
        """
        req_data = {'script_selection': 5,
                    'weight_fraction_type': 1,
                    'extract_button': 'Submit',
                    }

        # Build the POST as an authenticated user with the invalid CSV
        # attached as the upload file.
        req = self.factory.post(path = '/datagroup/6/' , data=req_data)
        req.user = User.objects.get(username='******')
        req.FILES['extract_file'] = self.generate_invalid_chem_csv()
        # get error in response
        resp = views.data_group_detail(request=req, pk=6)
        # print(resp.content)
        self.assertContains(resp,'must be 1:1')
        text_count = ExtractedText.objects.all().count()
        self.assertTrue(text_count < 2, 
                                    'Shouldn\'t be 2 extracted texts uploaded')
        # Now get the success response
        req.FILES['extract_file'] = self.generate_valid_chem_csv()
        doc_count = DataDocument.objects.filter(
                                    raw_category='aerosol hairspray').count()
        self.assertTrue(doc_count == 0, 
                            'DataDocument raw category shouldn\'t exist yet.')
        resp = views.data_group_detail(request=req, pk=6)
        self.assertContains(resp,'2 extracted records uploaded successfully.')

        doc_count = DataDocument.objects.filter(
                                    raw_category='aerosol hairspray').count()
        self.assertTrue(doc_count > 0, 
                            'DataDocument raw category values must be updated.')
        text_count = ExtractedText.objects.all().count()
        self.assertTrue(text_count == 2, 'Should be 2 extracted texts')
        # Remove the data group so later tests see pristine fixtures.
        dg = DataGroup.objects.get(pk=6)
        dg.delete()
    def test_presence_upload(self):
        """Exercise three presence-CSV uploads against data group 49.

        (1) a CSV whose rows don't map 1:1 onto data documents -> error,
        (2) a CSV with an over-long field value -> validation error,
        (3) a good CSV -> 3 records reported, ExtractedCPCat rows created,
            raw_category updated, and raw_cas whitespace stripped.

        Uses QuerySet.count() instead of len(queryset) so the count runs
        in the database instead of materializing every row.
        """
        # Delete the CPCat records that were loaded with the fixtures
        ExtractedCPCat.objects.all().delete()
        self.assertEqual(ExtractedCPCat.objects.count(), 0,
                         "Should be empty before upload.")
        usr = User.objects.get(username="******")
        # test for error to be propagated w/o a 1:1 match of ExtCPCat to DataDoc
        in_mem_sample_csv = make_upload_csv("sample_files/presence_cpcat.csv")
        req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
        req_data.update(self.mng_data)
        req = self.factory.post("/datagroup/49/", data=req_data)
        req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
        # RequestFactory requests have no middleware, and the view posts
        # session-backed messages, so apply both middlewares by hand.
        middleware = SessionMiddleware()
        middleware.process_request(req)
        req.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(req)
        req.session.save()
        req.user = usr
        resp = views.data_group_detail(request=req, pk=49)
        self.assertContains(resp, "must be 1:1")
        self.assertEqual(
            ExtractedCPCat.objects.count(),
            0,
            "ExtractedCPCat records remain 0 if error in upload.",
        )
        # test for error to propagate w/ too many chars in a field
        in_mem_sample_csv = make_upload_csv("sample_files/presence_chars.csv")
        req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
        resp = views.data_group_detail(request=req, pk=49)
        self.assertContains(resp,
                            "Ensure this value has at most 50 characters")
        self.assertEqual(
            ExtractedCPCat.objects.count(),
            0,
            "ExtractedCPCat records remain 0 if error in upload.",
        )
        # test that upload works successfully...
        in_mem_sample_csv = make_upload_csv("sample_files/presence_good.csv")
        req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
        resp = views.data_group_detail(request=req, pk=49)
        self.assertContains(resp, "3 extracted records uploaded successfully.")

        doc_count = DataDocument.objects.filter(
            raw_category="list presence category").count()
        self.assertTrue(doc_count > 0,
                        "DataDocument raw category values must be updated.")
        self.assertEqual(ExtractedCPCat.objects.count(), 2,
                         "Two after upload.")
        chem = ExtractedListPresence.objects.get(
            raw_cas__icontains="100784-20-1")
        self.assertTrue(chem.raw_cas[0] != " ",
                        "White space should be stripped.")
        # Clean up the data group so this test leaves no residue.
        dg = DataGroup.objects.get(pk=49)
        dg.delete()
Example #3
0
 def test_valid_clean_comp_data_with_BOM_upload(self):
     """A clean-composition CSV whose content comes from the BOM-bearing
     generator (generator body not visible here; presumably the string is
     prefixed with a UTF-8 BOM -- confirm against the test class) should
     still upload successfully and create 2 ExtractedChemical rows for
     script 17."""
     sample_csv = self.generate_valid_clean_comp_data_with_BOM_csv_string()
     sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
     in_mem_sample_csv = InMemoryUploadedFile(
         io.BytesIO(sample_csv_bytes),
         field_name="cleancomp-bulkformsetfileupload",
         name="clean_comp_data.csv",
         content_type="text/csv",
         size=len(sample_csv),
         charset="utf-8",
     )
     req_data = {"cleancomp-script_id": 17, "cleancomp-submit": "Submit"}
     req_data.update(self.mng_data)
     req = self.factory.post(path="/datagroup/47/", data=req_data)
     req.FILES["cleancomp-bulkformsetfileupload"] = in_mem_sample_csv
     # RequestFactory requests carry no middleware; the view needs a
     # session and the messages framework, so wire both in manually.
     middleware = SessionMiddleware()
     middleware.process_request(req)
     req.session.save()
     middleware = MessageMiddleware()
     middleware.process_request(req)
     req.session.save()
     req.user = User.objects.get(username="******")
     resp = views.data_group_detail(request=req, pk=47)
     self.assertContains(
         resp, "2 clean composition data records uploaded successfully.")
     self.assertEqual(
         ExtractedChemical.objects.filter(script_id=17).count(),
         2,
         "There should be only 2 ExtractedChemical objects",
     )
 def test_blank_upc_upload(self):
     """
     Each UPC should be made and be unique

     Uploads five product rows with no upc column value: all five must be
     accepted, five ProductDocument links created, and the first
     document's detail page must show the new product title.
     """
     sample_csv = (
         "data_document_id,data_document_filename,title,upc,url,brand_name,size,color,item_id,parent_item_id,short_description,long_description,thumb_image,medium_image,large_image,model_number,manufacturer\n"
         "177852,fff53301-a199-4e1b-91b4-39227ca0fe3c.pdf,'product title a'\n"
         "178456,fefd813f-d1e0-4fa7-8c4e-49030eca08a3.pdf,'product title b'\n"
         "178496,fc5f964c-91e2-42c5-9899-2ff38e37ba89.pdf,'product title c'\n"
         "161698,f040f93d-1cf3-4eff-85a9-da14d8d2e252.pdf,'product title d'\n"
         "159270,f040f93d-1cf3-4eff-85a9-da14d8d2e253.pdf,'product title e'"
     )
     req = self.generate_csv_request(sample_csv)
     pre_pdcount = ProductDocument.objects.count()
     resp = views.data_group_detail(request=req, pk=23)
     self.assertContains(resp, "5 records have been successfully uploaded.")
     # ProductDocument records should also have been added
     post_pdcount = ProductDocument.objects.count()
     self.assertEqual(
         post_pdcount,
         pre_pdcount + 5,
         "There should be 5 more ProductDocuments after the upload",
     )
     # The first data_document_id in the csv should now have
     # two Products linked to it, including the new one.
     # (The original mixed an f-string prefix with %-formatting:
     # f"/datadocument/%s/" % 177852 -- the f did nothing.)
     first_doc_id = 177852
     resp = self.c.get(f"/datadocument/{first_doc_id}/")
     self.assertContains(resp, "product title a")
Example #5
0
    def test_functionaluse_upload(self):
        """Upload a one-row functional-use CSV for DataDocument 500.

        Builds the CSV in memory, posts it to data group 50, and checks
        that one ExtractedFunctionalUse record appears and that the
        document's raw_category is set from the CSV's raw_category column.
        """
        # This action is performed on a data document without extracted text
        # but with a matched data document. DataDocument 500 was added to the
        # seed data for this test
        dd_id = 500
        dd = DataDocument.objects.get(pk=dd_id)
        #et = ExtractedText.objects.get(data_document=dd)
        dd_pdf = dd.pdf_url()
        
        # One header row plus one data row; the empty field after the date
        # is rev_num, deliberately left blank.
        sample_csv = ("data_document_id,data_document_filename,prod_name,"
                      "doc_date,rev_num,raw_category,raw_cas,raw_chem_name,report_funcuse"
                    "\n"
                    "%s,"
                    "%s,"
                    "sample functional use product,"
                    "2018-04-07,"
                    ","
                    "raw PUC,"
                    "RAW-CAS-01,"
                    "raw chemname 01,"
                    "surfactant" % (dd_id, dd_pdf)
        )
        sample_csv_bytes = sample_csv.encode(encoding='UTF-8',errors='strict' )
        in_mem_sample_csv = InMemoryUploadedFile(
                io.BytesIO(sample_csv_bytes),
                field_name='extract_file',
                name='Functional_use_extract_template.csv',
                content_type='text/csv',
                size=len(sample_csv),
                charset='utf-8',
        )
        req_data = {'script_selection': 5,
                    'extract_button': 'Submit',
                    }
        
        req = self.factory.post('/datagroup/50/' , data=req_data)
        req.FILES['extract_file'] = in_mem_sample_csv
        req.user = User.objects.get(username='******')
        self.assertEqual(len(ExtractedFunctionalUse.objects.filter(extracted_text_id=dd_id)),0,
                            "Empty before upload.")
        # Now get the response
        resp = views.data_group_detail(request=req, pk=50)
        self.assertContains(resp,'1 extracted records uploaded successfully.')

        doc_count = DataDocument.objects.filter(raw_category='raw PUC').count()
        self.assertTrue(doc_count > 0, 'DataDocument raw category values must be updated.')

        self.assertEqual(len(ExtractedFunctionalUse.objects.filter(extracted_text_id=dd_id)),1,
                            "One new ExtractedFunctionalUse after upload.")
 def test_bad_header_upload(self):
     """
     The bad header should cause the entire form to be invalid

     The header misspells data_document_id as data_document_idX and
     data_document_filename as data_document_file_name; the view must
     report the missing required column and echo the expected titles.
     """
     header = "data_document_idX,data_document_file_name,title,upc,url,brand_name,size,color,item_id,parent_item_id,short_description,long_description,thumb_image,medium_image,large_image,model_number,manufacturer"
     data_rows = [
         "177852,fff53301-a199-4e1b-91b4-39227ca0fe3c.pdf,'product title a',110230011425",
         "178456,fefd813f-d1e0-4fa7-8c4e-49030eca08a3.pdf,'product title b',903944840750",
         "178496,fc5f964c-91e2-42c5-9899-2ff38e37ba89.pdf,'product title c',852646877466",
         "161698,f040f93d-1cf3-4eff-85a9-da14d8d2e252.pdf,'product title d'",
         "159270,f040f93d-1cf3-4eff-85a9-da14d8d2e253.pdf,'product title e'",
     ]
     bad_csv = "\n".join([header] + data_rows)
     request = self.generate_csv_request(bad_csv)
     response = views.data_group_detail(request=request, pk=23)
     self.assertContains(response,
                         "data_document_filename: This field is required.")
     self.assertContains(response, "CSV column titles should be ")
 def test_existing_upc_upload(self):
     """
     A UPC that exists in the database already should be rejected

     The first three rows carry fresh UPCs; the fourth (161698,
     upc "stub_11") collides with an existing record, so the view must
     report it as rejected while still uploading the other three.
     """
     sample_csv = (
         "data_document_id,data_document_filename,title,upc,url,brand_name,size,color,item_id,parent_item_id,short_description,long_description,thumb_image,medium_image,large_image,model_number,manufacturer\n"
         "162019,fff53301-a199-4e1b-91b4-39227ca0fe3c.pdf,'product title a',110230011425\n"
         "161990,fefd813f-d1e0-4fa7-8c4e-49030eca08a3.pdf,'product title b',903944840750\n"
         "161938,fc5f964c-91e2-42c5-9899-2ff38e37ba89.pdf,'product title c',852646877466\n"
         "161698,f040f93d-1cf3-4eff-85a9-da14d8d2e252.pdf,'product title d',stub_11"
     )
     req = self.generate_csv_request(sample_csv)
     resp = views.data_group_detail(request=req, pk=23)
     self.assertContains(
         resp,
         "The following records had existing or duplicated UPCs and were not added: 161698",
     )
     self.assertContains(resp, "3 records have been successfully uploaded")
Example #8
0
    def test_presence_upload(self):
        """Upload a two-row chemical-presence CSV to data group 49.

        Builds the CSV in memory, posts it, and checks that two
        ExtractedCPCat records are created and the documents'
        raw_category is set from the CSV.
        """
        # Header plus two rows; the empty field after the filename is
        # doc_date, deliberately left blank.
        sample_csv = ("data_document_id,data_document_filename,"
                    "doc_date,raw_category,raw_cas,raw_chem_name,"
                    "cat_code,description_cpcat,cpcat_code,cpcat_sourcetype"
                    "\n"
                    "254780,11177849.pdf,,list presence category,"
                    "0000075-37-6,hydrofluorocarbon 152a,presence_cat_code,"
                    "desc,cpcat_code,free"
                    "\n"
                    "254781,11165872.pdf,,list presence category,"
                    "0000064-17-5,sd alcohol 40-b (ethanol),presence_cat_code,"
                    "desc,cpcat_code,free"
        )
        sample_csv_bytes = sample_csv.encode(encoding='UTF-8',errors='strict' )
        in_mem_sample_csv = InMemoryUploadedFile(
                io.BytesIO(sample_csv_bytes),
                field_name='extract_file',
                name='SIRI_Chemical_Presence_extract_template.csv',
                content_type='text/csv',
                size=len(sample_csv),
                charset='utf-8',
        )
        req_data = {'script_selection': 5,
                    'extract_button': 'Submit',
                    }

        req = self.factory.post('/datagroup/49/' , data=req_data)
        req.FILES['extract_file'] = in_mem_sample_csv
        req.user = User.objects.get(username='******')
        self.assertEqual(len(ExtractedCPCat.objects.all()),0,
                            "Empty before upload.")
        # Now get the response
        resp = views.data_group_detail(request=req, pk=49)
        self.assertContains(resp,'2 extracted records uploaded successfully.')

        doc_count = DataDocument.objects.filter(raw_category='list presence category').count()
        self.assertTrue(doc_count > 0, 'DataDocument raw category values must be updated.')

        self.assertEqual(len(ExtractedCPCat.objects.all()),2,
                            "Two after upload.")

        # Remove the data group so later tests see pristine fixtures.
        dg = DataGroup.objects.get(pk=49)
        dg.delete()
 def test_invalid_clean_comp_data_upload(self):
     """An invalid clean-composition CSV (it references ExtractedChemical
     id 999, which doesn't exist) must produce a matching error in the
     data_group_detail response."""
     csv_text = self.generate_invalid_clean_comp_data_csv_string()
     csv_bytes = csv_text.encode(encoding='UTF-8', errors='strict')
     upload = InMemoryUploadedFile(
         io.BytesIO(csv_bytes),
         field_name='clean_comp_data_file',
         name='clean_comp_data.csv',
         content_type='text/csv',
         size=len(csv_text),
         charset='utf-8',
     )
     post_data = {
         'script_selection': 17,
         'clean_comp_data_button': 'Submit',
     }
     request = self.factory.post(path='/datagroup/6/', data=post_data)
     request.FILES['clean_comp_data_file'] = upload
     request.user = User.objects.get(username='******')
     response = views.data_group_detail(request=request, pk=6)
     self.assertContains(response, 'No ExtractedChemical matches id 999')
Example #10
0
 def test_invalid_clean_comp_data_upload(self):
     """An invalid clean-composition CSV must surface all four expected
     validation errors: out-of-range weight fraction, min without max,
     nonexistent ExtractedChemical id, and central value combined with
     min/max."""
     sample_csv = self.generate_invalid_clean_comp_data_csv_string()
     sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
     in_mem_sample_csv = InMemoryUploadedFile(
         io.BytesIO(sample_csv_bytes),
         field_name="cleancomp-bulkformsetfileupload",
         name="clean_comp_data.csv",
         content_type="text/csv",
         size=len(sample_csv),
         charset="utf-8",
     )
     req_data = {"cleancomp-script_id": 12, "cleancomp-submit": "Submit"}
     req_data.update(self.mng_data)
     req = self.factory.post(path="/datagroup/47/", data=req_data)
     # Session/message middleware are attached by hand because
     # RequestFactory skips the middleware stack.
     middleware = SessionMiddleware()
     middleware.process_request(req)
     req.session.save()
     middleware = MessageMiddleware()
     middleware.process_request(req)
     req.session.save()
     req.FILES["cleancomp-bulkformsetfileupload"] = in_mem_sample_csv
     req.user = User.objects.get(username="******")
     resp = views.data_group_detail(request=req, pk=47)
     self.assertContains(
         resp, "lower_wf_analysis: Quantity 1.7777 must be between 0 and 1")
     # NOTE(review): "maximimum" below looks like a typo, but it must
     # match the application's actual message text -- confirm before fixing.
     self.assertContains(
         resp,
         "Both minimum and maximimum weight fraction values must be provided, not just one.",
     )
     self.assertContains(
         resp,
         "The following IDs do not exist in ExtractedChemicals for this data group: 999",
     )
     self.assertContains(
         resp,
         "Central weight fraction value cannot be defined with minimum value and maximum value.",
     )
Example #11
0
 def test_noscript(self):
     """Omitting cleancomp-script_id from the POST must make the upload
     form invalid and render "This field is required"."""
     sample_csv = self.generate_valid_clean_comp_data_csv_string()
     sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
     in_mem_sample_csv = InMemoryUploadedFile(
         io.BytesIO(sample_csv_bytes),
         field_name="cleancomp-bulkformsetfileupload",
         name="clean_comp_data.csv",
         content_type="text/csv",
         size=len(sample_csv),
         charset="utf-8",
     )
     # Deliberately no "cleancomp-script_id" key here.
     req_data = {"cleancomp-submit": "Submit"}
     req_data.update(self.mng_data)
     req = self.factory.post(path="/datagroup/47/", data=req_data)
     req.FILES["cleancomp-bulkformsetfileupload"] = in_mem_sample_csv
     # Session/message middleware are attached by hand because
     # RequestFactory skips the middleware stack.
     middleware = SessionMiddleware()
     middleware.process_request(req)
     req.session.save()
     middleware = MessageMiddleware()
     middleware.process_request(req)
     req.session.save()
     req.user = User.objects.get(username="******")
     resp = views.data_group_detail(request=req, pk=47)
     self.assertContains(resp, "This field is required")
    def test_valid_clean_comp_data_upload(self):
        """A valid clean-composition CSV posted to data group 6 should
        report 2 uploaded records and create exactly 2 Ingredient rows."""
        sample_csv = self.generate_valid_clean_comp_data_csv_string()
        sample_csv_bytes = sample_csv.encode(encoding='UTF-8', errors='strict')
        in_mem_sample_csv = InMemoryUploadedFile(
            io.BytesIO(sample_csv_bytes),
            field_name='clean_comp_data_file',
            name='clean_comp_data.csv',
            content_type='text/csv',
            size=len(sample_csv),
            charset='utf-8',
        )
        req_data = {
            'script_selection': 17,
            'clean_comp_data_button': 'Submit',
        }
        req = self.factory.post(path='/datagroup/6/', data=req_data)
        req.FILES['clean_comp_data_file'] = in_mem_sample_csv
        req.user = User.objects.get(username='******')
        resp = views.data_group_detail(request=req, pk=6)
        self.assertContains(
            resp, '2 clean composition data records uploaded successfully.')

        self.assertEqual(Ingredient.objects.count(), 2,
                         "There should be only 2 Ingredient objects")
    def test_chem_upload(self):
        """End-to-end chemical-extraction upload against data group 6.

        Checks the extraction-script <select> options, then posts an
        invalid CSV (expecting the full set of form errors and no new
        ExtractedText rows) followed by a valid CSV (expecting 4 uploaded
        records, 3 new RawChem/ExtractedChemical rows, and updated
        raw_category values).

        Uses assertEqual rather than the deprecated assertEquals alias
        (removed in Python 3.12) and rather than assertTrue(a == b),
        which hides both operands when a comparison fails.
        """
        # create a matched-but-not-extracted document
        # so that uploading ExtractedText is an option
        dd = DataDocument.objects.create(
            filename="fake.pdf",
            title="Another unextracted document",
            matched=True,
            data_group_id="6",
            created_at="2019-11-18 11:00:00.000000",
            updated_at="2019-11-18 12:00:00.000000",
            document_type_id="2",
            organization="Org",
        )

        # Check the scripts offered in the selection form
        resp = self.c.get(path="/datagroup/6/", stream=True)
        soup = bs4.BeautifulSoup(resp.content, features="lxml")
        selector = soup.find_all(
            attrs={"name": "extfile-extraction_script"})[0]

        # The options should include "Home Depot (extraction)"
        hd = soup.find_all(string="Home Depot (extraction)")
        self.assertEqual(hd[0], "Home Depot (extraction)")

        # The Sun INDS script should not be available, because
        # its qa_begun value is True
        gp = soup.find_all(string="Sun INDS (extract)")
        self.assertEqual(gp, [])

        req_data = {
            "extfile-extraction_script": 5,
            "extfile-weight_fraction_type": 1,
            "extfile-submit": "Submit",
        }
        req_data.update(self.mng_data)
        req = self.factory.post(path="/datagroup/6/", data=req_data)
        # Session/message middleware are attached by hand because
        # RequestFactory skips the middleware stack.
        middleware = SessionMiddleware()
        middleware.process_request(req)
        req.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(req)
        req.session.save()
        req.user = User.objects.get(username="******")
        req.FILES[
            "extfile-bulkformsetfileupload"] = self.generate_invalid_chem_csv(
            )
        # get error in response
        text_count = ExtractedText.objects.all().count()
        resp = views.data_group_detail(request=req, pk=6)
        self.assertContains(resp, "must be 1:1")
        self.assertContains(resp, "were not found for this data group")
        self.assertContains(resp, "There must be a unit type")
        self.assertContains(resp,
                            "Central composition value cannot be defined")
        # (sic: "maximimum" must match the application's message text)
        self.assertContains(resp, "Both minimum and maximimum")
        post_text_count = ExtractedText.objects.all().count()
        self.assertEqual(text_count, post_text_count,
                         "Shouldn't have extracted texts uploaded")
        # Now get the success response
        req.FILES[
            "extfile-bulkformsetfileupload"] = self.generate_valid_chem_csv()
        doc_count = DataDocument.objects.filter(
            raw_category="aerosol hairspray").count()
        old_rawchem_count = RawChem.objects.filter().count()
        old_extractedchemical_count = ExtractedChemical.objects.filter().count(
        )
        self.assertEqual(doc_count, 0,
                         "DataDocument raw category shouldn't exist yet.")
        resp = views.data_group_detail(request=req, pk=6)
        self.assertContains(resp, "4 extracted records uploaded successfully.")
        new_rawchem_count = RawChem.objects.filter().count()
        new_extractedchemical_count = ExtractedChemical.objects.filter().count(
        )
        self.assertEqual(
            new_rawchem_count - old_rawchem_count,
            3,
            "There should only be 3 new RawChem records",
        )
        self.assertEqual(
            new_extractedchemical_count - old_extractedchemical_count,
            3,
            "There should only be 3 new ExtractedChemical records",
        )
        doc_count = DataDocument.objects.filter(
            raw_category="aerosol hairspray").count()
        self.assertTrue(doc_count > 0,
                        "DataDocument raw category values must be updated.")
        text_count = ExtractedText.objects.all().count()
        self.assertEqual(text_count, 2, "Should be 2 extracted texts")
        chem_count = ExtractedChemical.objects.filter(
            component="Test Component").count()
        self.assertEqual(
            chem_count,
            3,
            "Should be 3 extracted chemical records with the Test Component",
        )
        # Clean up the data group created/used by this test.
        dg = DataGroup.objects.get(pk=6)
        dg.delete()
Example #14
0
    def test_audit_log_functionaluse_upload(self):
        """AuditLog entries must be written for insert, update, and
        delete of ExtractedFunctionalUse records.

        Uploads a one-row functional-use CSV for DataDocument 500 and
        checks the resulting "I" log entries, then updates the record
        (one "U" entry) and deletes it (three "D" entries).

        Uses assertEqual rather than the deprecated assertEquals alias
        (removed in Python 3.12).
        """
        dd_id = 500
        dd = DataDocument.objects.get(pk=dd_id)
        dd_pdf = dd.pdf_url()

        # Header plus one data row; the blank field after the date is
        # rev_num, deliberately empty.
        sample_csv = (
            "data_document_id,data_document_filename,prod_name,"
            "doc_date,rev_num,raw_category,raw_cas,raw_chem_name,report_funcuse"
            "\n"
            "%s,"
            "%s,"
            "sample functional use product,"
            "2018-04-07,"
            ","
            "raw PUC,"
            "RAW-CAS-01,"
            "raw chemname 01,"
            "surfactant" % (dd_id, dd_pdf))
        sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
        in_mem_sample_csv = InMemoryUploadedFile(
            io.BytesIO(sample_csv_bytes),
            field_name="extfile-bulkformsetfileupload",
            name="Functional_use_extract_template.csv",
            content_type="text/csv",
            size=len(sample_csv),
            charset="utf-8",
        )
        req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
        req_data.update(self.mng_data)
        req = self.factory.post("/datagroup/50/", data=req_data)
        req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
        # Session/message middleware are attached by hand because
        # RequestFactory skips the middleware stack.
        middleware = SessionMiddleware()
        middleware.process_request(req)
        req.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(req)
        req.session.save()
        req.user = User.objects.get(username="******")
        self.assertEqual(
            len(ExtractedFunctionalUse.objects.filter(
                extracted_text_id=dd_id)),
            0,
            "Empty before upload.",
        )
        # Now get the response
        resp = views.data_group_detail(request=req, pk=50)
        self.assertContains(resp, "1 extracted record uploaded successfully.")

        self.assertEqual(
            len(ExtractedFunctionalUse.objects.filter(
                extracted_text_id=dd_id)),
            1,
            "One new ExtractedFunctionalUse after upload.",
        )

        # insert: the upload above should have produced insert ("I") logs
        logs = AuditLog.objects.all()
        self.assertEqual(3, len(logs), "Should have log entries")
        for log in logs:
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNone(log.old_value)
            self.assertIsNotNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("I", log.action, "Should be Insert action")
        logs.delete()

        # update
        efs = ExtractedFunctionalUse.objects.filter(extracted_text_id=dd_id)
        for ef in efs:
            ef.report_funcuse = "test func use"
            ef.save()

        logs = AuditLog.objects.all()
        self.assertEqual(1, len(logs), "Should have log entries")
        for log in logs:
            # NOTE(review): "extractedfuncationaluse" looks misspelled but
            # must match the model name the audit trigger records -- confirm
            # against the model before changing.
            self.assertEqual(log.model_name, "extractedfuncationaluse")
            self.assertEqual(log.field_name, "report_funcuse")
            self.assertIsNotNone(log.new_value)
            self.assertEqual(log.new_value, "test func use")
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("U", log.action, "Should be Update action")
        logs.delete()

        # delete
        for ef in efs:
            ef.delete()
        logs = AuditLog.objects.all()
        self.assertEqual(3, len(logs), "Should have log entries")
        for log in logs:
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNotNone(log.old_value)
            self.assertIsNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("D", log.action, "Should be delete action")
    def test_datagroup_create(self):
        """End-to-end data group creation and follow-on behavior.

        (1) A register-records CSV with an over-long filename is rejected
            and no DataGroup is created.
        (2) A valid CSV redirects to the new group's detail page, creates
            the MEDIA_ROOT folder and two DataDocuments.
        (3) The registered-records and data-documents CSV download links
            both stream content containing a document's filename.
        (4) Uploading a PDF that matches a registered record stores the
            file under MEDIA_ROOT/<fs_id>/pdf/.
        """
        long_fn = "a filename that is too long " * 10
        csv_string = (
            "filename,title,document_type,url,organization\n"
            "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
            f"{long_fn},Body Cream,MS,, \n")
        data = io.StringIO(csv_string)
        sample_csv = InMemoryUploadedFile(
            data,
            field_name="csv",
            name="register_records.csv",
            content_type="text/csv",
            size=len(csv_string),
            charset="utf-8",
        )
        form_data = {
            "name": ["Walmart MSDS Test Group"],
            "description": ["test data group"],
            "group_type": ["2"],
            "downloaded_by": [str(User.objects.get(username="******").pk)],
            "downloaded_at": ["08/02/2018"],
            "download_script": ["1"],
            "data_source": ["10"],
        }
        request = self.factory.post(path="/datagroup/new/", data=form_data)
        request.FILES["csv"] = sample_csv
        request.user = User.objects.get(username="******")
        # Session/message middleware are attached by hand because
        # RequestFactory skips the middleware stack; the session is then
        # replaced with a plain dict carrying the datasource context the
        # view reads.
        middleware = SessionMiddleware()
        middleware.process_request(request)
        request.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(request)
        request.session.save()
        request.session = {}
        request.session["datasource_title"] = "Walmart"
        request.session["datasource_pk"] = 10
        resp = views.data_group_create(request=request, pk=10)
        dg_exists = DataGroup.objects.filter(
            name="Walmart MSDS Test Group").exists()
        self.assertContains(
            resp, "filename: Ensure this value has at most 255 characters")
        self.assertFalse(dg_exists)

        # Retry with a valid CSV (both filenames within limits).
        csv_string = (
            "filename,title,document_type,url,organization\n"
            "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
            "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,MS,, \n")
        data = io.StringIO(csv_string)
        sample_csv = InMemoryUploadedFile(
            data,
            field_name="csv",
            name="register_records.csv",
            content_type="text/csv",
            size=len(csv_string),
            charset="utf-8",
        )
        request = self.factory.post(path="/datagroup/new", data=form_data)
        request.FILES["csv"] = sample_csv
        middleware = SessionMiddleware()
        middleware.process_request(request)
        request.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(request)
        request.session.save()
        request.user = User.objects.get(username="******")
        request.session = {}
        request.session["datasource_title"] = "Walmart"
        request.session["datasource_pk"] = 10
        resp = views.data_group_create(request=request, pk=10)

        self.assertEqual(resp.status_code, 302, "Should be redirecting")

        dg = DataGroup.objects.get(name="Walmart MSDS Test Group")

        self.assertEqual(f"/datagroup/{dg.pk}/", resp.url,
                         "Should be redirecting to the proper URL")

        # test whether the file system folder was created
        self.assertIn(
            str(dg.fs_id),
            os.listdir(settings.MEDIA_ROOT),
            "The data group's UUID should be a folder in MEDIA_ROOT",
        )
        # In the Data Group Detail Page
        resp = self.client.get(f"/datagroup/{dg.pk}/")

        # test whether the data documents were created
        docs = DataDocument.objects.filter(data_group=dg)
        self.assertEqual(len(docs), 2,
                         "there should be two associated documents")

        # test whether the "Download Registered Records" link is like this example

        # <a href="/datagroup/a9c7f5a7-5ad4-4f75-b877-a3747f0cc081/download_registered_documents" class="btn btn-secondary">
        csv_href = f"/datagroup/{dg.pk}/download_registered_documents/"
        self.assertIn(
            csv_href,
            str(resp._container),
            "The data group detail page must contain the right download link",
        )

        # grab a filename from a data document and see if it's in the csv
        doc_fn = docs.first().filename
        # test whether the registered records csv download link works
        resp_rr_csv = self.client.get(
            csv_href)  # this object should be of type StreamingHttpResponse
        docfound = "not found"
        for csv_row in resp_rr_csv.streaming_content:
            if doc_fn in str(csv_row):
                docfound = "found"
        self.assertEqual(
            docfound,
            "found",
            "the document file name should appear in the registered records csv",
        )

        # Test whether the data document csv download works
        # URL on data group detail page: datagroup/docs_csv/{pk}/
        dd_csv_href = (f"/datagroup/{dg.pk}/download_documents/"
                       )  # this is an interpreted django URL
        resp_dd_csv = self.client.get(dd_csv_href)
        for csv_row in resp_dd_csv.streaming_content:
            if doc_fn in str(csv_row):
                docfound = "found"
        self.assertEqual(
            docfound,
            "found",
            "the document file name should appear in the data documents csv",
        )

        # test uploading one pdf that matches a registered record
        doc = DataDocument.objects.filter(data_group_id=dg.pk).first()
        pdf = TemporaryUploadedFile(name=doc.filename,
                                    content_type="application/pdf",
                                    size=47,
                                    charset=None)
        request = self.factory.post(path="/datagroup/%s" % dg.pk,
                                    data={"uploaddocs-submit": "Submit"})
        request.FILES["uploaddocs-documents"] = pdf
        middleware = SessionMiddleware()
        middleware.process_request(request)
        request.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(request)
        request.session.save()
        request.user = User.objects.get(username="******")
        resp = views.data_group_detail(request=request, pk=dg.pk)
        pdf_path = f"{settings.MEDIA_ROOT}{dg.fs_id}/pdf/{doc.get_abstract_filename()}"
        self.assertTrue(os.path.exists(pdf_path),
                        "the stored file should be in MEDIA_ROOT/dg.fs_id")
        pdf.close()
Exemple #16
0
    def test_audit_log_presence_upload(self):
        """Upload a list-presence CSV and verify that AuditLog rows are
        written for the insert, update, and delete of the extracted
        records' audited fields.
        """
        # Start from a clean slate: drop the CPCat records loaded by fixtures.
        ExtractedCPCat.objects.all().delete()
        self.assertEqual(ExtractedCPCat.objects.count(), 0,
                         "Should be empty before upload.")

        # Build a POST request that bulk-uploads the sample presence CSV.
        usr = User.objects.get(username="******")
        in_mem_sample_csv = make_upload_csv("sample_files/presence_good.csv")
        req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
        req_data.update(self.mng_data)
        req = self.factory.post("/datagroup/49/", data=req_data)
        req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
        # RequestFactory requests do not run the middleware stack, so
        # session and message middleware must be applied by hand.
        middleware = SessionMiddleware()
        middleware.process_request(req)
        req.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(req)
        req.session.save()
        req.user = usr

        resp = views.data_group_detail(request=req, pk=49)
        self.assertContains(resp, "3 extracted records uploaded successfully.")

        # Insert: each audited field of the new records gets an "I" row
        # with no old value.
        logs = AuditLog.objects.all()
        self.assertEqual(8, logs.count(), "Should have log entries")

        for log in logs:
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNone(log.old_value)
            self.assertIsNotNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("I", log.action, "Should be Insert action")
        logs.delete()

        # Update: saving changed field values must produce "U" rows.
        chems = ExtractedListPresence.objects.all()
        for chem in chems:
            chem.raw_cas = "test raw cas"
            chem.raw_chem_name = "test raw chem name"
            chem.report_funcuse = "report func use"
            chem.save()

        logs = AuditLog.objects.all()
        self.assertEqual(9, logs.count(), "Should have log entries")

        for log in logs:
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNotNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("U", log.action, "Should be Update action")
        logs.delete()

        # Delete: removing the records must produce "D" rows carrying
        # only the old values.
        chems.delete()

        logs = AuditLog.objects.all()
        self.assertEqual(9, logs.count(), "Should have log entries")

        for log in logs:
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNotNone(log.old_value)
            self.assertIsNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("D", log.action, "Should be Delete action")

        # Clean up the data group touched by the upload.
        dg = DataGroup.objects.get(pk=49)
        dg.delete()
Exemple #17
0
    def test_audit_log_chem_upload(self):
        """Upload a chemical-composition CSV and verify AuditLog rows for
        insert, bulk update, single-field update, and delete of the
        extracted chemicals.
        """
        req_data = {
            "extfile-extraction_script": 5,
            "extfile-weight_fraction_type": 1,
            "extfile-submit": "Submit",
        }
        req_data.update(self.mng_data)
        req = self.factory.post(path="/datagroup/6/", data=req_data)
        req.user = User.objects.get(username="******")

        # RequestFactory requests do not run the middleware stack, so
        # session and message middleware must be applied by hand.
        middleware = SessionMiddleware()
        middleware.process_request(req)
        req.session.save()
        middleware = MessageMiddleware()
        middleware.process_request(req)
        req.session.save()

        # upload chem
        req.FILES[
            "extfile-bulkformsetfileupload"] = self.generate_valid_chem_csv()
        resp = views.data_group_detail(request=req, pk=6)
        self.assertContains(resp, "3 extracted records uploaded successfully.")

        # Insert: each audited field of the new records gets an "I" row.
        logs = AuditLog.objects.all()
        self.assertEqual(13, logs.count(), "Should have log entries")

        for log in logs:
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNone(log.old_value)
            self.assertIsNotNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("I", log.action, "Should be Insert action")

        logs.delete()

        # Bulk update: nine fields across three chemicals should yield
        # 27 "U" rows — three per field.
        chems = ExtractedChemical.objects.filter(component="Test Component")

        for chemical in chems:
            chemical.raw_min_comp = "min comp"
            chemical.raw_max_comp = "max comp"
            chemical.raw_central_comp = "central comp"
            chemical.unit_type_id = 1
            chemical.report_funcuse = "report func use"
            chemical.ingredient_rank = 5
            chemical.lower_wf_analysis = 0.01
            chemical.central_wf_analysis = 0.44
            chemical.upper_wf_analysis = 0.88
        ExtractedChemical.objects.bulk_update(
            chems,
            [
                "raw_min_comp",
                "raw_max_comp",
                "raw_central_comp",
                "unit_type_id",
                "report_funcuse",
                "ingredient_rank",
                "upper_wf_analysis",
                "central_wf_analysis",
                "lower_wf_analysis",
            ],
        )

        logs = AuditLog.objects.all()
        self.assertEqual(27, logs.count(), "Should have log entries")
        self.assertEqual(
            3, sum(log.field_name == "raw_min_comp" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "raw_max_comp" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "raw_central_comp" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "unit_type_id" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "report_funcuse" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "ingredient_rank" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "upper_wf_analysis" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "central_wf_analysis" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "lower_wf_analysis" for log in logs))

        for log in logs:
            self.assertEqual(log.model_name, "extractedchemical")
            self.assertIsNotNone(log.object_key)
            self.assertIsNotNone(log.field_name)
            self.assertIsNotNone(log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("U", log.action, "Should be Update action")
        logs.delete()

        # Single-field update: rid lives on the rawchem base model, so
        # the log rows should name "rawchem" rather than the subclass.
        for chem in chems:
            chem.rid = "test rid"
            chem.save()
        logs = AuditLog.objects.all()
        self.assertEqual(3, logs.count(), "Should have log entries")
        for log in logs:
            self.assertEqual(log.model_name, "rawchem")
            self.assertEqual("rid", log.field_name)
            self.assertIsNone(log.old_value)
            self.assertEqual("test rid", log.new_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("U", log.action, "Should be Update action")
        logs.delete()

        # Delete: twelve audited fields per chemical, three chemicals,
        # giving 36 "D" rows carrying only old values.
        for chemical in chems:
            chemical.delete()

        logs = AuditLog.objects.all()
        self.assertEqual(36, logs.count(), "Should have log entries")
        self.assertEqual(
            3, sum(log.field_name == "raw_min_comp" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "raw_max_comp" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "raw_central_comp" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "unit_type_id" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "report_funcuse" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "ingredient_rank" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "upper_wf_analysis" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "central_wf_analysis" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "lower_wf_analysis" for log in logs))
        self.assertEqual(3, sum(log.field_name == "raw_cas" for log in logs))
        self.assertEqual(
            3, sum(log.field_name == "raw_chem_name" for log in logs))
        self.assertEqual(3, sum(log.field_name == "rid" for log in logs))
        for log in logs:
            self.assertIsNotNone(log.object_key)
            self.assertIsNotNone(log.model_name)
            self.assertIsNotNone(log.field_name)
            self.assertIsNone(log.new_value)
            self.assertIsNotNone(log.old_value)
            self.assertIsNotNone(log.date_created)
            self.assertIsNotNone(log.user_id)
            self.assertEqual("D", log.action, "Should be Delete action")

        # Clean up the data group touched by the upload.
        dg = DataGroup.objects.get(pk=6)
        dg.delete()
    def test_functionaluse_upload(self):
        """Upload a one-row functional-use CSV for a matched document and
        confirm a single ExtractedFunctionalUse record is created and the
        document's raw_category is back-filled.
        """
        # DataDocument 500 was added to the seed data for this test: it is
        # matched but has no extracted text yet.
        dd_id = 500
        dd = DataDocument.objects.get(pk=dd_id)
        dd_pdf = dd.pdf_url()

        # One-row functional-use extraction template for the seed document.
        csv_text = (
            "data_document_id,data_document_filename,prod_name,"
            "doc_date,rev_num,raw_category,raw_cas,raw_chem_name,report_funcuse"
            "\n"
            "%s,"
            "%s,"
            "sample functional use product,"
            "2018-04-07,"
            ","
            "raw PUC,"
            "RAW-CAS-01,"
            "raw chemname 01,"
            "surfactant" % (dd_id, dd_pdf))
        csv_bytes = csv_text.encode(encoding="UTF-8", errors="strict")
        upload_file = InMemoryUploadedFile(
            io.BytesIO(csv_bytes),
            field_name="extfile-bulkformsetfileupload",
            name="Functional_use_extract_template.csv",
            content_type="text/csv",
            size=len(csv_text),
            charset="utf-8",
        )

        post_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
        post_data.update(self.mng_data)
        request = self.factory.post("/datagroup/50/", data=post_data)
        request.FILES["extfile-bulkformsetfileupload"] = upload_file
        # RequestFactory requests do not run the middleware stack, so
        # session and message middleware are applied by hand.
        for mw in (SessionMiddleware(), MessageMiddleware()):
            mw.process_request(request)
            request.session.save()
        request.user = User.objects.get(username="******")

        self.assertEqual(
            ExtractedFunctionalUse.objects.filter(
                extracted_text_id=dd_id).count(),
            0,
            "Empty before upload.",
        )
        # Now get the response
        resp = views.data_group_detail(request=request, pk=50)
        self.assertContains(resp, "1 extracted record uploaded successfully.")

        # The upload must also back-fill the document's raw_category.
        matched = DataDocument.objects.filter(raw_category="raw PUC").count()
        self.assertTrue(matched > 0,
                        "DataDocument raw category values must be updated.")

        self.assertEqual(
            ExtractedFunctionalUse.objects.filter(
                extracted_text_id=dd_id).count(),
            1,
            "One new ExtractedFunctionalUse after upload.",
        )
    def test_datagroup_create(self):
        """End-to-end test of data group creation: register-records CSV
        validation, redirect on success, MEDIA_ROOT folder creation, the
        registered-records / documents / zipped-PDF download links, and
        uploading a PDF that matches a registered record.
        """
        # First attempt: a filename longer than the model allows should be
        # rejected with a form error and no DataGroup created.
        long_fn = 'a filename that is too long ' * 10
        csv_string = (
            "filename,title,document_type,url,organization\n"
            "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,UN,, \n"
            f"{long_fn},Body Cream,1,, \n")
        data = io.StringIO(csv_string)
        sample_csv = InMemoryUploadedFile(data,
                                          field_name='csv',
                                          name='register_records.csv',
                                          content_type='text/csv',
                                          size=len(csv_string),
                                          charset='utf-8')
        form_data = {
            'name': ['Walmart MSDS Test Group'],
            'description': ['test data group'],
            'group_type': ['1'],
            'downloaded_by': [str(User.objects.get(username='******').pk)],
            'downloaded_at': ['08/02/2018'],
            'download_script': ['1'],
            'data_source': ['10']
        }
        request = self.factory.post(path='/datagroup/new/', data=form_data)
        request.FILES['csv'] = sample_csv
        request.user = User.objects.get(username='******')
        # RequestFactory requests carry no session; fake one with a dict.
        request.session = {}
        request.session['datasource_title'] = 'Walmart'
        request.session['datasource_pk'] = 10
        resp = views.data_group_create(request=request, pk=10)
        # print(resp.content)
        dg_exists = DataGroup.objects.filter(
            name='Walmart MSDS Test Group').exists()
        self.assertContains(resp, 'Filename too long')
        self.assertFalse(dg_exists, )
        # print(dg.__dict__)

        # Second attempt: two valid rows; creation should succeed and
        # redirect to the new group's detail page.
        csv_string = (
            "filename,title,document_type,url,organization\n"
            "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,UN,, \n"
            "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,UN,, \n")
        data = io.StringIO(csv_string)
        sample_csv = InMemoryUploadedFile(data,
                                          field_name='csv',
                                          name='register_records.csv',
                                          content_type='text/csv',
                                          size=len(csv_string),
                                          charset='utf-8')
        request = self.factory.post(path='/datagroup/new', data=form_data)
        request.FILES['csv'] = sample_csv
        request.user = User.objects.get(username='******')
        request.session = {}
        request.session['datasource_title'] = 'Walmart'
        request.session['datasource_pk'] = 10
        resp = views.data_group_create(request=request, pk=10)

        self.assertEqual(resp.status_code, 302, "Should be redirecting")

        dg = DataGroup.objects.get(name='Walmart MSDS Test Group')

        self.assertEqual(f'/datagroup/{dg.pk}/', resp.url,
                         "Should be redirecting to the proper URL")

        # test whether the file system folder was created
        self.assertIn(
            str(dg.fs_id), os.listdir(settings.MEDIA_ROOT),
            "The data group's UUID should be a folder in MEDIA_ROOT")

        # In the Data Group Detail Page
        resp = self.client.get(f'/datagroup/{dg.pk}/')

        # test whether the data documents were created
        docs = DataDocument.objects.filter(data_group=dg)
        self.assertEqual(len(docs), 2,
                         "there should be two associated documents")

        # test whether the "Download Registered Records" link is like this example
        # <a href="/datagroup/a9c7f5a7-5ad4-4f75-b877-a3747f0cc081/registered_records.csv" class="btn btn-secondary">
        # <span class="oi oi-spreadsheet"></span>&nbsp;Download Registered Records CSV</a>
        csv_href = f'/datagroup/{dg.pk}/registered_records.csv'
        self.assertIn(
            csv_href, str(resp._container),
            "The data group detail page must contain the right download link")

        # grab a filename from a data document and see if it's in the csv
        doc_fn = docs.first().filename
        # test whether the registered records csv download link works
        resp_rr_csv = self.client.get(
            csv_href)  # this object should be of type StreamingHttpResponse
        docfound = 'not found'
        for csv_row in resp_rr_csv.streaming_content:
            if doc_fn in str(csv_row):
                docfound = 'found'
        self.assertEqual(
            docfound, 'found',
            "the document file name should appear in the registered records csv"
        )

        # Test whether the data document csv download works
        # URL on data group detail page: datagroup/docs_csv/{pk}/
        dd_csv_href = f'/datagroup/docs_csv/{dg.pk}/'  # this is an interpreted django URL
        resp_dd_csv = self.client.get(dd_csv_href)
        for csv_row in resp_dd_csv.streaming_content:
            if doc_fn in str(csv_row):
                docfound = 'found'
        self.assertEqual(
            docfound, 'found',
            "the document file name should appear in the data documents csv")

        # test whether the "Download All PDF Documents" link works
        dg_zip_href = f'/datagroup/pdfs_zipped/{dg.pk}/'  # this is the django-interpreted URL
        self.assertIn(
            dg_zip_href, str(resp._container),
            "The data group detail page must contain the right zip download link"
        )
        resp_zip = self.client.get(dg_zip_href)

        # test uploading one pdf that matches a registered record
        f = TemporaryUploadedFile(
            name='0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf',
            content_type='application/pdf',
            size=47,
            charset=None)
        request = self.factory.post(path='/datagroup/%s' % dg.pk,
                                    data={'upload': 'Submit'})
        request.FILES['multifiles'] = f
        request.user = User.objects.get(username='******')
        resp = views.data_group_detail(request=request, pk=dg.pk)
        # The stored PDF is named by the document's abstract filename
        # under MEDIA_ROOT/<fs_id>/pdf/.
        doc = DataDocument.objects.get(title='NUTRA NAIL')
        fn = doc.get_abstract_filename()
        folder_name = str(dg.fs_id)
        stored_file = f'{folder_name}/pdf/{fn}'
        pdf_path = f'{settings.MEDIA_ROOT}{stored_file}'
        self.assertTrue(os.path.exists(pdf_path),
                        "the stored file should be in MEDIA_ROOT/dg.fs_id")
        f.close()