def test_chem_upload(self):
    """Post chemical-extraction CSVs to data group 6 via the detail view.

    An invalid CSV must be rejected with a 'must be 1:1' error and create
    no ExtractedText; a valid CSV must report 2 uploaded records, populate
    DataDocument.raw_category, and leave exactly 2 ExtractedText rows.
    """
    req_data = {'script_selection': 5,
                'weight_fraction_type': 1,
                'extract_button': 'Submit', }
    req = self.factory.post(path='/datagroup/6/', data=req_data)
    req.user = User.objects.get(username='******')
    req.FILES['extract_file'] = self.generate_invalid_chem_csv()
    # get error in response
    resp = views.data_group_detail(request=req, pk=6)
    # print(resp.content)
    self.assertContains(resp, 'must be 1:1')
    text_count = ExtractedText.objects.all().count()
    self.assertTrue(text_count < 2,
                    'Shouldn\'t be 2 extracted texts uploaded')
    # Now get the success response
    req.FILES['extract_file'] = self.generate_valid_chem_csv()
    doc_count = DataDocument.objects.filter(
        raw_category='aerosol hairspray').count()
    self.assertTrue(doc_count == 0,
                    'DataDocument raw category shouldn\'t exist yet.')
    resp = views.data_group_detail(request=req, pk=6)
    self.assertContains(resp, '2 extracted records uploaded successfully.')
    doc_count = DataDocument.objects.filter(
        raw_category='aerosol hairspray').count()
    self.assertTrue(doc_count > 0,
                    'DataDocument raw category values must be updated.')
    text_count = ExtractedText.objects.all().count()
    self.assertTrue(text_count == 2, 'Should be 2 extracted texts')
    # tear down: remove the data group touched by this test
    dg = DataGroup.objects.get(pk=6)
    dg.delete()
def test_presence_upload(self):
    """Exercise chemical-presence CSV upload against data group 49.

    Three scenarios: a CSV whose rows don't match documents 1:1 is
    rejected; a CSV with an over-long field value is rejected; a good CSV
    creates ExtractedCPCat/ExtractedListPresence records, updates the
    documents' raw_category, and strips whitespace from raw_cas.
    """
    # Delete the CPCat records that were loaded with the fixtures
    ExtractedCPCat.objects.all().delete()
    self.assertEqual(len(ExtractedCPCat.objects.all()), 0,
                     "Should be empty before upload.")
    usr = User.objects.get(username="******")
    # test for error to be propagated w/o a 1:1 match of ExtCPCat to DataDoc
    in_mem_sample_csv = make_upload_csv("sample_files/presence_cpcat.csv")
    req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
    req_data.update(self.mng_data)
    req = self.factory.post("/datagroup/49/", data=req_data)
    req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
    # the view needs working session and message frameworks
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    req.user = usr
    resp = views.data_group_detail(request=req, pk=49)
    self.assertContains(resp, "must be 1:1")
    self.assertEqual(
        len(ExtractedCPCat.objects.all()),
        0,
        "ExtractedCPCat records remain 0 if error in upload.",
    )
    # test for error to propagate w/ too many chars in a field
    in_mem_sample_csv = make_upload_csv("sample_files/presence_chars.csv")
    req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
    resp = views.data_group_detail(request=req, pk=49)
    self.assertContains(resp, "Ensure this value has at most 50 characters")
    self.assertEqual(
        len(ExtractedCPCat.objects.all()),
        0,
        "ExtractedCPCat records remain 0 if error in upload.",
    )
    # test that upload works successfully...
    in_mem_sample_csv = make_upload_csv("sample_files/presence_good.csv")
    req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
    resp = views.data_group_detail(request=req, pk=49)
    self.assertContains(resp, "3 extracted records uploaded successfully.")
    doc_count = DataDocument.objects.filter(
        raw_category="list presence category").count()
    self.assertTrue(doc_count > 0,
                    "DataDocument raw category values must be updated.")
    self.assertEqual(len(ExtractedCPCat.objects.all()), 2,
                     "Two after upload.")
    chem = ExtractedListPresence.objects.get(
        raw_cas__icontains="100784-20-1")
    self.assertTrue(chem.raw_cas[0] != " ",
                    "White space should be stripped.")
    # tear down the data group created by the fixtures/upload
    dg = DataGroup.objects.get(pk=49)
    dg.delete()
def test_valid_clean_comp_data_with_BOM_upload(self):
    """A clean-composition CSV beginning with a UTF-8 BOM should upload
    successfully and leave exactly 2 ExtractedChemical rows for script 17."""
    csv_text = self.generate_valid_clean_comp_data_with_BOM_csv_string()
    csv_bytes = csv_text.encode(encoding="UTF-8", errors="strict")
    upload = InMemoryUploadedFile(
        io.BytesIO(csv_bytes),
        field_name="cleancomp-bulkformsetfileupload",
        name="clean_comp_data.csv",
        content_type="text/csv",
        size=len(csv_text),
        charset="utf-8",
    )
    post_data = {"cleancomp-script_id": 17, "cleancomp-submit": "Submit"}
    post_data.update(self.mng_data)
    request = self.factory.post(path="/datagroup/47/", data=post_data)
    request.FILES["cleancomp-bulkformsetfileupload"] = upload
    # the view relies on the session and message frameworks
    for mw_class in (SessionMiddleware, MessageMiddleware):
        mw_class().process_request(request)
        request.session.save()
    request.user = User.objects.get(username="******")
    response = views.data_group_detail(request=request, pk=47)
    self.assertContains(
        response, "2 clean composition data records uploaded successfully.")
    self.assertEqual(
        ExtractedChemical.objects.filter(script_id=17).count(),
        2,
        "There should be only 2 ExtractedChemical objects",
    )
def test_blank_upc_upload(self):
    """
    Each UPC should be made and be unique

    Rows with no upc value still upload: the view generates a stub UPC
    per row and links a ProductDocument to each data document.
    """
    sample_csv = (
        "data_document_id,data_document_filename,title,upc,url,brand_name,size,color,item_id,parent_item_id,short_description,long_description,thumb_image,medium_image,large_image,model_number,manufacturer\n"
        "177852,fff53301-a199-4e1b-91b4-39227ca0fe3c.pdf,'product title a'\n"
        "178456,fefd813f-d1e0-4fa7-8c4e-49030eca08a3.pdf,'product title b'\n"
        "178496,fc5f964c-91e2-42c5-9899-2ff38e37ba89.pdf,'product title c'\n"
        "161698,f040f93d-1cf3-4eff-85a9-da14d8d2e252.pdf,'product title d'\n"
        "159270,f040f93d-1cf3-4eff-85a9-da14d8d2e253.pdf,'product title e'"
    )
    req = self.generate_csv_request(sample_csv)
    pre_pdcount = ProductDocument.objects.count()
    resp = views.data_group_detail(request=req, pk=23)
    self.assertContains(resp, "5 records have been successfully uploaded.")
    # ProductDocument records should also have been added
    post_pdcount = ProductDocument.objects.count()
    self.assertEqual(
        post_pdcount,
        pre_pdcount + 5,
        "There should be 5 more ProductDocuments after the upload",
    )
    # The first data_document_id in the csv should now have
    # two Products linked to it, including the new one.
    # (fixed: was a placeholder-less f-string combined with %-formatting,
    # i.e. f"/datadocument/%s/" % 177852 — the f prefix did nothing)
    resp = self.c.get(f"/datadocument/{177852}/")
    self.assertContains(resp, "product title a")
def test_functionaluse_upload(self):
    """Upload a single functional-use record for DataDocument 500 and
    verify one ExtractedFunctionalUse row is created and the document's
    raw_category is updated."""
    # This action is performed on a data document without extracted text
    # but with a matched data document. DataDocument 500 was added to the
    # seed data for this test
    dd_id = 500
    dd = DataDocument.objects.get(pk=dd_id)
    #et = ExtractedText.objects.get(data_document=dd)
    dd_pdf = dd.pdf_url()
    # one-row CSV built from the document's own id and pdf url
    sample_csv = ("data_document_id,data_document_filename,prod_name,"
                  "doc_date,rev_num,raw_category,raw_cas,raw_chem_name,report_funcuse"
                  "\n"
                  "%s,"
                  "%s,"
                  "sample functional use product,"
                  "2018-04-07,"
                  ","
                  "raw PUC,"
                  "RAW-CAS-01,"
                  "raw chemname 01,"
                  "surfactant" % (dd_id, dd_pdf)
                  )
    sample_csv_bytes = sample_csv.encode(encoding='UTF-8', errors='strict')
    in_mem_sample_csv = InMemoryUploadedFile(
        io.BytesIO(sample_csv_bytes),
        field_name='extract_file',
        name='Functional_use_extract_template.csv',
        content_type='text/csv',
        size=len(sample_csv),
        charset='utf-8',
    )
    req_data = {'script_selection': 5, 'extract_button': 'Submit', }
    req = self.factory.post('/datagroup/50/', data=req_data)
    req.FILES['extract_file'] = in_mem_sample_csv
    req.user = User.objects.get(username='******')
    self.assertEqual(len(ExtractedFunctionalUse.objects.filter(extracted_text_id=dd_id)), 0,
                     "Empty before upload.")
    # Now get the response
    resp = views.data_group_detail(request=req, pk=50)
    self.assertContains(resp, '1 extracted records uploaded successfully.')
    doc_count = DataDocument.objects.filter(raw_category='raw PUC').count()
    self.assertTrue(doc_count > 0,
                    'DataDocument raw category values must be updated.')
    self.assertEqual(len(ExtractedFunctionalUse.objects.filter(extracted_text_id=dd_id)), 1,
                     "One new ExtractedFunctionalUse after upload.")
def test_bad_header_upload(self):
    """The bad header should cause the entire form to be invalid."""
    # first column is deliberately misspelled (data_document_idX)
    rows = [
        "data_document_idX,data_document_file_name,title,upc,url,brand_name,size,color,item_id,parent_item_id,short_description,long_description,thumb_image,medium_image,large_image,model_number,manufacturer",
        "177852,fff53301-a199-4e1b-91b4-39227ca0fe3c.pdf,'product title a',110230011425",
        "178456,fefd813f-d1e0-4fa7-8c4e-49030eca08a3.pdf,'product title b',903944840750",
        "178496,fc5f964c-91e2-42c5-9899-2ff38e37ba89.pdf,'product title c',852646877466",
        "161698,f040f93d-1cf3-4eff-85a9-da14d8d2e252.pdf,'product title d'",
        "159270,f040f93d-1cf3-4eff-85a9-da14d8d2e253.pdf,'product title e'",
    ]
    request = self.generate_csv_request("\n".join(rows))
    response = views.data_group_detail(request=request, pk=23)
    # the required column is reported missing, and the expected header
    # list is echoed back to the user
    self.assertContains(response,
                        "data_document_filename: This field is required.")
    self.assertContains(response, "CSV column titles should be ")
def test_existing_upc_upload(self):
    """A UPC that exists in the database already should be rejected."""
    # the last row re-uses the already-registered UPC "stub_11"
    rows = [
        "data_document_id,data_document_filename,title,upc,url,brand_name,size,color,item_id,parent_item_id,short_description,long_description,thumb_image,medium_image,large_image,model_number,manufacturer",
        "162019,fff53301-a199-4e1b-91b4-39227ca0fe3c.pdf,'product title a',110230011425",
        "161990,fefd813f-d1e0-4fa7-8c4e-49030eca08a3.pdf,'product title b',903944840750",
        "161938,fc5f964c-91e2-42c5-9899-2ff38e37ba89.pdf,'product title c',852646877466",
        "161698,f040f93d-1cf3-4eff-85a9-da14d8d2e252.pdf,'product title d',stub_11",
    ]
    request = self.generate_csv_request("\n".join(rows))
    response = views.data_group_detail(request=request, pk=23)
    # the duplicate row is reported by document id and skipped;
    # the other three rows upload normally
    self.assertContains(
        response,
        "The following records had existing or duplicated UPCs and were not added: 161698",
    )
    self.assertContains(response, "3 records have been successfully uploaded")
def test_presence_upload(self):
    """Upload a two-row chemical-presence CSV to data group 49 and verify
    two ExtractedCPCat records are created and raw_category is set."""
    sample_csv = ("data_document_id,data_document_filename,"
                  "doc_date,raw_category,raw_cas,raw_chem_name,"
                  "cat_code,description_cpcat,cpcat_code,cpcat_sourcetype"
                  "\n"
                  "254780,11177849.pdf,,list presence category,"
                  "0000075-37-6,hydrofluorocarbon 152a,presence_cat_code,"
                  "desc,cpcat_code,free"
                  "\n"
                  "254781,11165872.pdf,,list presence category,"
                  "0000064-17-5,sd alcohol 40-b (ethanol),presence_cat_code,"
                  "desc,cpcat_code,free"
                  )
    sample_csv_bytes = sample_csv.encode(encoding='UTF-8', errors='strict')
    in_mem_sample_csv = InMemoryUploadedFile(
        io.BytesIO(sample_csv_bytes),
        field_name='extract_file',
        name='SIRI_Chemical_Presence_extract_template.csv',
        content_type='text/csv',
        size=len(sample_csv),
        charset='utf-8',
    )
    req_data = {'script_selection': 5, 'extract_button': 'Submit', }
    req = self.factory.post('/datagroup/49/', data=req_data)
    req.FILES['extract_file'] = in_mem_sample_csv
    req.user = User.objects.get(username='******')
    self.assertEqual(len(ExtractedCPCat.objects.all()), 0,
                     "Empty before upload.")
    # Now get the response
    resp = views.data_group_detail(request=req, pk=49)
    self.assertContains(resp, '2 extracted records uploaded successfully.')
    doc_count = DataDocument.objects.filter(
        raw_category='list presence category').count()
    self.assertTrue(doc_count > 0,
                    'DataDocument raw category values must be updated.')
    self.assertEqual(len(ExtractedCPCat.objects.all()), 2,
                     "Two after upload.")
    # tear down the data group used by this test
    dg = DataGroup.objects.get(pk=49)
    dg.delete()
def test_invalid_clean_comp_data_upload(self):
    """An invalid clean-composition CSV must report the unmatched id."""
    csv_text = self.generate_invalid_clean_comp_data_csv_string()
    upload = InMemoryUploadedFile(
        io.BytesIO(csv_text.encode(encoding='UTF-8', errors='strict')),
        field_name='clean_comp_data_file',
        name='clean_comp_data.csv',
        content_type='text/csv',
        size=len(csv_text),
        charset='utf-8',
    )
    request = self.factory.post(
        path='/datagroup/6/',
        data={'script_selection': 17, 'clean_comp_data_button': 'Submit'},
    )
    request.FILES['clean_comp_data_file'] = upload
    request.user = User.objects.get(username='******')
    response = views.data_group_detail(request=request, pk=6)
    # id 999 in the CSV has no corresponding ExtractedChemical
    self.assertContains(response, 'No ExtractedChemical matches id 999')
def test_invalid_clean_comp_data_upload(self):
    """Upload an invalid clean-composition CSV to data group 47 and check
    that every expected validation error appears in the response: a
    weight fraction out of [0, 1], a lone min/max value, an unknown id,
    and a central value combined with min and max."""
    sample_csv = self.generate_invalid_clean_comp_data_csv_string()
    sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
    in_mem_sample_csv = InMemoryUploadedFile(
        io.BytesIO(sample_csv_bytes),
        field_name="cleancomp-bulkformsetfileupload",
        name="clean_comp_data.csv",
        content_type="text/csv",
        size=len(sample_csv),
        charset="utf-8",
    )
    req_data = {"cleancomp-script_id": 12, "cleancomp-submit": "Submit"}
    req_data.update(self.mng_data)
    req = self.factory.post(path="/datagroup/47/", data=req_data)
    # the view needs working session and message frameworks
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    req.FILES["cleancomp-bulkformsetfileupload"] = in_mem_sample_csv
    req.user = User.objects.get(username="******")
    resp = views.data_group_detail(request=req, pk=47)
    self.assertContains(
        resp, "lower_wf_analysis: Quantity 1.7777 must be between 0 and 1")
    # NOTE: "maximimum" matches the application's message text as-is
    self.assertContains(
        resp,
        "Both minimum and maximimum weight fraction values must be provided, not just one.",
    )
    self.assertContains(
        resp,
        "The following IDs do not exist in ExtractedChemicals for this data group: 999",
    )
    self.assertContains(
        resp,
        "Central weight fraction value cannot be defined with minimum value and maximum value.",
    )
def test_noscript(self):
    """Posting without a script id must raise a required-field error."""
    csv_text = self.generate_valid_clean_comp_data_csv_string()
    csv_bytes = csv_text.encode(encoding="UTF-8", errors="strict")
    upload = InMemoryUploadedFile(
        io.BytesIO(csv_bytes),
        field_name="cleancomp-bulkformsetfileupload",
        name="clean_comp_data.csv",
        content_type="text/csv",
        size=len(csv_text),
        charset="utf-8",
    )
    # deliberately omit the "cleancomp-script_id" key
    post_data = {"cleancomp-submit": "Submit"}
    post_data.update(self.mng_data)
    request = self.factory.post(path="/datagroup/47/", data=post_data)
    request.FILES["cleancomp-bulkformsetfileupload"] = upload
    for mw_class in (SessionMiddleware, MessageMiddleware):
        mw_class().process_request(request)
        request.session.save()
    request.user = User.objects.get(username="******")
    response = views.data_group_detail(request=request, pk=47)
    self.assertContains(response, "This field is required")
def test_valid_clean_comp_data_upload(self):
    """A valid clean-composition CSV yields exactly two Ingredients."""
    csv_text = self.generate_valid_clean_comp_data_csv_string()
    upload = InMemoryUploadedFile(
        io.BytesIO(csv_text.encode(encoding='UTF-8', errors='strict')),
        field_name='clean_comp_data_file',
        name='clean_comp_data.csv',
        content_type='text/csv',
        size=len(csv_text),
        charset='utf-8',
    )
    request = self.factory.post(
        path='/datagroup/6/',
        data={'script_selection': 17, 'clean_comp_data_button': 'Submit'},
    )
    request.FILES['clean_comp_data_file'] = upload
    request.user = User.objects.get(username='******')
    response = views.data_group_detail(request=request, pk=6)
    self.assertContains(
        response, '2 clean composition data records uploaded successfully.')
    self.assertEqual(Ingredient.objects.count(), 2,
                     "There should be only 2 Ingredient objects")
def test_chem_upload(self):
    """Formset-based chemical-extraction upload against data group 6.

    Verifies the script dropdown contents, that an invalid CSV reports
    every expected validation error without creating ExtractedText, and
    that a valid CSV creates 3 RawChem / 3 ExtractedChemical rows and
    updates DataDocument.raw_category.

    Fix: replaced assertEquals with assertEqual — assertEquals is a
    deprecated alias that was removed in Python 3.12.
    """
    # create a matched-but-not-extracted document
    # so that uploading ExtractedText is an option
    dd = DataDocument.objects.create(
        filename="fake.pdf",
        title="Another unextracted document",
        matched=True,
        data_group_id="6",
        created_at="2019-11-18 11:00:00.000000",
        updated_at="2019-11-18 12:00:00.000000",
        document_type_id="2",
        organization="Org",
    )
    # Check the scripts offered in the selection form
    resp = self.c.get(path="/datagroup/6/", stream=True)
    soup = bs4.BeautifulSoup(resp.content, features="lxml")
    # indexing [0] doubles as an assertion that the selector is present
    selector = soup.find_all(
        attrs={"name": "extfile-extraction_script"})[0]
    # The options should include "Home Depot (extraction)"
    hd = soup.find_all(string="Home Depot (extraction)")
    self.assertEqual(hd[0], "Home Depot (extraction)")
    # The Sun INDS script should not be available, because
    # its qa_begun value is True
    gp = soup.find_all(string="Sun INDS (extract)")
    self.assertEqual(gp, [])
    req_data = {
        "extfile-extraction_script": 5,
        "extfile-weight_fraction_type": 1,
        "extfile-submit": "Submit",
    }
    req_data.update(self.mng_data)
    req = self.factory.post(path="/datagroup/6/", data=req_data)
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    req.user = User.objects.get(username="******")
    req.FILES[
        "extfile-bulkformsetfileupload"] = self.generate_invalid_chem_csv()
    # get error in response
    text_count = ExtractedText.objects.all().count()
    resp = views.data_group_detail(request=req, pk=6)
    self.assertContains(resp, "must be 1:1")
    self.assertContains(resp, "were not found for this data group")
    self.assertContains(resp, "There must be a unit type")
    self.assertContains(resp, "Central composition value cannot be defined")
    # NOTE: "maximimum" matches the application's message text as-is
    self.assertContains(resp, "Both minimum and maximimum")
    post_text_count = ExtractedText.objects.all().count()
    self.assertEqual(text_count, post_text_count,
                     "Shouldn't have extracted texts uploaded")
    # Now get the success response
    req.FILES[
        "extfile-bulkformsetfileupload"] = self.generate_valid_chem_csv()
    doc_count = DataDocument.objects.filter(
        raw_category="aerosol hairspray").count()
    old_rawchem_count = RawChem.objects.filter().count()
    old_extractedchemical_count = ExtractedChemical.objects.filter().count()
    self.assertTrue(doc_count == 0,
                    "DataDocument raw category shouldn't exist yet.")
    resp = views.data_group_detail(request=req, pk=6)
    self.assertContains(resp, "4 extracted records uploaded successfully.")
    new_rawchem_count = RawChem.objects.filter().count()
    new_extractedchemical_count = ExtractedChemical.objects.filter().count()
    self.assertTrue(
        new_rawchem_count - old_rawchem_count == 3,
        "There should only be 3 new RawChem records",
    )
    self.assertTrue(
        new_extractedchemical_count - old_extractedchemical_count == 3,
        "There should only be 3 new ExtractedChemical records",
    )
    doc_count = DataDocument.objects.filter(
        raw_category="aerosol hairspray").count()
    self.assertTrue(doc_count > 0,
                    "DataDocument raw category values must be updated.")
    text_count = ExtractedText.objects.all().count()
    self.assertTrue(text_count == 2, "Should be 2 extracted texts")
    chem_count = ExtractedChemical.objects.filter(
        component="Test Component").count()
    self.assertTrue(
        chem_count == 3,
        "Should be 3 extracted chemical records with the Test Component",
    )
    # tear down the data group used by this test
    dg = DataGroup.objects.get(pk=6)
    dg.delete()
def test_audit_log_functionaluse_upload(self):
    """Functional-use upload should leave AuditLog entries for the
    inserts, then for field updates, then for the deletes.

    Fix: replaced assertEquals with assertEqual — assertEquals is a
    deprecated alias that was removed in Python 3.12.
    """
    dd_id = 500
    dd = DataDocument.objects.get(pk=dd_id)
    dd_pdf = dd.pdf_url()
    # one-row CSV built from the document's own id and pdf url
    sample_csv = (
        "data_document_id,data_document_filename,prod_name,"
        "doc_date,rev_num,raw_category,raw_cas,raw_chem_name,report_funcuse"
        "\n"
        "%s,"
        "%s,"
        "sample functional use product,"
        "2018-04-07,"
        ","
        "raw PUC,"
        "RAW-CAS-01,"
        "raw chemname 01,"
        "surfactant" % (dd_id, dd_pdf))
    sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
    in_mem_sample_csv = InMemoryUploadedFile(
        io.BytesIO(sample_csv_bytes),
        field_name="extfile-bulkformsetfileupload",
        name="Functional_use_extract_template.csv",
        content_type="text/csv",
        size=len(sample_csv),
        charset="utf-8",
    )
    req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
    req_data.update(self.mng_data)
    req = self.factory.post("/datagroup/50/", data=req_data)
    req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    req.user = User.objects.get(username="******")
    self.assertEqual(
        len(ExtractedFunctionalUse.objects.filter(
            extracted_text_id=dd_id)),
        0,
        "Empty before upload.",
    )
    # Now get the response
    resp = views.data_group_detail(request=req, pk=50)
    self.assertContains(resp, "1 extracted record uploaded successfully.")
    self.assertEqual(
        len(ExtractedFunctionalUse.objects.filter(
            extracted_text_id=dd_id)),
        1,
        "One new ExtractedFunctionalUse after upload.",
    )
    # insert: the upload itself should have produced audit-log rows
    logs = AuditLog.objects.all()
    self.assertEqual(3, len(logs), "Should have log entries")
    for log in logs:
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNone(log.old_value)
        self.assertIsNotNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("I", log.action, "Should be Insert action")
    logs.delete()
    # update
    efs = ExtractedFunctionalUse.objects.filter(extracted_text_id=dd_id)
    for ef in efs:
        ef.report_funcuse = "test func use"
        ef.save()
    logs = AuditLog.objects.all()
    self.assertEqual(1, len(logs), "Should have log entries")
    for log in logs:
        # NOTE(review): "extractedfuncationaluse" looks misspelled but must
        # match the model_name string the audit mechanism records — confirm
        # against the trigger/signal before "fixing" it
        self.assertEqual(log.model_name, "extractedfuncationaluse")
        self.assertEqual(log.field_name, "report_funcuse")
        self.assertIsNotNone(log.new_value)
        self.assertEqual(log.new_value, "test func use")
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("U", log.action, "Should be Update action")
    logs.delete()
    # delete
    for ef in efs:
        ef.delete()
    logs = AuditLog.objects.all()
    self.assertEqual(3, len(logs), "Should have log entries")
    for log in logs:
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNotNone(log.old_value)
        self.assertIsNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("D", log.action, "Should be delete action")
def test_datagroup_create(self):
    """End-to-end data group creation via the create view.

    First posts a CSV containing an over-long filename and expects a
    validation error with no DataGroup created; then posts a valid CSV
    and verifies the redirect, the on-disk folder, the two registered
    DataDocuments, both CSV download links, and a matching PDF upload.
    """
    long_fn = "a filename that is too long " * 10
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        f"{long_fn},Body Cream,MS,, \n")
    data = io.StringIO(csv_string)
    sample_csv = InMemoryUploadedFile(
        data,
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    form_data = {
        "name": ["Walmart MSDS Test Group"],
        "description": ["test data group"],
        "group_type": ["2"],
        "downloaded_by": [str(User.objects.get(username="******").pk)],
        "downloaded_at": ["08/02/2018"],
        "download_script": ["1"],
        "data_source": ["10"],
    }
    request = self.factory.post(path="/datagroup/new/", data=form_data)
    request.FILES["csv"] = sample_csv
    request.user = User.objects.get(username="******")
    middleware = SessionMiddleware()
    middleware.process_request(request)
    request.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(request)
    request.session.save()
    # NOTE(review): this replaces the middleware-created session with a
    # bare dict — presumably the view only reads these two keys; confirm
    request.session = {}
    request.session["datasource_title"] = "Walmart"
    request.session["datasource_pk"] = 10
    resp = views.data_group_create(request=request, pk=10)
    dg_exists = DataGroup.objects.filter(
        name="Walmart MSDS Test Group").exists()
    self.assertContains(
        resp, "filename: Ensure this value has at most 255 characters")
    self.assertFalse(dg_exists)
    # second attempt: a well-formed two-row register_records CSV
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,MS,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,MS,, \n")
    data = io.StringIO(csv_string)
    sample_csv = InMemoryUploadedFile(
        data,
        field_name="csv",
        name="register_records.csv",
        content_type="text/csv",
        size=len(csv_string),
        charset="utf-8",
    )
    request = self.factory.post(path="/datagroup/new", data=form_data)
    request.FILES["csv"] = sample_csv
    middleware = SessionMiddleware()
    middleware.process_request(request)
    request.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(request)
    request.session.save()
    request.user = User.objects.get(username="******")
    request.session = {}
    request.session["datasource_title"] = "Walmart"
    request.session["datasource_pk"] = 10
    resp = views.data_group_create(request=request, pk=10)
    self.assertEqual(resp.status_code, 302, "Should be redirecting")
    dg = DataGroup.objects.get(name="Walmart MSDS Test Group")
    self.assertEqual(f"/datagroup/{dg.pk}/", resp.url,
                     "Should be redirecting to the proper URL")
    # test whether the file system folder was created
    self.assertIn(
        str(dg.fs_id),
        os.listdir(settings.MEDIA_ROOT),
        "The data group's UUID should be a folder in MEDIA_ROOT",
    )
    # In the Data Group Detail Page
    resp = self.client.get(f"/datagroup/{dg.pk}/")
    # test whether the data documents were created
    docs = DataDocument.objects.filter(data_group=dg)
    self.assertEqual(len(docs), 2,
                     "there should be two associated documents")
    # test whether the "Download Registered Records" link is like this example
    # <a href="/datagroup/a9c7f5a7-5ad4-4f75-b877-a3747f0cc081/download_registered_documents" class="btn btn-secondary">
    csv_href = f"/datagroup/{dg.pk}/download_registered_documents/"
    self.assertIn(
        csv_href,
        str(resp._container),
        "The data group detail page must contain the right download link",
    )
    # grab a filename from a data document and see if it's in the csv
    doc_fn = docs.first().filename
    # test whether the registered records csv download link works
    resp_rr_csv = self.client.get(
        csv_href)  # this object should be of type StreamingHttpResponse
    docfound = "not found"
    for csv_row in resp_rr_csv.streaming_content:
        if doc_fn in str(csv_row):
            docfound = "found"
    self.assertEqual(
        docfound,
        "found",
        "the document file name should appear in the registered records csv",
    )
    # Test whether the data document csv download works
    # URL on data group detail page: datagroup/docs_csv/{pk}/
    dd_csv_href = (f"/datagroup/{dg.pk}/download_documents/"
                   )  # this is an interpreted django URL
    resp_dd_csv = self.client.get(dd_csv_href)
    for csv_row in resp_dd_csv.streaming_content:
        if doc_fn in str(csv_row):
            docfound = "found"
    self.assertEqual(
        docfound,
        "found",
        "the document file name should appear in the data documents csv",
    )
    # test uploading one pdf that matches a registered record
    doc = DataDocument.objects.filter(data_group_id=dg.pk).first()
    pdf = TemporaryUploadedFile(name=doc.filename,
                                content_type="application/pdf",
                                size=47,
                                charset=None)
    request = self.factory.post(path="/datagroup/%s" % dg.pk,
                                data={"uploaddocs-submit": "Submit"})
    request.FILES["uploaddocs-documents"] = pdf
    middleware = SessionMiddleware()
    middleware.process_request(request)
    request.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(request)
    request.session.save()
    request.user = User.objects.get(username="******")
    resp = views.data_group_detail(request=request, pk=dg.pk)
    pdf_path = f"{settings.MEDIA_ROOT}{dg.fs_id}/pdf/{doc.get_abstract_filename()}"
    self.assertTrue(os.path.exists(pdf_path),
                    "the stored file should be in MEDIA_ROOT/dg.fs_id")
    pdf.close()
def test_audit_log_presence_upload(self):
    """Presence upload to data group 49 should write AuditLog rows for
    the inserts, then for field updates, then for the deletes.

    Fix: replaced assertEquals with assertEqual — assertEquals is a
    deprecated alias that was removed in Python 3.12.
    """
    # Delete the CPCat records that were loaded with the fixtures
    ExtractedCPCat.objects.all().delete()
    self.assertEqual(len(ExtractedCPCat.objects.all()), 0,
                     "Should be empty before upload.")
    # upload file
    usr = User.objects.get(username="******")
    in_mem_sample_csv = make_upload_csv("sample_files/presence_good.csv")
    req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
    req_data.update(self.mng_data)
    req = self.factory.post("/datagroup/49/", data=req_data)
    req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    req.user = usr
    resp = views.data_group_detail(request=req, pk=49)
    self.assertContains(resp, "3 extracted records uploaded successfully.")
    # insert: the upload itself should have produced audit-log rows
    logs = AuditLog.objects.all()
    self.assertEqual(8, len(logs), "Should have log entries")
    for log in logs:
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNone(log.old_value)
        self.assertIsNotNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("I", log.action, "Should be Insert action")
    logs.delete()
    # update
    chems = ExtractedListPresence.objects.all()
    for chem in chems:
        chem.raw_cas = "test raw cas"
        chem.raw_chem_name = "test raw chem name"
        chem.report_funcuse = "report func use"
        chem.save()
    logs = AuditLog.objects.all()
    self.assertEqual(9, len(logs), "Should have log entries")
    for log in logs:
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNotNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("U", log.action, "Should be Update action")
    logs.delete()
    # delete
    chems.delete()
    logs = AuditLog.objects.all()
    self.assertEqual(9, len(logs), "Should have log entries")
    for log in logs:
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNotNone(log.old_value)
        self.assertIsNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("D", log.action, "Should be Delete action")
    # tear down the data group used by this test
    dg = DataGroup.objects.get(pk=49)
    dg.delete()
def test_audit_log_chem_upload(self):
    """Chemical upload should write AuditLog rows for inserts, for
    per-field bulk updates, for a RawChem rid change, and for deletes.

    Fix: replaced assertEquals with assertEqual — assertEquals is a
    deprecated alias that was removed in Python 3.12.
    """
    req_data = {
        "extfile-extraction_script": 5,
        "extfile-weight_fraction_type": 1,
        "extfile-submit": "Submit",
    }
    req_data.update(self.mng_data)
    req = self.factory.post(path="/datagroup/6/", data=req_data)
    req.user = User.objects.get(username="******")
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    # upload chem
    req.FILES[
        "extfile-bulkformsetfileupload"] = self.generate_valid_chem_csv()
    resp = views.data_group_detail(request=req, pk=6)
    self.assertContains(resp, "3 extracted records uploaded successfully.")
    # get audit logs
    logs = AuditLog.objects.all()
    self.assertEqual(13, len(logs), "Should have log entries")
    for log in logs:
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNone(log.old_value)
        self.assertIsNotNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("I", log.action, "Should be Insert action")
    logs.delete()
    # bulk update fields
    chems = ExtractedChemical.objects.filter(component="Test Component")
    for chemical in chems:
        chemical.raw_min_comp = "min comp"
        chemical.raw_max_comp = "max comp"
        chemical.raw_central_comp = "central comp"
        chemical.unit_type_id = 1
        chemical.report_funcuse = "report func use"
        chemical.ingredient_rank = 5
        chemical.lower_wf_analysis = 0.01
        chemical.central_wf_analysis = 0.44
        chemical.upper_wf_analysis = 0.88
    # the queryset's result cache holds the modified instances,
    # so bulk_update persists the in-memory changes above
    ExtractedChemical.objects.bulk_update(
        chems,
        [
            "raw_min_comp",
            "raw_max_comp",
            "raw_central_comp",
            "unit_type_id",
            "report_funcuse",
            "ingredient_rank",
            "upper_wf_analysis",
            "central_wf_analysis",
            "lower_wf_analysis",
        ],
    )
    # 3 chemicals x 9 fields = 27 update entries
    logs = AuditLog.objects.all()
    self.assertEqual(27, len(logs), "Should have log entries")
    self.assertEqual(
        3, sum(log.field_name == "raw_min_comp" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "raw_max_comp" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "raw_central_comp" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "unit_type_id" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "report_funcuse" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "ingredient_rank" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "upper_wf_analysis" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "central_wf_analysis" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "lower_wf_analysis" for log in logs))
    for log in logs:
        self.assertEqual(log.model_name, "extractedchemical")
        self.assertIsNotNone(log.object_key)
        self.assertIsNotNone(log.field_name)
        self.assertIsNotNone(log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("U", log.action, "Should be Update action")
    logs.delete()
    # change rid
    for chem in chems:
        chem.rid = "test rid"
        chem.save()
    logs = AuditLog.objects.all()
    self.assertEqual(3, len(logs), "Should have log entries")
    for log in logs:
        self.assertEqual(log.model_name, "rawchem")
        self.assertEqual("rid", log.field_name)
        self.assertIsNone(log.old_value)
        self.assertEqual("test rid", log.new_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("U", log.action, "Should be Update action")
    logs.delete()
    # delete chemicals: 3 chemicals x 12 populated fields = 36 entries
    for chemical in chems:
        chemical.delete()
    logs = AuditLog.objects.all()
    self.assertEqual(36, len(logs), "Should have log entries")
    self.assertEqual(
        3, sum(log.field_name == "raw_min_comp" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "raw_max_comp" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "raw_central_comp" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "unit_type_id" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "report_funcuse" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "ingredient_rank" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "upper_wf_analysis" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "central_wf_analysis" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "lower_wf_analysis" for log in logs))
    self.assertEqual(3, sum(log.field_name == "raw_cas" for log in logs))
    self.assertEqual(
        3, sum(log.field_name == "raw_chem_name" for log in logs))
    self.assertEqual(3, sum(log.field_name == "rid" for log in logs))
    for log in logs:
        self.assertIsNotNone(log.object_key)
        self.assertIsNotNone(log.model_name)
        self.assertIsNotNone(log.field_name)
        self.assertIsNone(log.new_value)
        self.assertIsNotNone(log.old_value)
        self.assertIsNotNone(log.date_created)
        self.assertIsNotNone(log.user_id)
        self.assertEqual("D", log.action, "Should be Delete action")
    # tear down the data group used by this test
    dg = DataGroup.objects.get(pk=6)
    dg.delete()
def test_functionaluse_upload(self):
    """Upload a functional-use extract CSV against a matched (but not yet
    extracted) data document and verify the created records.

    DataDocument 500 was added to the seed data for this test: it is
    matched to a file but has no extracted text yet.
    """
    dd_id = 500
    dd = DataDocument.objects.get(pk=dd_id)
    dd_pdf = dd.pdf_url()

    # One header row plus one record row referencing document 500.
    sample_csv = (
        "data_document_id,data_document_filename,prod_name,"
        "doc_date,rev_num,raw_category,raw_cas,raw_chem_name,report_funcuse"
        "\n"
        "%s,"
        "%s,"
        "sample functional use product,"
        "2018-04-07,"
        ","
        "raw PUC,"
        "RAW-CAS-01,"
        "raw chemname 01,"
        "surfactant" % (dd_id, dd_pdf))
    sample_csv_bytes = sample_csv.encode(encoding="UTF-8", errors="strict")
    in_mem_sample_csv = InMemoryUploadedFile(
        io.BytesIO(sample_csv_bytes),
        field_name="extfile-bulkformsetfileupload",
        name="Functional_use_extract_template.csv",
        content_type="text/csv",
        # Report the byte length of the wrapped payload, not the str
        # length, so the two cannot drift apart if non-ASCII data is
        # ever added to this fixture.
        size=len(sample_csv_bytes),
        charset="utf-8",
    )
    req_data = {"extfile-extraction_script": 5, "extfile-submit": "Submit"}
    req_data.update(self.mng_data)
    req = self.factory.post("/datagroup/50/", data=req_data)
    req.FILES["extfile-bulkformsetfileupload"] = in_mem_sample_csv
    # RequestFactory does not run middleware, and the view relies on
    # sessions plus the messages framework, so attach both by hand.
    middleware = SessionMiddleware()
    middleware.process_request(req)
    req.session.save()
    middleware = MessageMiddleware()
    middleware.process_request(req)
    req.session.save()
    req.user = User.objects.get(username="******")

    # Use .count() (a COUNT query) instead of len(queryset), which
    # would fetch and materialize every row.
    self.assertEqual(
        ExtractedFunctionalUse.objects.filter(
            extracted_text_id=dd_id).count(),
        0,
        "Empty before upload.",
    )
    # Now get the response
    resp = views.data_group_detail(request=req, pk=50)
    self.assertContains(resp, "1 extracted record uploaded successfully.")
    doc_count = DataDocument.objects.filter(raw_category="raw PUC").count()
    self.assertTrue(doc_count > 0,
                    "DataDocument raw category values must be updated.")
    self.assertEqual(
        ExtractedFunctionalUse.objects.filter(
            extracted_text_id=dd_id).count(),
        1,
        "One new ExtractedFunctionalUse after upload.",
    )
def test_datagroup_create(self):
    """Exercise data group creation end to end.

    Covers: a rejected upload (filename over the length limit), a
    successful creation with two registered records, the detail page's
    registered-records CSV / data-documents CSV / zipped-PDF download
    links, and uploading one PDF that matches a registered record.
    """
    long_fn = 'a filename that is too long ' * 10
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,UN,, \n"
        f"{long_fn},Body Cream,1,, \n")
    data = io.StringIO(csv_string)
    sample_csv = InMemoryUploadedFile(data,
                                      field_name='csv',
                                      name='register_records.csv',
                                      content_type='text/csv',
                                      size=len(csv_string),
                                      charset='utf-8')
    form_data = {
        'name': ['Walmart MSDS Test Group'],
        'description': ['test data group'],
        'group_type': ['1'],
        'downloaded_by': [str(User.objects.get(username='******').pk)],
        'downloaded_at': ['08/02/2018'],
        'download_script': ['1'],
        'data_source': ['10']
    }
    request = self.factory.post(path='/datagroup/new/', data=form_data)
    request.FILES['csv'] = sample_csv
    request.user = User.objects.get(username='******')
    request.session = {}
    request.session['datasource_title'] = 'Walmart'
    request.session['datasource_pk'] = 10
    resp = views.data_group_create(request=request, pk=10)
    dg_exists = DataGroup.objects.filter(
        name='Walmart MSDS Test Group').exists()
    self.assertContains(resp, 'Filename too long')
    self.assertFalse(dg_exists,
                     "the data group must not be created from a bad upload")

    # Retry with two valid records; this time creation should succeed.
    csv_string = (
        "filename,title,document_type,url,organization\n"
        "0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf,NUTRA NAIL,UN,, \n"
        "0c68ab16-2065-4d9b-a8f2-e428eb192465.pdf,Body Cream,UN,, \n")
    data = io.StringIO(csv_string)
    sample_csv = InMemoryUploadedFile(data,
                                      field_name='csv',
                                      name='register_records.csv',
                                      content_type='text/csv',
                                      size=len(csv_string),
                                      charset='utf-8')
    request = self.factory.post(path='/datagroup/new', data=form_data)
    request.FILES['csv'] = sample_csv
    request.user = User.objects.get(username='******')
    request.session = {}
    request.session['datasource_title'] = 'Walmart'
    request.session['datasource_pk'] = 10
    resp = views.data_group_create(request=request, pk=10)
    self.assertEqual(resp.status_code, 302, "Should be redirecting")
    dg = DataGroup.objects.get(name='Walmart MSDS Test Group')
    self.assertEqual(f'/datagroup/{dg.pk}/', resp.url,
                     "Should be redirecting to the proper URL")

    # test whether the file system folder was created
    self.assertIn(
        str(dg.fs_id), os.listdir(settings.MEDIA_ROOT),
        "The data group's UUID should be a folder in MEDIA_ROOT")

    # In the Data Group Detail Page
    resp = self.client.get(f'/datagroup/{dg.pk}/')

    # test whether the data documents were created
    docs = DataDocument.objects.filter(data_group=dg)
    self.assertEqual(len(docs), 2,
                     "there should be two associated documents")

    # test whether the "Download Registered Records" link is like this example
    # <a href="/datagroup/a9c7f5a7-5ad4-4f75-b877-a3747f0cc081/registered_records.csv" class="btn btn-secondary">
    # <span class="oi oi-spreadsheet"></span> Download Registered Records CSV</a>
    csv_href = f'/datagroup/{dg.pk}/registered_records.csv'
    self.assertIn(
        csv_href, str(resp._container),
        "The data group detail page must contain the right download link")

    # grab a filename from a data document and see if it's in the csv
    doc_fn = docs.first().filename
    # test whether the registered records csv download link works
    resp_rr_csv = self.client.get(
        csv_href)  # this object should be of type StreamingHttpResponse
    docfound = 'not found'
    for csv_row in resp_rr_csv.streaming_content:
        if doc_fn in str(csv_row):
            docfound = 'found'
    self.assertEqual(
        docfound, 'found',
        "the document file name should appear in the registered records csv"
    )

    # Test whether the data document csv download works
    # URL on data group detail page: datagroup/docs_csv/{pk}/
    dd_csv_href = f'/datagroup/docs_csv/{dg.pk}/'  # interpreted django URL
    resp_dd_csv = self.client.get(dd_csv_href)
    # BUG FIX: reset the flag before scanning the second stream;
    # previously the leftover 'found' from the loop above made this
    # assertion pass unconditionally.
    docfound = 'not found'
    for csv_row in resp_dd_csv.streaming_content:
        if doc_fn in str(csv_row):
            docfound = 'found'
    self.assertEqual(
        docfound, 'found',
        "the document file name should appear in the data documents csv")

    # test whether the "Download All PDF Documents" link works
    dg_zip_href = f'/datagroup/pdfs_zipped/{dg.pk}/'  # interpreted django URL
    self.assertIn(
        dg_zip_href, str(resp._container),
        "The data group detail page must contain the right zip download link"
    )
    self.client.get(dg_zip_href)  # must not raise

    # test uploading one pdf that matches a registered record
    f = TemporaryUploadedFile(
        name='0bf5755e-3a08-4024-9d2f-0ea155a9bd17.pdf',
        content_type='application/pdf',
        size=47,
        charset=None)
    request = self.factory.post(path='/datagroup/%s' % dg.pk,
                                data={'upload': 'Submit'})
    request.FILES['multifiles'] = f
    request.user = User.objects.get(username='******')
    resp = views.data_group_detail(request=request, pk=dg.pk)
    doc = DataDocument.objects.get(title='NUTRA NAIL')
    fn = doc.get_abstract_filename()
    folder_name = str(dg.fs_id)
    stored_file = f'{folder_name}/pdf/{fn}'
    pdf_path = f'{settings.MEDIA_ROOT}{stored_file}'
    self.assertTrue(os.path.exists(pdf_path),
                    "the stored file should be in MEDIA_ROOT/dg.fs_id")
    f.close()