def test_import_empty_tsv(config, database, caplog):
    """
    Import a directory holding a TSV which has no data.

    The finished job must report exactly one error.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 3")

    import_req = ImportReq(source_path=str(EMPTY_TSV_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    reported = get_job_errors(finished_job)
    assert len(reported) == 1
# NOTE(review): another function named test_import_classif_issue is defined
# further down in this file; Python keeps only the later binding of a name,
# so only one of the two can be collected - confirm which one is intended.
def test_import_classif_issue(config, database, caplog):
    """
    Import a TSV which references an unknown classification id.

    The job errors must consist of the single message naming the offending id.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 5")

    import_req = ImportReq(source_path=str(ISSUES_DIR2))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    expected = [
        "Some specified classif_id don't exist, correct them prior to reload: 99999999"
    ]
    reported = get_job_errors(finished_job)
    assert reported == expected
def test_import_breaking_unicity(config, database, caplog):
    """
    Import data which breaks the unicity rules of the object hierarchy.

    Rules:
        - Sample orig_id is unique per project.
        - Acquisition orig_id is unique per project and belongs to a single Sample.
        - Process orig_id is unique per acquisition (structurally, as it's a
          1<->1 relationship).

    So, if:
        S("a") -> A("b") -> P("c")
    Then:
        S("a2") -> A("b") is illegal

    The message should be like: Acquisition 'b' already belongs to sample 'a'
    so it cannot be created under 'a2'.
    """
    caplog.set_level(logging.DEBUG)

    # The project created by the first test is needed, so look it up by title.
    found = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    project_id = found.projid

    # Do preparation
    import_req = ImportReq(source_path=str(BREAKING_HIERARCHY_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)

    # Here the value returned by check_job_errors is what gets compared.
    reported = check_job_errors(finished_job)
    expected_message = (
        "Acquisition 'generic_m106_mn01_n1_sml' is already associated with sample "
        "'{'m106_mn01_n1_sml'}', it cannot be associated as well with "
        "'m106_mn01_n1_sml_brk"
    )
    assert reported == [expected_message]
def test_import_again_not_skipping_nor_imgs(config, database, caplog):
    """
    Re-import into the same project, skipping neither TSVs nor images.

    CANNOT RUN BY ITSELF: the project titled "Test Create Update" is looked up,
    not created, here.

    Exactly 11 of the job errors must mention "Duplicate object".
    """
    caplog.set_level(logging.DEBUG)

    # The project created by the first test is needed, so look it up by title.
    found = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    project_id = found.projid

    import_req = ImportReq(source_path=str(PLAIN_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    duplicate_count = sum(
        1
        for message in get_job_errors(finished_job)
        if "Duplicate object" in message
    )
    assert duplicate_count == 11
def test_import_issues(config, database, caplog):
    """
    Import a TSV which contains loads of problems.

    The job errors must match, in order, the full list of expected diagnostics.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 4")

    import_req = ImportReq(source_path=str(ISSUES_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    expected = [
        "Invalid Header 'nounderscorecol' in file ecotaxa_m106_mn01_n3_sml.tsv. Format must be Table_Field. Field ignored",
        "Invalid Header 'unknown_target' in file ecotaxa_m106_mn01_n3_sml.tsv. Unknown table prefix. Field ignored",
        "Invalid Type '[H]' for Field 'object_wrongtype' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect Type. Field ignored",
        "Invalid float value 'a' for Field 'object_buggy_float' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Lat. value '100' for Field 'object_lat' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -90/+90°.",
        "Invalid Long. value '200' for Field 'object_lon' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -180/+180°.",
        "Invalid Date value '20140433' for Field 'object_date' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Time value '9920' for Field 'object_time' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Annotation Status 'predit' for Field 'object_annotation_status' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Missing Image 'm106_mn01_n3_sml_1081.jpg2' in file ecotaxa_m106_mn01_n3_sml.tsv. ",
        "Error while reading image 'm106_mn01_n3_sml_corrupted_image.jpg' "
        "from file ecotaxa_m106_mn01_n3_sml.tsv: cannot identify image file '.../m106_mn01_n3_sml_corrupted_image.jpg' <class 'PIL.UnidentifiedImageError'>",
        "Missing object_id in line '5' of file ecotaxa_m106_mn01_n3_sml.tsv. ",
        # NOTE(review): this literal contains a line break just before its
        # closing quote in the file as received; it is kept here as "\n".
        # The exact character could not be determined - confirm against the
        # message really produced by the import.
        "Missing Image 'nada.png' in file ecotaxa_m106_mn01_n3_sml.tsv. \n",
    ]
    reported = get_job_errors(finished_job)
    assert reported == expected


# @pytest.mark.skip()
def test_import_classif_issue(config, database, caplog):
    """
    Import a TSV which references an unknown classification id.

    The job errors must consist of the single message naming the offending id.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 5")

    import_req = ImportReq(source_path=str(ISSUES_DIR2))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    expected = [
        "Some specified classif_id don't exist, correct them prior to reload: 99999999"
    ]
    reported = get_job_errors(finished_job)
    assert reported == expected
def test_import_again_irrelevant_skipping(config, database, caplog):
    """
    Re-import similar files into the same project, with both skip options on.

    CANNOT RUN BY ITSELF: the project titled "Test Create Update" is looked up,
    not created, here.

    At least one job error must mention that the new TSV file(s) are not
    compliant.
    """
    caplog.set_level(logging.DEBUG)

    # The project created by the first test is needed, so look it up by title.
    found = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    project_id = found.projid

    # Do preparation
    import_req = ImportReq(
        source_path=str(EMPTY_TSV_IN_UPD_DIR),
        skip_loaded_files=True,
        skip_existing_objects=True,
    )
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    reported = get_job_errors(finished_job)
    assert any(
        "new TSV file(s) are not compliant" in message for message in reported
    )
def test_import_too_many_custom_columns(config, database, caplog):
    """
    Import a TSV which contains too many custom columns.

    Not a realistic case, but it simulates what happens when importing into a
    project with mappings. One "cannot be mapped" error is expected for each
    of acq_cus29, acq_cus30 and acq_cus31.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test LS 6")

    import_req = ImportReq(source_path=str(ISSUES_DIR3))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)
    check_job_errors(finished_job)

    expected = [
        'Field acq_cus29, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus30, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus31, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
    ]
    reported = get_job_errors(finished_job)
    assert reported == expected
def test_import_sparse(config, database, caplog):
    """
    Import a sparse file, in which some columns are missing.

    Two errors are expected, about the mandatory acq_id and sample_id fields.
    Afterwards the captured log messages are printed and the project is dumped
    to "chk.dmp" using AsciiDumper.
    """
    caplog.set_level(logging.DEBUG)
    project_id = create_project(ADMIN_USER_ID, "Test Sparse")

    import_req = ImportReq(source_path=str(SPARSE_DIR))
    with FileImport(project_id, import_req) as importer:
        response: ImportRsp = importer.run(ADMIN_USER_ID)
    finished_job = wait_for_stable(response.job_id)

    # Here the value returned by check_job_errors is what gets compared.
    reported = check_job_errors(finished_job)
    expected = [
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field acq_id is mandatory as there are some acq columns: ['acq_hardware', 'acq_imgtype', 'acq_instrument'].",
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field sample_id is mandatory as there are some sample columns: ['sample_program', 'sample_ship', 'sample_stationid'].",
    ]
    assert reported == expected

    captured_log = "\n".join(caplog.messages)
    print(captured_log)

    with AsciiDumper() as dumper:
        dumper.run(projid=project_id, out="chk.dmp")