def import_plain(prj_id):
    """Import the PLAIN_DIR fixture into project *prj_id*, answering the
    job's question about unknown users/taxa so the import can complete."""
    req = ImportReq(source_path=str(PLAIN_DIR), skip_existing_objects=True)
    with FileImport(prj_id, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    pending_job = wait_for_stable(import_rsp.job_id)
    # The fixture references users and taxa unknown to the DB, so the job
    # pauses in 'Asking' state with the lists to resolve.
    assert pending_job.state == DBJobStateEnum.Asking
    assert pending_job.question == {"missing_users": ["admin4test", "elizandro rodriguez"],
                                    "missing_taxa": ["other", "ozzeur"]}
    mapping_reply = {
        "users": {'admin4test': 1,
                  'elizandro rodriguez': 1},  # Map to admin
        "taxa": {'other': 99999,  # 'other<dead'
                 'ozzeur': 85011}  # 'other<living'
    }
    with JobCRUDService() as job_sce:
        job_sce.reply(ADMIN_USER_ID, import_rsp.job_id, mapping_reply)
    finished_job = wait_for_stable(import_rsp.job_id)
    check_job_ok(finished_job)
def test_import_images_only(config, database, caplog, title):
    """ Simple import AKA image only import, with fixed values.

    First a dry run with invalid fixed values (must report one error per bad
    field), then a real import, then a re-import to verify no duplicate
    parent entities get created. Returns the created project id so other
    tests can chain on it.
    """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, title)
    # Deliberately invalid values for the dry run
    vals = {"latitude": "abcde", "longitude": "456.5", "depthmin": "very very low"}
    params = SimpleImportReq(task_id=0, source_path=str(PLAIN_DIR), values=vals)
    with SimpleImport(prj_id, params, dry_run=True) as sce:
        rsp = sce.run(ADMIN_USER_ID)
    # One validation error per invalid fixed value
    assert rsp.errors == ["'abcde' is not a valid value for SimpleImportFields.latitude",
                         "'456.5' is not a valid value for SimpleImportFields.longitude",
                         "'very very low' is not a valid value for SimpleImportFields.depthmin"]
    # Do real import
    vals["latitude"] = "43.8802"
    vals["longitude"] = "7.2329"
    vals["depthmin"] = "500"
    params.values = vals
    with SimpleImport(prj_id, params, dry_run=False) as sce:
        rsp: SimpleImportRsp = sce.run(ADMIN_USER_ID)
    print("\n".join(caplog.messages))
    assert rsp.errors == []
    job_id = rsp.job_id
    job = wait_for_stable(job_id)
    # The PLAIN_DIR fixture apparently holds 8 images — TODO confirm
    assert job.result["nb_images"] == 8
    # Check that all went fine
    for a_msg in caplog.records:
        assert a_msg.levelno != logging.ERROR, a_msg.getMessage()
    # Second run, ensure we don't create dummy parents
    caplog.clear()
    with SimpleImport(prj_id, params, dry_run=False) as sce:
        rsp: SimpleImportRsp = sce.run(ADMIN_USER_ID)
    job_id2 = rsp.job_id
    # Job ids are presumably monotonically increasing — verify against JobCRUD
    assert job_id2 > job_id
    job = wait_for_stable(job_id2)
    print("\n2:".join(caplog.messages))
    for a_msg in caplog.records:
        assert a_msg.levelno != logging.ERROR, a_msg.getMessage()
        # "++ ID" presumably logs the creation of a new parent entity
        assert "++ ID" not in a_msg.getMessage()
    return prj_id
def test_import_breaking_unicity(config, database, caplog):
    """
        Sample orig_id is unique per project
        Acquisition orig_id is unique per project and belongs to a single Sample
        Process orig_id is unique per acquisition (structurally as it's 1<->1 relationship)
        So, if:
            S("a") -> A("b") -> P ("c")
        Then:
            S("a2") -> A("b") is illegal
        Message should be like 'Acquisition 'b' already belongs to sample 'a' so it cannot be created under 'a2'
    """
    caplog.set_level(logging.DEBUG)
    srch = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    prj_id = srch.projid  # <- need the project from first test
    # Do preparation
    params = ImportReq(source_path=str(BREAKING_HIERARCHY_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    errors = check_job_errors(job)
    # NOTE(review): the expected message embeds a set repr and lacks a closing
    # quote after '_brk' — that is what the service currently emits; kept verbatim.
    assert errors == ["Acquisition 'generic_m106_mn01_n1_sml' is already associated with sample "
                      "'{'m106_mn01_n1_sml'}', it cannot be associated as well with "
                      "'m106_mn01_n1_sml_brk"]
def do_import(prj_id: int, source_path: str, user_id: int):
    """ Import helper for tests """
    # Launch the import of source_path into the project
    req = ImportReq(source_path=str(source_path))
    with FileImport(prj_id, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(user_id)
    # Wait, answer the user/taxa question if it was raised, then expect success
    finished = fill_in_if_missing(wait_for_stable(import_rsp.job_id))
    check_job_ok(finished)
    return prj_id
def test_import_empty_tsv(config, database, caplog):
    """ a TSV but no data """
    caplog.set_level(logging.DEBUG)
    the_prj = create_project(ADMIN_USER_ID, "Test LS 3")
    req = ImportReq(source_path=str(EMPTY_TSV_DIR))
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    finished = wait_for_stable(import_rsp.job_id)
    check_job_errors(finished)
    # Exactly one complaint for the data-less TSV
    assert len(get_job_errors(finished)) == 1
def test_import_uvp6(config, database, caplog, title):
    """Import a UVP6 file; the job must succeed with no ERROR-level log line."""
    caplog.set_level(logging.DEBUG)
    the_prj = create_project(ADMIN_USER_ID, title)
    req = ImportReq(source_path=str(V6_FILE))
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    check_job_ok(wait_for_stable(import_rsp.job_id))
    # Check that all went fine
    for a_record in caplog.records:
        assert a_record.levelno != logging.ERROR, a_record.getMessage()
    return the_prj
def test_import(config, database, caplog, title):
    """Nominal import of the plain fixture file into a fresh project;
    returns the project id for chained tests."""
    caplog.set_level(logging.DEBUG)
    # Create a dest project
    dest_prj = create_project(ADMIN_USER_ID, title)
    # Prepare import request
    req = ImportReq(source_path=str(PLAIN_FILE))
    with FileImport(dest_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    the_job = wait_for_stable(import_rsp.job_id)
    the_job = fill_in_if_missing(the_job)
    # assert (job.state, job.progress_pct, job.progress_msg) == (DBJobStateEnum.Finished, 100, "Done")
    # assert job.result["rowcount"] == 8
    return dest_prj
def test_import_classif_issue(config, database, caplog):
    """ The TSV contains an unknown classification id """
    caplog.set_level(logging.DEBUG)
    the_prj = create_project(ADMIN_USER_ID, "Test LS 5")
    req = ImportReq(source_path=str(ISSUES_DIR2))
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    finished = wait_for_stable(import_rsp.job_id)
    check_job_errors(finished)
    # The bogus classif_id from the fixture must be reported verbatim
    assert get_job_errors(finished) == [
        "Some specified classif_id don't exist, correct them prior to reload: 99999999"
    ]
def do_import_update(prj_id, caplog, classif, source=None):
    """Helper: run an update-mode import (update_mode=*classif*) of *source*
    (defaults to UPDATE_DIR) into *prj_id*, answering the missing users/taxa
    question, and assert the run is clean and creates no extra parents.
    """
    if source is None:
        source = str(UPDATE_DIR)
    params = ImportReq(skip_existing_objects=True, update_mode=classif, source_path=source)
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    # The fixture references unknown users/taxa: the job pauses and asks
    assert job.state == DBJobStateEnum.Asking
    assert job.question == {"missing_users": ["admin4test", "elizandro rodriguez"],
                            "missing_taxa": ["other", "ozzeur"]}
    reply = {"users": {'admin4test': 1, 'elizandro rodriguez': 1},  # Map to admin
             "taxa": {'other': 99999,  # 'other<dead'
                      'ozzeur': 85011}}  # 'other<living'
    # Clear before the resumed run so the log checks below cover only it
    caplog.clear()
    with JobCRUDService() as sce:
        sce.reply(ADMIN_USER_ID, rsp.job_id, reply)
    job = wait_for_stable(rsp.job_id)
    check_job_ok(job)
    # Check that all went fine
    for a_msg in caplog.records:
        assert a_msg.levelno != logging.ERROR, a_msg.getMessage()
    # #498: No extra parent should be created
    for a_msg in caplog.records:
        assert "++ ID" not in a_msg.getMessage()
def test_import_uvp6_zip_in_dir(config, database, caplog):
    """ An *Images.zip inside a directory. """
    caplog.set_level(logging.DEBUG)
    the_prj = create_project(ADMIN_USER_ID, "Test LS 8")
    req = ImportReq(source_path=str(V6_DIR))
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    check_job_ok(wait_for_stable(import_rsp.job_id))
    # Nothing at ERROR level should have been logged
    for a_record in caplog.records:
        assert a_record.levelno != logging.ERROR, a_record.getMessage()
def test_import_again_not_skipping_nor_imgs(config, database, caplog):
    """ Re-import into same project, not skipping TSVs or images
        CANNOT RUN BY ITSELF """
    caplog.set_level(logging.DEBUG)
    found = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    the_prj = found.projid  # <- need the project from first test
    req = ImportReq(source_path=str(PLAIN_DIR))
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    finished = wait_for_stable(import_rsp.job_id)
    check_job_errors(finished)
    # Every object from the first import should now be flagged as a duplicate
    nb_dups = sum(1 for an_err in get_job_errors(finished)
                  if "Duplicate object" in an_err)
    assert nb_dups == 11
def test_import_a_bit_more_skipping(config, database, caplog, title):
    """ Re-import similar files into same project, with an extra one.
        The extra one has missing values in the TSV.
        CANNOT RUN BY ITSELF """
    caplog.set_level(logging.DEBUG)
    found = search_unique_project(ADMIN_USER_ID, title)
    the_prj = found.projid  # <- need the project from first test
    # Do preparation
    req = ImportReq(source_path=str(PLUS_DIR),
                    skip_loaded_files=True,
                    skip_existing_objects=True)
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    finished = fill_in_if_missing(wait_for_stable(import_rsp.job_id))
    check_job_ok(finished)
def fill_in_if_missing(job):
    """If *job* paused asking for user/taxa mapping, answer the question
    (mapping everything to admin) and wait for the resumed run; otherwise
    return the job unchanged."""
    if job.state != DBJobStateEnum.Asking:
        return job
    job_id = job.id
    # Missing user or taxa -> should proceed to step 2 for filling missing
    assert job.progress_msg == "Some users or taxonomic references could not be matched"
    # Simulate a missing user and map him to admin
    with JobCRUDService() as sce:
        sce.reply(ADMIN_USER_ID, job_id,
                  {"users": {"admin4test": 1, "elizandro rodriguez": 1},
                   "taxa": {}})
    return wait_for_stable(job_id)
def test_import_issues(config, database, caplog):
    """ The TSV contains loads of problems

    Each malformed header/value/image in the ISSUES_DIR fixture must produce
    exactly one, precisely-worded, error in the job report.
    """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 4")
    params = ImportReq(source_path=str(ISSUES_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    assert errors == [
        "Invalid Header 'nounderscorecol' in file ecotaxa_m106_mn01_n3_sml.tsv. Format must be Table_Field. Field ignored",
        "Invalid Header 'unknown_target' in file ecotaxa_m106_mn01_n3_sml.tsv. Unknown table prefix. Field ignored",
        "Invalid Type '[H]' for Field 'object_wrongtype' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect Type. Field ignored",
        "Invalid float value 'a' for Field 'object_buggy_float' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Lat. value '100' for Field 'object_lat' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -90/+90°.",
        "Invalid Long. value '200' for Field 'object_lon' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -180/+180°.",
        "Invalid Date value '20140433' for Field 'object_date' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Time value '9920' for Field 'object_time' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Annotation Status 'predit' for Field 'object_annotation_status' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Missing Image 'm106_mn01_n3_sml_1081.jpg2' in file ecotaxa_m106_mn01_n3_sml.tsv. ",
        "Error while reading image 'm106_mn01_n3_sml_corrupted_image.jpg' "
        "from file ecotaxa_m106_mn01_n3_sml.tsv: cannot identify image file '.../m106_mn01_n3_sml_corrupted_image.jpg' <class 'PIL.UnidentifiedImageError'>",
        "Missing object_id in line '5' of file ecotaxa_m106_mn01_n3_sml.tsv. ",
        # NOTE(review): trailing space below matches the service's message format,
        # consistent with the other "Missing Image" entry above.
        "Missing Image 'nada.png' in file ecotaxa_m106_mn01_n3_sml.tsv. "
    ]
# NOTE(review): a second, byte-identical definition of test_import_classif_issue
# followed here (preceded by a commented-out @pytest.mark.skip()). It shadowed
# the earlier definition of the same name, so pytest collected it only once
# anyway; the duplicate has been removed.
def test_import_again_irrelevant_skipping(config, database, caplog):
    """ Re-import similar files into same project
        CANNOT RUN BY ITSELF """
    caplog.set_level(logging.DEBUG)
    found = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    the_prj = found.projid  # <- need the project from first test
    # Do preparation
    req = ImportReq(source_path=str(EMPTY_TSV_IN_UPD_DIR),
                    skip_loaded_files=True,
                    skip_existing_objects=True)
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    finished = wait_for_stable(import_rsp.job_id)
    check_job_errors(finished)
    # At least one error must mention the non-compliant new TSV(s)
    found_err = any("new TSV file(s) are not compliant" in an_err
                    for an_err in get_job_errors(finished))
    assert found_err
def test_import_sparse(config, database, caplog):
    """ Import a sparse file, some columns are missing. """
    caplog.set_level(logging.DEBUG)
    the_prj = create_project(ADMIN_USER_ID, "Test Sparse")
    req = ImportReq(source_path=str(SPARSE_DIR))
    with FileImport(the_prj, req) as import_sce:
        import_rsp: ImportRsp = import_sce.run(ADMIN_USER_ID)
    finished = wait_for_stable(import_rsp.job_id)
    # Both parent id columns are mandatory once any column of theirs is present
    expected = [
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field acq_id is mandatory as there are some acq columns: ['acq_hardware', 'acq_imgtype', 'acq_instrument'].",
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field sample_id is mandatory as there are some sample columns: ['sample_program', 'sample_ship', 'sample_stationid']."
    ]
    assert check_job_errors(finished) == expected
    print("\n".join(caplog.messages))
    with AsciiDumper() as dump_sce:
        dump_sce.run(projid=the_prj, out="chk.dmp")
def test_import_too_many_custom_columns(config, database, caplog):
    """ The TSV contains too many custom columns. Not a realistic case, but it simulates
        what happens if importing into a project with mappings """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 6")
    params = ImportReq(source_path=str(ISSUES_DIR3))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    # Columns acq_cus29..31 overflow the mapping capacity — presumably the acq
    # mapping holds up to 29 custom slots (cus00..cus28); TODO confirm against model
    assert errors == [
        'Field acq_cus29, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus30, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus31, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.'
    ]
def test_emodnet_export(config, database, fastapi, caplog):
    """End-to-end EMODnet export of a collection.

    Scenario: import a project, wrap it in a collection, run a first export
    that must fail for missing metadata, then fill in license/contact/users,
    validate all objects, and run three exports (with/without zeroes, with/
    without computations) whose zips are compared to reference archives.
    """
    caplog.set_level(logging.FATAL)
    # Admin imports the project
    from tests.test_import import test_import, test_import_a_bit_more_skipping
    prj_id = test_import(config, database, caplog, "EMODNET project")
    # Add a sample spanning 2 days
    test_import_a_bit_more_skipping(config, database, caplog, "EMODNET project")
    # Get the project for update
    url = PROJECT_QUERY_URL.format(project_id=prj_id, manage=True)
    rsp = fastapi.get(url, headers=ADMIN_AUTH)
    prj_json = rsp.json()
    coll_title = "EMODNET test collection"
    # Create a minimal collection with only this project
    url = COLLECTION_CREATE_URL
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"title": coll_title,
                                                      "project_ids": [prj_id]})
    assert rsp.status_code == status.HTTP_200_OK
    coll_id = rsp.json()
    caplog.set_level(logging.DEBUG)
    # Admin exports it
    # First attempt with LOTS of missing data
    url = COLLECTION_EXPORT_EMODNET_URL.format(collection_id=coll_id, dry=False,
                                               zeroes=True, comp=True, morph=True)
    rsp = fastapi.get(url, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    job_id = rsp.json()["job_id"]
    job = wait_for_stable(job_id)
    # Expected failure: metadata is still incomplete at this point
    api_check_job_failed(fastapi, job_id, '5 error(s) during run')
    # TODO: Errors text
    # assert rsp.json()["errors"] == ['No valid data creator (user or organisation) found for EML metadata.',
    #                                 'No valid contact user found for EML metadata.',
    #                                 "No valid metadata provider user found for EML metadata.",
    #                                 "Collection 'abstract' field is empty",
    #                                 "Collection license should be one of [<LicenseEnum.CC0: 'CC0 1.0'>, "
    #                                 "<LicenseEnum.CC_BY: 'CC BY 4.0'>, <LicenseEnum.CC_BY_NC: 'CC BY-NC 4.0'>] to be "
    #                                 "accepted, not ."]
    # assert rsp.json()["warnings"] == []
    # Validate everything, otherwise no export.
    obj_ids = _prj_query(fastapi, CREATOR_AUTH, prj_id)
    assert len(obj_ids) == 11
    url = OBJECT_SET_CLASSIFY_URL
    classifications = [-1 for _obj in obj_ids]  # Keep current
    rsp = fastapi.post(url, headers=ADMIN_AUTH, json={"target_ids": obj_ids,
                                                      "classifications": classifications,
                                                      "wanted_qualification": "V"})
    assert rsp.status_code == status.HTTP_200_OK
    # Update underlying project license
    url = PROJECT_UPDATE_URL.format(project_id=prj_id)
    prj_json["license"] = "CC BY 4.0"
    # And give a contact who is now mandatory
    prj_json["contact"] = prj_json["managers"][0]
    rsp = fastapi.put(url, headers=ADMIN_AUTH, json=prj_json)
    assert rsp.status_code == status.HTTP_200_OK
    add_concentration_data(fastapi, prj_id)
    # Update the collection to fill in missing data
    url = COLLECTION_QUERY_URL.format(collection_id=coll_id)
    rsp = fastapi.get(url, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    the_coll = rsp.json()
    url = COLLECTION_UPDATE_URL.format(collection_id=coll_id)
    # NOTE(review): the leading '#' on each line below is part of the abstract
    # text itself (inside the string), not Python comments.
    the_coll['abstract'] = """
    This series is part of the long term planktonic monitoring of
# Villefranche-sur-mer, which is one of the oldest and richest in the world.
# The data collection and processing has been funded by several projects
# over its lifetime. It is currently supported directly by the Institut de la Mer
# de Villefranche (IMEV), as part of its long term monitoring effort.
    """
    the_coll[
        'license'] = "CC BY 4.0"  # Would do nothing as the license comes from the underlying project
    user_doing_all = {'id': REAL_USER_ID,
                      # TODO: below is redundant with ID and ignored, but fails validation (http 422) if not set
                      'email': 'creator',
                      'name': 'User Creating Projects'}
    the_coll['creator_users'] = [user_doing_all]
    the_coll['contact_user'] = user_doing_all
    the_coll['provider_user'] = user_doing_all
    rsp = fastapi.put(url, headers=ADMIN_AUTH, json=the_coll)
    assert rsp.status_code == status.HTTP_200_OK
    # Export #2: metadata now complete, no zeroes, with computations
    url = COLLECTION_EXPORT_EMODNET_URL.format(collection_id=coll_id, dry=False,
                                               zeroes=False, comp=True, morph=True)
    rsp = fastapi.get(url, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    job_id = rsp.json()["job_id"]
    job = wait_for_stable(job_id)
    api_check_job_ok(fastapi, job_id)
    # warns = rsp.json()["warnings"]
    # # assert warns == []
    # assert rsp.json()["errors"] == []
    # job_id = rsp.json()["job_id"]
    # Download the result zip
    url = JOB_DOWNLOAD_URL.format(job_id=job_id)
    # Ensure it's not public
    rsp = fastapi.get(url)
    assert rsp.status_code == status.HTTP_403_FORBIDDEN
    # But the creator can get it
    # rsp = fastapi.get(url, headers=REAL_USER_AUTH)
    # assert rsp.status_code == status.HTTP_200_OK
    # Admin can get it
    rsp = fastapi.get(url, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    set_dates_in_ref(ref_zip)
    unzip_and_check(rsp.content, ref_zip)
    # Export #3: same but with zeroes included
    url_with_0s = COLLECTION_EXPORT_EMODNET_URL.format(collection_id=coll_id, dry=False,
                                                       zeroes=True, comp=True, morph=True)
    rsp = fastapi.get(url_with_0s, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    job_id = rsp.json()["job_id"]
    job = wait_for_stable(job_id)
    api_check_job_ok(fastapi, job_id)
    dl_url = JOB_DOWNLOAD_URL.format(job_id=job_id)
    rsp = fastapi.get(dl_url, headers=ADMIN_AUTH)
    set_dates_in_ref(with_zeroes_zip)
    unzip_and_check(rsp.content, with_zeroes_zip)
    # Export #4: raw data only, no computations
    url_raw_data = COLLECTION_EXPORT_EMODNET_URL.format(collection_id=coll_id, dry=False,
                                                        zeroes=False,
                                                        comp=False, morph=True)
    rsp = fastapi.get(url_raw_data, headers=ADMIN_AUTH)
    assert rsp.status_code == status.HTTP_200_OK
    job_id = rsp.json()["job_id"]
    job = wait_for_stable(job_id)
    api_check_job_ok(fastapi, job_id)
    dl_url = JOB_DOWNLOAD_URL.format(job_id=job_id)
    rsp = fastapi.get(dl_url, headers=ADMIN_AUTH)
    set_dates_in_ref(no_computations_zip)
    unzip_and_check(rsp.content, no_computations_zip)
    # Finally, the collection can be queried back by its title
    url_query_back = COLLECTION_QUERY_BY_TITLE_URL.format(title=coll_title)
    rsp = fastapi.get(url_query_back)
    assert rsp.status_code == status.HTTP_200_OK
    coll_desc = rsp.json()
    assert coll_desc['title'] == coll_title