Example #1
def test_import_breaking_unicity(config, database, caplog):
    """
        Sample orig_id is unique per project
        Acquisition orig_id is unique per project and belongs to a single Sample
        Process orig_id is unique per acquisition (structurally, as it's a 1<->1 relationship)
        So, if:
            S("a") -> A("b") -> P ("c")
        Then:
            S("a2") -> A("b") is illegal
        Message should be like: Acquisition 'b' already belongs to sample 'a', so it cannot be created under 'a2'.
    """
    caplog.set_level(logging.DEBUG)
    srch = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    prj_id = srch.projid  # <- need the project from first test
    # Do preparation
    params = ImportReq(source_path=str(BREAKING_HIERARCHY_DIR))

    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    errors = check_job_errors(job)
    assert errors == [
        "Acquisition 'generic_m106_mn01_n1_sml' is already associated with sample "
        "'{'m106_mn01_n1_sml'}', it cannot be associated as well with "
        "'m106_mn01_n1_sml_brk"
    ]
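The rule exercised here can be sketched outside the library. The helper below is purely illustrative (plain dictionaries instead of EcoTaxa's database layer); register_acquisition and its error text are assumptions that merely mirror the message checked above.

def register_acquisition(acq_owner_by_id: dict, sample_id: str, acq_id: str) -> None:
    """ Illustrative sketch: within a project, an acquisition orig_id belongs to one sample. """
    owner = acq_owner_by_id.get(acq_id)
    if owner is not None and owner != sample_id:
        raise ValueError(f"Acquisition '{acq_id}' is already associated with sample "
                         f"'{owner}', it cannot be associated as well with '{sample_id}'")
    acq_owner_by_id[acq_id] = sample_id

owners: dict = {}
register_acquisition(owners, "a", "b")       # S("a") -> A("b"): OK
register_acquisition(owners, "a", "b")       # same pair again: still OK
try:
    register_acquisition(owners, "a2", "b")  # S("a2") -> A("b"): rejected
except ValueError as reason:
    print(reason)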
Example #2
def import_plain(prj_id):
    params = ImportReq(source_path=str(PLAIN_DIR), skip_existing_objects=True)
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)

    assert job.state == DBJobStateEnum.Asking
    assert job.question == {
        "missing_users": ["admin4test", "elizandro rodriguez"],
        "missing_taxa": ["other", "ozzeur"]
    }

    reply = {
        "users": {
            'admin4test': 1,
            'elizandro rodriguez': 1
        },  # Map to admin
        "taxa": {
            'other': 99999,  # 'other<dead'
            'ozzeur': 85011  # 'other<living'
        }
    }
    with JobCRUDService() as sce:
        sce.reply(ADMIN_USER_ID, rsp.job_id, reply)
    job = wait_for_stable(rsp.job_id)
    check_job_ok(job)
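The Asking/reply round-trip above can also be driven from the question itself rather than a hard-coded dict. The helper below is an assumption (it does not exist in the test suite); the fallback ids simply reuse the values from the test above.

def build_reply(question: dict, user_fallback: int, taxa_map: dict) -> dict:
    """ Hypothetical helper: answer a job question by mapping every missing name. """
    return {
        "users": {name: user_fallback for name in question.get("missing_users", [])},
        "taxa": {name: taxa_map[name] for name in question.get("missing_taxa", [])},
    }

question = {"missing_users": ["admin4test", "elizandro rodriguez"],
            "missing_taxa": ["other", "ozzeur"]}
reply = build_reply(question, user_fallback=1,  # map everyone to admin
                    taxa_map={"other": 99999, "ozzeur": 85011})
assert reply == {"users": {"admin4test": 1, "elizandro rodriguez": 1},
                 "taxa": {"other": 99999, "ozzeur": 85011}}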
Example #3
def do_import(prj_id: int, source_path: str, user_id: int):
    """ Import helper for tests """
    # Do preparation
    params = ImportReq(source_path=str(source_path))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(user_id)
    job = wait_for_stable(rsp.job_id)
    job = fill_in_if_missing(job)
    check_job_ok(job)
    return prj_id
Example #4
def test_import_empty_tsv(config, database, caplog):
    """ a TSV but no data """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 3")

    params = ImportReq(source_path=str(EMPTY_TSV_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    assert len(get_job_errors(job)) == 1
Example #5
def test_import_uvp6(config, database, caplog, title):
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, title)
    params = ImportReq(source_path=str(V6_FILE))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_ok(job)
    # Check that all went fine
    for a_msg in caplog.records:
        assert a_msg.levelno != logging.ERROR, a_msg.getMessage()
    return prj_id
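The caplog scan near the end of this test, which fails on any ERROR-level record, reappears in other examples below. A hypothetical helper, not part of the test suite, could factor it out:

import logging

def assert_no_error_logged(caplog) -> None:
    """ Hypothetical helper mirroring the check above: no ERROR record was emitted. """
    for a_record in caplog.records:
        assert a_record.levelno != logging.ERROR, a_record.getMessage()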
Example #6
def test_import(config, database, caplog, title):
    caplog.set_level(logging.DEBUG)
    # Create a dest project
    prj_id = create_project(ADMIN_USER_ID, title)
    # Prepare import request
    params = ImportReq(source_path=str(PLAIN_FILE))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    job = fill_in_if_missing(job)
    # assert (job.state, job.progress_pct, job.progress_msg) == (DBJobStateEnum.Finished, 100, "Done")
    # assert job.result["rowcount"] == 8
    return prj_id
Example #7
def test_import_classif_issue(config, database, caplog):
    """ The TSV contains an unknown classification id """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 5")

    params = ImportReq(source_path=str(ISSUES_DIR2))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    assert errors == [
        "Some specified classif_id don't exist, correct them prior to reload: 99999999"
    ]
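The check behind this error message can be pictured as a set difference between the classif_ids used in the TSV and the ids known to the taxonomy. The function below is an illustration only, with a stand-in set instead of a real taxonomy lookup.

def check_classif_ids(used_ids: set, known_ids: set) -> list:
    """ Illustrative sketch: report classif_ids absent from the taxonomy. """
    unknown = sorted(used_ids - known_ids)
    if unknown:
        return ["Some specified classif_id don't exist, correct them prior to reload: "
                + ",".join(str(an_id) for an_id in unknown)]
    return []

print(check_classif_ids({85011, 99999999}, {85011, 99999}))
# -> ["Some specified classif_id don't exist, correct them prior to reload: 99999999"]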
Example #8
def test_import_uvp6_zip_in_dir(config, database, caplog):
    """
        An *Images.zip inside a directory.
    """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 8")

    params = ImportReq(source_path=str(V6_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_ok(job)
    # Check that all went fine
    for a_msg in caplog.records:
        assert a_msg.levelno != logging.ERROR, a_msg.getMessage()
Example #9
def test_import_again_not_skipping_nor_imgs(config, database, caplog):
    """ Re-import into same project, not skipping TSVs or images
        CANNOT RUN BY ITSELF """
    caplog.set_level(logging.DEBUG)
    srch = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    prj_id = srch.projid  # <- need the project from first test
    params = ImportReq(source_path=str(PLAIN_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    nb_errs = len([
        an_err for an_err in get_job_errors(job)
        if "Duplicate object" in an_err
    ])
    assert nb_errs == 11
Example #10
def test_import_a_bit_more_skipping(config, database, caplog, title):
    """ Re-import similar files into same project, with an extra one.
        The extra one has missing values in the TSV.
        CANNOT RUN BY ITSELF """
    caplog.set_level(logging.DEBUG)
    srch = search_unique_project(ADMIN_USER_ID, title)
    prj_id = srch.projid  # <- need the project from first test
    # Do preparation
    params = ImportReq(source_path=str(PLUS_DIR),
                       skip_loaded_files=True,
                       skip_existing_objects=True)
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    job = fill_in_if_missing(job)
    check_job_ok(job)
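The two skip flags combined here can be read as row-level filters: skip_loaded_files drops TSVs that were already imported, skip_existing_objects drops rows whose object already exists in the project. The sketch below is an assumption about the intent of the flags, not the library's code.

def select_rows(rows, loaded_files, existing_ids,
                skip_loaded_files=False, skip_existing_objects=False):
    """ Illustrative filter matching the meaning of the two skip_* flags. """
    kept = []
    for file_name, object_id in rows:
        if skip_loaded_files and file_name in loaded_files:
            continue  # whole TSV was already imported
        if skip_existing_objects and object_id in existing_ids:
            continue  # object already present in the project
        kept.append((file_name, object_id))
    return kept

rows = [("a.tsv", "o1"), ("a.tsv", "o2"), ("b.tsv", "o2"), ("b.tsv", "o3")]
print(select_rows(rows, loaded_files={"a.tsv"}, existing_ids={"o2"},
                  skip_loaded_files=True, skip_existing_objects=True))
# -> [('b.tsv', 'o3')]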
Example #11
def test_import_issues(config, database, caplog):
    """ The TSV contains loads of problems """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 4")

    params = ImportReq(source_path=str(ISSUES_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    assert errors == [
        "Invalid Header 'nounderscorecol' in file ecotaxa_m106_mn01_n3_sml.tsv. Format must be Table_Field. Field ignored",
        "Invalid Header 'unknown_target' in file ecotaxa_m106_mn01_n3_sml.tsv. Unknown table prefix. Field ignored",
        "Invalid Type '[H]' for Field 'object_wrongtype' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect Type. Field ignored",
        "Invalid float value 'a' for Field 'object_buggy_float' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Lat. value '100' for Field 'object_lat' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -90/+90°.",
        "Invalid Long. value '200' for Field 'object_lon' in file ecotaxa_m106_mn01_n3_sml.tsv. Incorrect range -180/+180°.",
        "Invalid Date value '20140433' for Field 'object_date' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Time value '9920' for Field 'object_time' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Invalid Annotation Status 'predit' for Field 'object_annotation_status' in file ecotaxa_m106_mn01_n3_sml.tsv.",
        "Missing Image 'm106_mn01_n3_sml_1081.jpg2' in file ecotaxa_m106_mn01_n3_sml.tsv. ",
        "Error while reading image 'm106_mn01_n3_sml_corrupted_image.jpg' "
        "from file ecotaxa_m106_mn01_n3_sml.tsv: cannot identify image file '.../m106_mn01_n3_sml_corrupted_image.jpg' <class 'PIL.UnidentifiedImageError'>",
        "Missing object_id in line '5' of file ecotaxa_m106_mn01_n3_sml.tsv. ",
        "Missing Image 'nada.png' in file ecotaxa_m106_mn01_n3_sml.tsv. "
    ]
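The first few messages come from header validation: a column must follow the Table_Field convention and its prefix must name a known table. The sketch below only illustrates that rule; the prefix set and the function are assumptions, not EcoTaxa's implementation.

KNOWN_PREFIXES = {"object", "sample", "acq", "process", "img"}  # assumed set

def check_header(col_name: str, file_name: str) -> list:
    """ Illustrative header check: Table_Field format, known table prefix. """
    if "_" not in col_name:
        return [f"Invalid Header '{col_name}' in file {file_name}. "
                f"Format must be Table_Field. Field ignored"]
    if col_name.split("_", 1)[0] not in KNOWN_PREFIXES:
        return [f"Invalid Header '{col_name}' in file {file_name}. "
                f"Unknown table prefix. Field ignored"]
    return []

print(check_header("nounderscorecol", "ecotaxa_m106_mn01_n3_sml.tsv"))
print(check_header("unknown_target", "ecotaxa_m106_mn01_n3_sml.tsv"))
print(check_header("object_lat", "ecotaxa_m106_mn01_n3_sml.tsv"))  # -> []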

Example #12
def test_import_sparse(config, database, caplog):
    """
        Import a sparse file, some columns are missing.
    """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test Sparse")

    params = ImportReq(source_path=str(SPARSE_DIR))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    errors = check_job_errors(job)
    assert errors == [
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field acq_id is mandatory as there are some acq columns: ['acq_hardware', 'acq_imgtype', 'acq_instrument'].",
        "In ecotaxa_20160719B-163000ish-HealyVPR08-2016_d200_h18_roi.tsv, field sample_id is mandatory as there are some sample columns: ['sample_program', 'sample_ship', 'sample_stationid']."
    ]
    print("\n".join(caplog.messages))
    with AsciiDumper() as sce:
        sce.run(projid=prj_id, out="chk.dmp")
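The two errors encode a consistency rule: as soon as any column of a category appears (acq_* or sample_*), the category's id column becomes mandatory. Below is a minimal, hypothetical restatement of that rule, not the importer's code.

def check_mandatory_ids(columns: list) -> list:
    """ Illustrative check: a category with columns present must also have its _id. """
    errs = []
    for prefix in ("acq", "sample"):
        cat_cols = sorted(c for c in columns
                          if c.startswith(prefix + "_") and c != prefix + "_id")
        if cat_cols and prefix + "_id" not in columns:
            errs.append(f"field {prefix}_id is mandatory as there are some "
                        f"{prefix} columns: {cat_cols}.")
    return errs

cols = ["object_id", "acq_hardware", "acq_imgtype", "acq_instrument",
        "sample_program", "sample_ship", "sample_stationid"]
print(check_mandatory_ids(cols))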
Example #13
def test_import_again_irrelevant_skipping(config, database, caplog):
    """ Re-import similar files into same project
        CANNOT RUN BY ITSELF """
    caplog.set_level(logging.DEBUG)
    srch = search_unique_project(ADMIN_USER_ID, "Test Create Update")
    prj_id = srch.projid  # <- need the project from first test
    # Do preparation
    params = ImportReq(source_path=str(EMPTY_TSV_IN_UPD_DIR),
                       skip_loaded_files=True,
                       skip_existing_objects=True)
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errs = get_job_errors(job)
    assert any("new TSV file(s) are not compliant" in an_err for an_err in errs)
Example #14
def test_import_too_many_custom_columns(config, database, caplog):
    """ The TSV contains too many custom columns.
        Not a realistic case, but it simulates what happens if importing into a project with
         mappings """
    caplog.set_level(logging.DEBUG)
    prj_id = create_project(ADMIN_USER_ID, "Test LS 6")

    params = ImportReq(source_path=str(ISSUES_DIR3))
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)
    check_job_errors(job)
    errors = get_job_errors(job)
    assert errors == [
        'Field acq_cus29, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus30, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.',
        'Field acq_cus31, in file ecotaxa_m106_mn01_n3_sml.tsv, cannot be mapped. Too '
        'many custom fields, or bad type.'
    ]
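The overflow at acq_cus29 suggests that free custom slots per table are limited when mapping onto an existing project. The sketch below illustrates that behaviour with an assumed capacity of 28; the real limit and mapping logic belong to EcoTaxa and are not shown here.

def map_custom_fields(fields: list, capacity: int = 28) -> tuple:
    """ Illustrative mapping of custom TSV fields onto a bounded set of free slots. """
    mapping, errors = {}, []
    free_slots = [f"t{n:02d}" for n in range(1, capacity + 1)]  # assumed slot names
    for a_field in fields:
        if free_slots:
            mapping[a_field] = free_slots.pop(0)
        else:
            errors.append(f"Field {a_field} cannot be mapped. "
                          f"Too many custom fields, or bad type.")
    return mapping, errors

_, errs = map_custom_fields([f"acq_cus{n}" for n in range(1, 32)])
print(errs)  # acq_cus29, acq_cus30 and acq_cus31 overflow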
Example #15
def do_import_update(prj_id, caplog, classif, source=None):
    if source is None:
        source = str(UPDATE_DIR)
    params = ImportReq(skip_existing_objects=True,
                       update_mode=classif,
                       source_path=source)
    with FileImport(prj_id, params) as sce:
        rsp: ImportRsp = sce.run(ADMIN_USER_ID)
    job = wait_for_stable(rsp.job_id)

    assert job.state == DBJobStateEnum.Asking
    assert job.question == {
        "missing_users": ["admin4test", "elizandro rodriguez"],
        "missing_taxa": ["other", "ozzeur"]
    }

    reply = {
        "users": {
            'admin4test': 1,
            'elizandro rodriguez': 1
        },  # Map to admin
        "taxa": {
            'other': 99999,  # 'other<dead'
            'ozzeur': 85011  # 'other<living'
        }
    }
    caplog.clear()
    with JobCRUDService() as sce:
        sce.reply(ADMIN_USER_ID, rsp.job_id, reply)
    job = wait_for_stable(rsp.job_id)
    check_job_ok(job)
    # Check that all went fine
    for a_msg in caplog.records:
        assert a_msg.levelno != logging.ERROR, a_msg.getMessage()
    # #498: No extra parent should be created
    for a_msg in caplog.records:
        assert "++ ID" not in a_msg.getMessage()