Beispiel #1
0
def test_add_schema_without_desc():
    """Add a schema that was built without a description argument.

    ``build_schema`` is called with only the field definitions and the blob
    field list; the id returned by ``add_schema`` is expected to be 0.
    """
    # One numeric field followed by four string fields, in this key order.
    fields = {"id_001": {"type": "number"}}
    for string_field in ("name_002", "data_003", "label", "key"):
        fields[string_field] = {"type": "string"}

    shard_header = ShardHeader()
    built_schema = shard_header.build_schema(fields, ["data"])
    assert shard_header.add_schema(built_schema) == 0
def test_nlp_file_writer():
    """Write NLP samples through the shard API and generate the index.

    Steps: build a six-field schema, register it together with the index
    fields ``id`` and ``rating``, write the samples into ``FILES_NUM`` files
    named after ``NLP_FILE_NAME``, commit, then run the index generator on
    the first file.
    """
    # Three scalar fields, then three numeric-array fields; the insertion
    # order matches listing all six fields in one literal.
    nlp_fields = {
        "id": {"type": "string"},
        "label": {"type": "number"},
        "rating": {"type": "number"},
    }
    for array_field in ("input_ids", "input_mask", "segment_ids"):
        nlp_fields[array_field] = {"type": "array", "items": {"type": "number"}}

    samples = list(
        get_nlp_data("../data/mindrecord/testAclImdbData/pos",
                     "../data/mindrecord/testAclImdbData/vocab.txt", 10))

    # Header: schema plus index fields.
    shard_header = ShardHeader()
    nlp_schema = shard_header.build_schema(nlp_fields, ["segment_ids"],
                                           "nlp_schema")
    schema_id = shard_header.add_schema(nlp_schema)
    assert schema_id == 0, 'failed on adding schema'
    status = shard_header.add_index_fields(["id", "rating"])
    assert status == SUCCESS, 'failed on adding index fields.'

    # Writer: open the target files, size header/page, attach the header.
    shard_writer = ShardWriter()
    shard_paths = []
    for shard_no in range(FILES_NUM):
        shard_paths.append("{}{}".format(NLP_FILE_NAME, shard_no))
    status = shard_writer.open(shard_paths)
    assert status == SUCCESS, 'failed on opening files.'
    shard_writer.set_header_size(1 << 14)
    shard_writer.set_page_size(1 << 15)
    status = shard_writer.set_shard_header(shard_header)
    assert status == SUCCESS, 'failed on setting header.'

    # Payload is keyed by the schema id returned above.
    status = shard_writer.write_raw_nlp_data({schema_id: samples})
    assert status == SUCCESS, 'failed on writing raw data.'
    status = shard_writer.commit()
    assert status == SUCCESS, 'failed on committing.'

    # Build the index from the first written file.
    first_shard = os.path.realpath(shard_paths[0])
    index_generator = ShardIndexGenerator(first_shard)
    index_generator.build()
    index_generator.write_to_db()
Beispiel #3
0
def test_add_empty_schema():
    """Building a schema from an empty definition must raise.

    The expected exception text is the ``MRMBuildSchemaError`` message, so the
    failure has to come from ``build_schema`` itself. Anything placed after
    that call inside the ``pytest.raises`` block would never execute, so only
    the raising call is kept there.
    """
    header = ShardHeader()
    with pytest.raises(Exception) as e:
        header.build_schema({}, ["data"], "test_schema")
    assert str(e.value) == "[MRMBuildSchemaError]: error_code: 1347690609, " \
                           "error_msg: Failed to build schema."
Beispiel #4
0
def test_add_empty_index():
    """Passing an empty string as the index fields must raise.

    The raised exception text is expected to contain "incompatible".
    """
    fields = {"file_name": {"type": "string"}, "label": {"type": "number"}}
    shard_header = ShardHeader()
    # Build the schema and register it in one step; the returned id is unused.
    shard_header.add_schema(shard_header.build_schema(fields, ["data"], "img"))

    empty_index = ""
    with pytest.raises(Exception, match="incompatible"):
        shard_header.add_index_fields(empty_index)
Beispiel #5
0
def test_add_index_with_incorrect_field():
    """Passing a list of ``(int, str)`` tuples as index fields must raise.

    The raised exception text is expected to contain
    "incompatible function arguments".
    """
    fields = {
        "file_name": {"type": "string"},
        "id": {"type": "number"},
        "prelabel": {"type": "string"},
    }
    shard_header = ShardHeader()
    mkv_schema = shard_header.build_schema(fields, ["data"], "mkv_schema")
    shard_header.add_schema(mkv_schema)

    bad_index_fields = [(-1, "id")]
    with pytest.raises(Exception, match="incompatible function arguments"):
        shard_header.add_index_fields(bad_index_fields)
Beispiel #6
0
def test_mindrecord_add_index_016():
    """Index fields given as ``(int, str)`` tuples must be rejected.

    Any exception raised by ``add_index_fields`` satisfies the test.
    """
    # "id" is numeric; the remaining three fields are strings.
    fields = {"id": {"type": "number"}}
    for string_field in ("name", "label", "key"):
        fields[string_field] = {"type": "string"}

    shard_header = ShardHeader()
    img_schema = shard_header.build_schema(fields, ["data"], "img")
    shard_header.add_schema(img_schema)

    with pytest.raises(Exception):
        shard_header.add_index_fields([(0, "id")])
def test_mkv_file_writer():
    """Write MKV samples through the shard API and generate the index.

    Steps: load the samples, register a three-field schema with the index
    fields ``id`` and ``file_name``, write into ``FILES_NUM`` files named
    after ``MKV_FILE_NAME``, commit, then run the index generator on the
    first file.
    """
    samples = get_mkv_data("../data/mindrecord/testVehPerData/")
    fields = {
        "file_name": {"type": "string"},
        "id": {"type": "number"},
        "prelabel": {"type": "string"},
    }

    # Header: schema plus index fields.
    shard_header = ShardHeader()
    schema_id = shard_header.add_schema(
        shard_header.build_schema(fields, ["data"], "img_schema"))
    assert schema_id == 0, 'failed on building schema.'
    status = shard_header.add_index_fields(["id", "file_name"])
    assert status == SUCCESS, 'failed on adding index fields.'

    # Writer: open the target files, size header/page, attach the header.
    shard_writer = ShardWriter()
    shard_paths = []
    for shard_no in range(FILES_NUM):
        shard_paths.append("{}{}".format(MKV_FILE_NAME, shard_no))
    status = shard_writer.open(shard_paths)
    assert status == SUCCESS, 'failed on opening files.'
    shard_writer.set_header_size(1 << 24)
    shard_writer.set_page_size(1 << 25)
    status = shard_writer.set_shard_header(shard_header)
    assert status == SUCCESS, 'failed on setting header.'

    # Payload is keyed by the schema id returned above.
    status = shard_writer.write_raw_cv_data({schema_id: samples})
    assert status == SUCCESS, 'failed on writing raw data.'
    status = shard_writer.commit()
    assert status == SUCCESS, 'failed on committing.'

    # Build the index from the first written file.
    first_shard = os.path.realpath(shard_paths[0])
    index_generator = ShardIndexGenerator(first_shard)
    index_generator.build()
    index_generator.write_to_db()
Beispiel #8
0
def test_add_index_with_string_list():
    """Adding index fields as a list of field-name strings must succeed."""
    # "id" is numeric; the remaining three fields are strings.
    fields = {"id": {"type": "number"}}
    for string_field in ("name", "label", "key"):
        fields[string_field] = {"type": "string"}

    shard_header = ShardHeader()
    shard_header.add_schema(
        shard_header.build_schema(fields, ["key"], "schema_desc"))

    status = shard_header.add_index_fields(["id", "label"])
    assert status == SUCCESS
Beispiel #9
0
def test_file_writer_fail_add_index():
    """Write and read back a file after adding index fields has failed.

    Two invalid ``add_index_fields`` calls are expected to raise (no argument
    -> ``TypeError``; empty list -> ``MRMAddIndexError``). The header is then
    used to write ``test_001.mindrecord`` in the current working directory;
    the file is indexed, read back record by record, and finally both the
    data file and its ``.db`` companion are removed.
    """
    data_raw = get_data("../data/mindrecord/testImageNetData/")
    schema_json = {
        "file_name": {
            "type": "string"
        },
        "label": {
            "type": "number"
        }
    }
    header = ShardHeader()
    schema = header.build_schema(schema_json, ["data"], "img")  # create schema
    schema_id = header.add_schema(schema)  # add schema

    # Only the raising call goes inside each ``pytest.raises`` block: any
    # statement after it would never run once the exception is raised.
    with pytest.raises(TypeError, match="missing 1 "):
        header.add_index_fields()

    with pytest.raises(MRMAddIndexError):
        header.add_index_fields([])

    file_name = os.path.join(os.getcwd(),
                             "test_001.mindrecord")  # set output filename
    writer = ShardWriter()  # test_file_writer
    ret = writer.open([file_name])
    assert ret == SUCCESS, 'failed on opening files.'
    ret = writer.set_shard_header(header)  # write header
    assert ret == SUCCESS, 'failed on setting header.'
    ret = writer.write_raw_cv_data({schema_id: data_raw})
    assert ret == SUCCESS, 'failed on writing raw data.'
    ret = writer.commit()  # commit data
    assert ret == SUCCESS, "commit failed"
    # ShardIndexGenerator
    generator = ShardIndexGenerator(os.path.realpath(file_name))
    generator.build()
    generator.write_to_db()

    reader = ShardReader()
    reader.open(file_name)
    reader.launch()
    index = 0
    _, blob_fields = reader.get_blob_fields()
    iterator = reader.get_next()
    while iterator:
        for blob, raw in iterator:
            raw[blob_fields[0]] = bytes(blob)
            logger.info("#item{}: {}".format(index, raw))
            index += 1
        # Fetch the next batch only after the current one has been fully
        # consumed; fetching per record would discard intermediate batches.
        iterator = reader.get_next()
    reader.finish()
    reader.close()

    os.remove("{}".format(file_name))
    os.remove("{}.db".format(file_name))
Beispiel #10
0
def test_mindrecord_add_index_011():
    """Register every schema field as an index field and expect result 0."""
    # "id" is numeric; the remaining three fields are strings.
    fields = {"id": {"type": "number"}}
    for string_field in ("name", "label", "key"):
        fields[string_field] = {"type": "string"}

    shard_header = ShardHeader()
    img_schema = shard_header.build_schema(fields, ["data"], "img")
    shard_header.add_schema(img_schema)

    status = shard_header.add_index_fields(["id", "name", "label", "key"])
    assert status == 0, 'failed on adding index fields.'
def test_mkv_file_writer_with_exactly_schema():
    """Write two hand-built records whose keys match the schema fields.

    The schema declares thirteen fields (string arrays, number arrays, plain
    numbers and plain strings); each record supplies all of them plus the
    ``data`` blob. A single output file named after ``MKV_FILE_NAME`` is
    written, committed and indexed.
    """
    shard_header = ShardHeader()

    # Assemble the schema group by group; insertion order is: string arrays,
    # number arrays, plain numbers, plain strings.
    schema_fields = {}
    for name in ("annotation_name", "annotation_pose",
                 "annotation_truncated", "annotation_difficult"):
        schema_fields[name] = {"type": "array", "items": {"type": "string"}}
    for name in ("annotation_xmin", "annotation_ymin",
                 "annotation_xmax", "annotation_ymax"):
        schema_fields[name] = {"type": "array", "items": {"type": "number"}}
    for name in ("metadata_width", "metadata_height", "metadata_depth"):
        schema_fields[name] = {"type": "number"}
    for name in ("img_path", "annotation_path"):
        schema_fields[name] = {"type": "string"}

    image_schema = shard_header.build_schema(schema_fields, ["data"],
                                             "image_schema")
    schema_id = shard_header.add_schema(image_schema)
    assert schema_id == 0, 'failed on building schema.'

    # Writer targets exactly one file (suffix 0).
    shard_writer = ShardWriter()
    shard_paths = ["{}{}".format(MKV_FILE_NAME, 0)]
    status = shard_writer.open(shard_paths)
    assert status == SUCCESS, 'failed on opening files.'
    shard_writer.set_header_size(1 << 24)
    shard_writer.set_page_size(1 << 25)

    image_bytes = "it's a image picutre".encode("utf8")

    def make_record():
        # Fresh dict and fresh lists on every call, so the two records do not
        # share any mutable containers; only the bytes blob is shared.
        return {
            "annotation_name": ["xxxxxxxxxx.jpg"],
            "annotation_pose": ["hahahahah"],
            "annotation_truncated": ["1"],
            "annotation_difficult": ["0"],
            "annotation_xmin": [100],
            "annotation_ymin": [200],
            "annotation_xmax": [300],
            "annotation_ymax": [400],
            "metadata_width": 333,
            "metadata_height": 222,
            "metadata_depth": 3,
            "img_path": "/tmp/",
            "annotation_path": "/tmp/annotation",
            "data": image_bytes,
        }

    records = [make_record() for _ in range(2)]

    status = shard_writer.set_shard_header(shard_header)
    assert status == SUCCESS, 'failed on setting header.'
    status = shard_writer.write_raw_cv_data({schema_id: records})
    assert status == SUCCESS, 'failed on writing raw data.'
    status = shard_writer.commit()
    assert status == SUCCESS, 'failed on committing.'

    # Build the index from the written file.
    index_generator = ShardIndexGenerator(os.path.realpath(shard_paths[0]))
    index_generator.build()
    index_generator.write_to_db()