def test_cv_file_writer_loop_and_read():
    """Tutorial: write a CV dataset one row at a time, read it back, clean up."""
    writer = FileWriter(CV2_FILE_NAME, FILES_NUM)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "int64"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    # Write one sample per call to exercise the loop-write path.
    for sample in raw_rows:
        writer.write_raw_data([sample])
    writer.commit()

    reader = FileReader(CV2_FILE_NAME + "0")
    total = 0
    for idx, item in enumerate(reader.get_next()):
        assert len(item) == 3
        total += 1
        logger.info("#item{}: {}".format(idx, item))
    assert total == 10
    reader.close()

    # Remove every shard and its companion .db index file.
    for shard_id in range(FILES_NUM):
        shard = CV2_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
# Example #2
def write_mindrecord_tutorial():
    """Tutorial: write 20 ImageNet-style samples to one mindrecord file and verify the count."""
    writer = FileWriter(MINDRECORD_FILE_NAME)
    samples = get_data("./ImageNetDataSimulation")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "int64"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(samples)
    writer.commit()

    reader = FileReader(MINDRECORD_FILE_NAME)
    total = 0
    for item in reader.get_next():
        assert len(item) == 3
        total += 1
    assert total == 20
    reader.close()
def test_cv_file_writer_no_blob():
    """test cv file writer without blob data."""
    writer = FileWriter(CV_FILE_NAME, 1)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    # Schema deliberately omits the bytes ("blob") field.
    schema = {"file_name": {"type": "string"},
              "label": {"type": "int64"}}
    writer.add_schema(schema, "no_blob_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(raw_rows)
    writer.commit()
    reader = FileReader(CV_FILE_NAME)
    total = 0
    for idx, item in enumerate(reader.get_next()):
        total += 1
        assert len(item) == 2
        logger.info("#item{}: {}".format(idx, item))
    assert total == 10
    reader.close()
    os.remove(CV_FILE_NAME)
    os.remove(CV_FILE_NAME + ".db")
def test_cv_file_writer_without_data():
    """test cv file writer without data."""
    writer = FileWriter(CV_FILE_NAME, 1)
    schema = {"file_name": {"type": "string"},
              "label": {"type": "int64"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    # Commit with no write_raw_data call: the file must be valid but empty.
    writer.commit()
    reader = FileReader(CV_FILE_NAME)
    total = 0
    for idx, item in enumerate(reader.get_next()):
        total += 1
        logger.info("#item{}: {}".format(idx, item))
    assert total == 0
    reader.close()
    os.remove(CV_FILE_NAME)
    os.remove(CV_FILE_NAME + ".db")
# Example #5
def test_issue_34():
    """test file writer"""
    writer = FileWriter(CV_FILE_NAME)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "number"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "cv_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(raw_rows)
    writer.commit()

    reader = FileReader(CV_FILE_NAME)
    total = 0
    for idx, item in enumerate(reader.get_next()):
        logger.info("#item{}: {}".format(idx, item))
        total += 1
    logger.info("count: {}".format(total))
    reader.close()
    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))
# Example #6
def test_shard_4_raw_data_1():
    """test file writer when shard_num equals 4 and number of sample equals 1."""
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    schema = {"file_name": {"type": "string"},
              "label": {"type": "number"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["label"])
    # A single sample spread over FILES_NUM shards.
    writer.write_raw_data([{"file_name": "001.jpg", "label": 1}])
    writer.commit()

    reader = FileReader(CV_FILE_NAME + "0")
    total = 0
    for idx, item in enumerate(reader.get_next()):
        assert len(item) == 2
        total += 1
        logger.info("#item{}: {}".format(idx, item))
    assert total == 1
    reader.close()
    for shard_id in range(FILES_NUM):
        shard = CV_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
# Example #7
def test_cv_file_writer_shard_num_10():
    """test cv dataset writer when shard_num equals 10."""
    shard_num = 10
    writer = FileWriter(CV_FILE_NAME, shard_num)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "number"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(raw_rows)
    writer.commit()
    reader = FileReader(CV_FILE_NAME + "0")
    for idx, item in enumerate(reader.get_next()):
        logger.info("#item{}: {}".format(idx, item))
    reader.close()

    # Remove all ten shards and their .db index files.
    for shard_id in range(shard_num):
        shard = CV_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
# Example #8
def skip_test_issue_155():
    """test file writer loop.

    Writes the same 10-sample batch 1000 times, then verifies all 10000
    rows can be read back.  Unlike the original, the reader is closed and
    the generated shard files are removed afterwards, matching every
    sibling test in this suite.
    """
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "number"},
                      "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name"])
    for _ in range(1000):
        writer.write_raw_data(data)
    writer.commit()
    reader = FileReader(CV_FILE_NAME + "0")
    count = 0
    for _ in reader.get_next():
        count += 1
    assert count == 10000, "Failed to read multiple writed data."
    reader.close()  # was missing: the reader handle leaked
    # Clean up shards + .db files (was missing: large files were left behind).
    for shard_id in range(FILES_NUM):
        shard = "{}{}".format(CV_FILE_NAME, shard_id)
        if os.path.exists(shard):
            os.remove(shard)
        if os.path.exists("{}.db".format(shard)):
            os.remove("{}.db".format(shard))
# Example #9
def test_issue_39():
    """test cv dataset writer when schema fields' datatype does not match raw data."""
    writer = FileWriter(CV_FILE_NAME, 1)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    # "file_name" is declared numeric on purpose: it mismatches the string data.
    schema = {"file_name": {"type": "number"},
              "label": {"type": "number"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(raw_rows)
    writer.commit()

    reader = FileReader(CV_FILE_NAME)
    total = 0
    for _ in reader.get_next():
        total += 1
    assert total == 0, "failed on reading data!"
    reader.close()
    os.remove(CV_FILE_NAME)
    os.remove("{}.db".format(CV_FILE_NAME))
# Example #10
def test_issue_73():
    """test file reader by column name."""
    writer = FileWriter(MKV_FILE_NAME, FILES_NUM)
    mkv_rows = get_mkv_data("../data/mindrecord/testVehPerData/")
    schema = {"file_name": {"type": "string"},
              "id": {"type": "number"},
              "prelabel": {"type": "string"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "mkv_schema")
    writer.add_index(["file_name", "prelabel"])
    writer.write_raw_data(mkv_rows)
    writer.commit()

    # Read shard "1" with 4 consumers, projecting a single column.
    reader = FileReader(MKV_FILE_NAME + "1", 4, ["file_name"])
    for idx, item in enumerate(reader.get_next()):
        logger.info("#item{}: {}".format(idx, item))
    reader.close()

    for shard_id in range(FILES_NUM):
        shard = MKV_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
# Example #11
def test_write_raw_data_with_empty_list():
    """test write raw data with empty list."""
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    schema = {"file_name": {"type": "string"},
              "label": {"type": "number"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    # Writing an empty batch must still succeed.
    assert writer.write_raw_data([]) == SUCCESS
    writer.commit()

    reader = FileReader(CV_FILE_NAME + "0")
    for idx, item in enumerate(reader.get_next()):
        logger.info("#item{}: {}".format(idx, item))
    reader.close()

    for shard_id in range(FILES_NUM):
        shard = CV_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
def test_cv_file_writer_no_raw():
    """test cv file writer without raw data."""
    writer = FileWriter(NLP_FILE_NAME)
    samples = list(
        get_nlp_data("../data/mindrecord/testAclImdbData/pos",
                     "../data/mindrecord/testAclImdbData/vocab.txt", 10))
    # All three fields are variable-length int64 arrays; no raw/blob column.
    schema = {"input_ids": {"type": "int64", "shape": [1, -1]},
              "input_mask": {"type": "int64", "shape": [1, -1]},
              "segment_ids": {"type": "int64", "shape": [1, -1]}}
    writer.add_schema(schema, "no_raw_schema")
    writer.write_raw_data(samples)
    writer.commit()
    reader = FileReader(NLP_FILE_NAME)
    total = 0
    for idx, item in enumerate(reader.get_next()):
        total += 1
        assert len(item) == 3
        logger.info("#item{}: {}".format(idx, item))
    assert total == 10
    reader.close()
    os.remove(NLP_FILE_NAME)
    os.remove(NLP_FILE_NAME + ".db")
# Example #13
def test_cv_file_writer_default_shard_num():
    """test cv dataset writer when shard_num is default value."""
    writer = FileWriter(CV_FILE_NAME)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "number"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(raw_rows)
    writer.commit()
    # The default shard count produces a single un-suffixed output file.
    reader = FileReader(CV_FILE_NAME)
    for idx, item in enumerate(reader.get_next()):
        logger.info("#item{}: {}".format(idx, item))
    reader.close()

    os.remove(CV_FILE_NAME)
    os.remove(CV_FILE_NAME + ".db")
# Example #14
def test_file_writer_raw_data_038():
    """test write raw data without verify."""
    shard_num = 11
    writer = FileWriter("test_file_writer_raw_data_", shard_num)
    raw_rows = get_data("../data/mindrecord/testImageNetData/")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "number"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name"])
    # Write the same batch once per shard, skipping schema verification.
    for _ in range(shard_num):
        writer.write_raw_data(raw_rows, False)
    writer.commit()

    # Multi-shard outputs carry a numeric suffix; open the last shard.
    suffix = ""
    if shard_num > 1:
        suffix = '99' if shard_num > 99 else str(shard_num - 1)
    reader = FileReader("test_file_writer_raw_data_" + suffix)
    total = 0
    for _ in reader.get_next():
        total += 1
    assert total == shard_num * 10
    reader.close()

    if shard_num == 1:
        os.remove("test_file_writer_raw_data_")
        os.remove("test_file_writer_raw_data_.db")
        return
    # Shard suffixes are zero-padded to two digits when there are >10 shards.
    for shard_id in range(shard_num):
        tag = str(shard_id).zfill(2) if shard_num > 10 else str(shard_id)
        base = "test_file_writer_raw_data_{}".format(tag)
        if os.path.exists(base):
            os.remove(base)
        if os.path.exists(base + ".db"):
            os.remove(base + ".db")
# Example #15
def test_cv_file_append_writer_absolute_path():
    """tutorial for cv dataset append writer."""
    writer = FileWriter(CV4_FILE_NAME, 4)
    samples = get_data("../data/mindrecord/testImageNetData/")
    schema = {"file_name": {"type": "string"},
              "label": {"type": "int64"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "img_schema")
    writer.add_index(["file_name", "label"])
    # First pass writes half the samples; append mode adds the rest.
    writer.write_raw_data(samples[:5])
    writer.commit()
    appender = FileWriter.open_for_append(CV4_FILE_NAME + "0")
    appender.write_raw_data(samples[5:10])
    appender.commit()
    reader = FileReader(CV4_FILE_NAME + "0")
    total = 0
    for idx, item in enumerate(reader.get_next()):
        assert len(item) == 3
        total += 1
        logger.info("#item{}: {}".format(idx, item))
    assert total == 10
    reader.close()

    for shard_id in range(4):
        shard = CV4_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
# Example #16
def test_mnist_to_mindrecord_compare_data():
    """test transform mnist dataset to mindrecord and compare data."""
    mnist_transformer = MnistToMR(MNIST_DIR, FILE_NAME)
    mnist_transformer.transform()
    assert os.path.exists("mnist_train.mindrecord")
    assert os.path.exists("mnist_test.mindrecord")

    train_name, test_name = "mnist_train.mindrecord", "mnist_test.mindrecord"

    def _extract_images(filename, num_images):
        """Extract the images into a 4D tensor [image index, y, x, channels]."""
        with gzip.open(filename) as bytestream:
            # Skip the 16-byte IDX image-file header.
            bytestream.read(16)
            buf = bytestream.read(
                IMAGE_SIZE * IMAGE_SIZE * num_images * NUM_CHANNELS)
            data = np.frombuffer(buf, dtype=np.uint8)
            data = data.reshape(
                num_images, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS)
            return data

    def _extract_labels(filename, num_images):
        """Extract the labels into a vector of int64 label IDs."""
        with gzip.open(filename) as bytestream:
            # Skip the 8-byte IDX label-file header.
            bytestream.read(8)
            buf = bytestream.read(1 * num_images)
            labels = np.frombuffer(buf, dtype=np.uint8).astype(np.int64)
            return labels

    train_data_filename_ = os.path.join(MNIST_DIR,
                                        'train-images-idx3-ubyte.gz')
    train_labels_filename_ = os.path.join(MNIST_DIR,
                                          'train-labels-idx1-ubyte.gz')
    test_data_filename_ = os.path.join(MNIST_DIR,
                                       't10k-images-idx3-ubyte.gz')
    test_labels_filename_ = os.path.join(MNIST_DIR,
                                         't10k-labels-idx1-ubyte.gz')
    train_data = _extract_images(train_data_filename_, 60000)
    train_labels = _extract_labels(train_labels_filename_, 60000)
    test_data = _extract_images(test_data_filename_, 10000)
    test_labels = _extract_labels(test_labels_filename_, 10000)

    # Re-encode each raw image as JPEG and compare with the stored record.
    # NOTE(review): np.array(x['data']) on a bytes value yields a 0-d object
    # array, so `==` compares whole objects rather than element-wise — confirm
    # this is the intended comparison.  Also assumes the reader returns rows
    # in the original write order — verify.
    reader = FileReader(train_name)
    for x, data, label in zip(reader.get_next(), train_data, train_labels):
        _, img = cv2.imencode(".jpeg", data)
        assert np.array(x['data']) == img.tobytes()
        assert np.array(x['label']) == label
    reader.close()

    reader = FileReader(test_name)
    for x, data, label in zip(reader.get_next(), test_data, test_labels):
        _, img = cv2.imencode(".jpeg", data)
        assert np.array(x['data']) == img.tobytes()
        assert np.array(x['label']) == label
    reader.close()

    os.remove("{}".format("mnist_train.mindrecord"))
    os.remove("{}.db".format("mnist_train.mindrecord"))
    os.remove("{}".format("mnist_test.mindrecord"))
    os.remove("{}.db".format("mnist_test.mindrecord"))
# Example #17
def test_lack_partition_and_db():
    """test file reader when mindrecord file does not exist."""
    expected = ('[MRMOpenError]: error_code: 1347690596, '
                'error_msg: MindRecord File could not open successfully.')
    # Opening a non-existent file must raise MRMOpenError.
    with pytest.raises(MRMOpenError) as err:
        FileReader('dummy.mindrecord').close()
    assert expected in str(err.value)
# Example #18
def read(filename, fields_num=5):
    """Read `filename` and check each of the expected 5 rows has `fields_num` fields."""
    reader = FileReader(filename)
    total = 0
    for row in reader.get_next():
        assert len(row) == fields_num
        total += 1
        logger.info("data: {}".format(row))
    assert total == 5
    reader.close()
def test_cv_file_reader_file_list():
    """tutorial for cv file partial reader.

    Opens all shards via an explicit file list and checks the total row
    count.  Closes the reader when done (the original leaked the handle,
    unlike every other reader test in this suite).
    """
    reader = FileReader([CV_FILE_NAME + str(x) for x in range(FILES_NUM)])
    count = 0
    for index, x in enumerate(reader.get_next()):
        assert len(x) == 3
        count = count + 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 10
    reader.close()  # was missing: release the reader handle
# Example #20
def test_read_after_close(fixture_cv_file):
    """test file reader when close read."""
    create_cv_mindrecord(1)
    reader = FileReader(CV_FILE_NAME)
    reader.close()
    # Iterating a closed reader must yield nothing.
    total = 0
    for idx, item in enumerate(reader.get_next()):
        total += 1
        logger.info("#item{}: {}".format(idx, item))
    assert total == 0
# Example #21
def test_lack_db(fixture_cv_file):
    """test file reader when db file does not exist."""
    create_cv_mindrecord(1)
    # Delete only the .db index companion; opening must then fail.
    os.remove("{}.db".format(CV_FILE_NAME))
    expected = ('[MRMOpenError]: error_code: 1347690596, '
                'error_msg: MindRecord File could not open successfully.')
    with pytest.raises(MRMOpenError) as err:
        FileReader(CV_FILE_NAME).close()
    assert expected in str(err.value)
def test_nlp_file_reader_tutorial():
    """tutorial for nlp file reader."""
    reader = FileReader(NLP_FILE_NAME + "0")
    total = 0
    for idx, item in enumerate(reader.get_next()):
        assert len(item) == 6
        total += 1
        logger.info("#item{}: {}".format(idx, item))
    assert total == 10
    reader.close()
def read(filename):
    """Read `filename`, verify it holds 20 three-field rows, and log the first."""
    # NOTE(review): this redefines `read` declared earlier in the file.
    reader = FileReader(filename)
    total = 0
    for row in reader.get_next():
        assert len(row) == 3
        total += 1
        if total == 1:
            logger.info("data: {}".format(row))
    assert total == 20
    reader.close()
def test_cv_file_reader_partial_tutorial():
    """tutorial for cv file partial reader."""
    reader = FileReader(CV_FILE_NAME + "0")
    total = 0
    for idx, item in enumerate(reader.get_next()):
        assert len(item) == 3
        total += 1
        logger.info("#item{}: {}".format(idx, item))
        # Stop the iteration early after five rows.
        if total == 5:
            reader.finish()
    assert total == 5
def use_filereader(mindrecord):
    """Time a full pass over `mindrecord`, reading only the data/label columns."""
    start = time.time()
    reader = FileReader(file_name=mindrecord,
                        num_consumer=4,
                        columns=["data", "label"])
    num_iter = 0
    for _ in reader.get_next():
        num_iter += 1
        print_log(num_iter)
    end = time.time()
    print("Read by FileReader - total rows: {}, cost time: {}s".format(num_iter, end - start))
# Example #26
def test_lack_some_db(fixture_cv_file):
    """test file reader when some db does not exist."""
    create_cv_mindrecord(4)
    # Remove only the .db index companion of the fourth shard.
    os.remove("{}3.db".format(CV_FILE_NAME))
    expected = ('[MRMOpenError]: error_code: 1347690596, '
                'error_msg: MindRecord File could not open successfully.')
    with pytest.raises(MRMOpenError) as err:
        FileReader(CV_FILE_NAME + "0").close()
    assert expected in str(err.value)
def test_write_read_process_with_define_index_field():
    """Round-trip six mixed-type samples (scalars, ndarrays, bytes) through one
    mindrecord file, indexing on "label", and verify every field matches."""
    mindrecord_file_name = "test.mindrecord"
    data = [{"file_name": "001.jpg", "label": 43, "score": 0.8, "mask": np.array([3, 6, 9], dtype=np.int64),
             "segments": np.array([[5.0, 1.6], [65.2, 8.3]], dtype=np.float32),
             "data": bytes("image bytes abc", encoding='UTF-8')},
            {"file_name": "002.jpg", "label": 91, "score": 5.4, "mask": np.array([1, 4, 7], dtype=np.int64),
             "segments": np.array([[5.1, 9.1], [2.0, 65.4]], dtype=np.float32),
             "data": bytes("image bytes def", encoding='UTF-8')},
            {"file_name": "003.jpg", "label": 61, "score": 6.4, "mask": np.array([7, 6, 3], dtype=np.int64),
             "segments": np.array([[0.0, 5.6], [3.0, 16.3]], dtype=np.float32),
             "data": bytes("image bytes ghi", encoding='UTF-8')},
            {"file_name": "004.jpg", "label": 29, "score": 8.1, "mask": np.array([2, 8, 0], dtype=np.int64),
             "segments": np.array([[5.9, 7.2], [4.0, 89.0]], dtype=np.float32),
             "data": bytes("image bytes jkl", encoding='UTF-8')},
            {"file_name": "005.jpg", "label": 78, "score": 7.7, "mask": np.array([3, 1, 2], dtype=np.int64),
             "segments": np.array([[0.6, 8.1], [5.3, 49.3]], dtype=np.float32),
             "data": bytes("image bytes mno", encoding='UTF-8')},
            {"file_name": "006.jpg", "label": 37, "score": 9.4, "mask": np.array([7, 6, 7], dtype=np.int64),
             "segments": np.array([[4.2, 6.3], [8.9, 81.8]], dtype=np.float32),
             "data": bytes("image bytes pqr", encoding='UTF-8')}
            ]
    writer = FileWriter(mindrecord_file_name)
    # Schema mixes fixed-shape, variable-shape ([-1]), scalar, and bytes fields.
    schema = {"file_name": {"type": "string"},
              "label": {"type": "int32"},
              "score": {"type": "float64"},
              "mask": {"type": "int64", "shape": [-1]},
              "segments": {"type": "float32", "shape": [2, 2]},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "data is so cool")
    writer.add_index(["label"])
    writer.write_raw_data(data)
    writer.commit()

    reader = FileReader(mindrecord_file_name)
    count = 0
    # NOTE(review): the per-row comparison assumes rows come back in write
    # order — verify this is guaranteed by the reader.
    for index, x in enumerate(reader.get_next()):
        assert len(x) == 6
        # ndarray fields need element-wise comparison; scalars/bytes compare directly.
        for field in x:
            if isinstance(x[field], np.ndarray):
                assert (x[field] == data[count][field]).all()
            else:
                assert x[field] == data[count][field]
        count = count + 1
        logger.info("#item{}: {}".format(index, x))
    assert count == 6
    reader.close()

    os.remove("{}".format(mindrecord_file_name))
    os.remove("{}.db".format(mindrecord_file_name))
# Example #28
def test_mkv_file_reader_with_negative_num_consumer():
    """test mkv file reader when the number of consumer is negative."""
    writer = FileWriter(MKV_FILE_NAME, FILES_NUM)
    mkv_rows = get_mkv_data("../data/mindrecord/testVehPerData/")
    schema = {"file_name": {"type": "string"},
              "id": {"type": "number"},
              "prelabel": {"type": "string"},
              "data": {"type": "bytes"}}
    writer.add_schema(schema, "mkv_schema")
    writer.add_index(["file_name", "prelabel"])
    writer.write_raw_data(mkv_rows)
    writer.commit()

    # A negative consumer count must be rejected at construction time.
    with pytest.raises(Exception) as e:
        FileReader(MKV_FILE_NAME + "1", -1)
    assert "Consumer number should between 1 and" in str(e.value)

    for shard_id in range(FILES_NUM):
        shard = MKV_FILE_NAME + str(shard_id)
        os.remove(shard)
        os.remove(shard + ".db")
def read(filename, columns, row_num):
    """Read mindrecord rows and compare them column-by-column against CSV_FILE."""
    if not pd:
        raise Exception(
            "Module pandas is not found, please use pip install it.")
    expected = pd.read_csv(CSV_FILE)
    reader = FileReader(filename)
    total = 0
    for row in reader.get_next():
        # Each record must match the CSV row at the same position.
        for col in columns:
            assert row[col] == expected[col].iloc[total]
        assert len(row) == len(columns)
        total += 1
        if total == 1:
            logger.info("data: {}".format(row))
    assert total == row_num
    reader.close()
# Example #30
def test_invalid_db(fixture_cv_file):
    """test file reader when the content of db is illegal."""
    create_cv_mindrecord(1)
    os.remove("imagenet.mindrecord.db")
    # Replace the index db with garbage text so opening must fail.
    with open('imagenet.mindrecord.db', 'w') as f:
        f.write('just for test')
    expected = ('[MRMOpenError]: error_code: 1347690596, '
                'error_msg: MindRecord File could not open successfully.')
    with pytest.raises(MRMOpenError) as err:
        FileReader('imagenet.mindrecord')
    assert expected in str(err.value)