Python FileWriter.set_header_size 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: mindspore.mindrecord

클래스/타입: FileWriter

메소드/함수: set_header_size

hotexamples.com에서의 예제들: 4

Python FileWriter.set_header_size - 4개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python mindspore.mindrecord.FileWriter.set_header_size의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

FileWriter(30)

add_index(30)

add_schema(30)

commit(16)

write_raw_data(11)

set_page_size(5)

set_header_size(4)

open_for_append(3)

open_and_set_header(2)

예제 #1

파일 보기

    def init_writer(mr_schema):
        """
        Create and open a FileWriter configured from the global ``args``.

        The header size is overridden only when the requested exponent
        differs from 24, and the page size only when it differs from 25;
        otherwise the writer's own settings are left untouched.

        Args:
            mr_schema: schema passed to ``add_schema`` under the name
                "mindrecord_graph_schema".

        Returns:
            The FileWriter after ``open_and_set_header()`` has been called.
        """
        print("Init writer  ...")
        writer = FileWriter(args.mindrecord_file, args.mindrecord_partitions)

        # Sizes are supplied as powers of two (bit counts) on the command line.
        header_bits = args.mindrecord_header_size_by_bit
        if header_bits != 24:
            writer.set_header_size(1 << header_bits)

        page_bits = args.mindrecord_page_size_by_bit
        if page_bits != 25:
            writer.set_page_size(1 << page_bits)

        # Register the schema, then open the output file and set its header.
        writer.add_schema(mr_schema, "mindrecord_graph_schema")
        writer.open_and_set_header()

        return writer

예제 #2

파일 보기

def add_and_remove_nlp_file():
    """Write the NLP MindRecord files, yield to the caller, then delete them.

    Any data or ``.db`` file left at the target paths is removed before
    writing. After the caller resumes the generator, every data file and its
    ``.db`` companion is deleted unconditionally.

    Yields:
        The marker string "yield_nlp_data".
    """
    shard_paths = ["{}{}".format(NLP_FILE_NAME, str(idx).rjust(1, '0'))
                   for idx in range(FILES_NUM)]

    # Clear leftovers so the writer starts from a clean slate.
    for shard in shard_paths:
        for stale in (shard, "{}.db".format(shard)):
            if os.path.exists(stale):
                os.remove(stale)

    nlp_writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
    records = list(get_nlp_data(NLP_FILE_POS, NLP_FILE_VOCAB, 10))
    schema = {
        "id": {"type": "string"},
        "label": {"type": "int32"},
        "rating": {"type": "float32"},
        "input_ids": {"type": "int64", "shape": [-1]},
        "input_mask": {"type": "int64", "shape": [1, -1]},
        "segment_ids": {"type": "int64", "shape": [2, -1]},
    }

    # Header/page sizes are set before the schema is added, as in the
    # original call order.
    nlp_writer.set_header_size(1 << 14)
    nlp_writer.set_page_size(1 << 15)
    nlp_writer.add_schema(schema, "nlp_schema")
    nlp_writer.add_index(["id", "rating"])
    nlp_writer.write_raw_data(records)
    nlp_writer.commit()

    yield "yield_nlp_data"

    # Teardown: remove everything that was written above.
    for shard in shard_paths:
        os.remove(shard)
        os.remove("{}.db".format(shard))

예제 #3

파일 보기

def test_issue_84():
    """Test file reader when db does not match.

    Two datasets (image and NLP) are written, then the NLP ``.db`` file is
    swapped in place of the image one. Opening the image MindRecord file
    with the mismatching ``.db`` is expected to raise with the
    ``MRMOpenError`` message asserted below.

    The swapped files are restored and all generated files are removed in a
    ``finally`` block, so a failing assertion does not leave renamed files
    behind for later tests.
    """
    writer = FileWriter(CV_FILE_NAME, FILES_NUM)
    data = get_data("../data/mindrecord/testImageNetData/")
    cv_schema_json = {"file_name": {"type": "string"},
                      "label": {"type": "number"}, "data": {"type": "bytes"}}
    writer.add_schema(cv_schema_json, "img_schema")
    writer.add_index(["file_name", "label"])
    writer.write_raw_data(data)
    writer.commit()

    writer = FileWriter(NLP_FILE_NAME, FILES_NUM)
    data = list(get_nlp_data("../data/mindrecord/testAclImdbData/pos",
                             "../data/mindrecord/testAclImdbData/vocab.txt",
                             10))
    nlp_schema_json = {"id": {"type": "string"}, "label": {"type": "number"},
                       "rating": {"type": "number"},
                       "input_ids": {"type": "array",
                                     "items": {"type": "number"}},
                       "input_mask": {"type": "array",
                                      "items": {"type": "number"}},
                       "segment_ids": {"type": "array",
                                       "items": {"type": "number"}}
                       }
    writer.set_header_size(1 << 14)
    writer.set_page_size(1 << 15)
    writer.add_schema(nlp_schema_json, "nlp_schema")
    writer.add_index(["id", "rating"])
    writer.write_raw_data(data)
    writer.commit()

    reader = ShardReader()
    # Back up the image db and put the NLP db in its place to force a mismatch.
    os.rename("imagenet.mindrecord1.db", "imagenet.mindrecord1.db.bk")
    os.rename("aclImdb.mindrecord1.db", "imagenet.mindrecord1.db")
    try:
        file_name = os.path.join(os.getcwd(), "imagenet.mindrecord1")
        with pytest.raises(Exception) as e:
            reader.open(file_name)
        assert str(e.value) == "[MRMOpenError]: error_code: 1347690596, " \
                               "error_msg: " \
                               "MindRecord File could not open successfully."
    finally:
        # Undo the swap and delete generated files even when the check above
        # fails; the order mirrors the renames performed before the check.
        os.rename("imagenet.mindrecord1.db", "aclImdb.mindrecord1.db")
        paths = ["{}{}".format(NLP_FILE_NAME, str(x).rjust(1, '0'))
                 for x in range(FILES_NUM)]
        for item in paths:
            os.remove(item)
            os.remove("{}.db".format(item))

        os.rename("imagenet.mindrecord1.db.bk", "imagenet.mindrecord1.db")
        paths = ["{}{}".format(CV_FILE_NAME, str(x).rjust(1, '0'))
                 for x in range(FILES_NUM)]
        for item in paths:
            os.remove(item)
            os.remove("{}.db".format(item))

예제 #4

파일 보기

파일: writer.py 프로젝트: tuq820/mindspore

    num_tasks = mr_api.mindrecord_task_number()

    print("Write mindrecord ...")

    mindrecord_dict_data = mr_api.mindrecord_dict_data

    # get number of files
    writer = FileWriter(args.mindrecord_file, args.mindrecord_partitions)

    start_time = time.time()

    # set the header size
    try:
        header_size = mr_api.mindrecord_header_size
        writer.set_header_size(header_size)
    except AttributeError:
        print("Default header size: {}".format(1 << 24))

    # set the page size
    try:
        page_size = mr_api.mindrecord_page_size
        writer.set_page_size(page_size)
    except AttributeError:
        print("Default page size: {}".format(1 << 25))

    # get schema
    try:
        mindrecord_schema = mr_api.mindrecord_schema
    except AttributeError:
        raise RuntimeError("mindrecord_schema is not defined in mr_api.py.")