예제 #1
0
def extract_maru_tenten_from_etl6_7(file_path, multiply, dict):
    """Collect the images of two specific mark characters from a record file

    The file is read as consecutive 2052-byte records. For each record the
    image field is decoded and the label is derived from the character-code
    field; only records whose normalized label is one of the two marks
    compared below (presumably the handakuten "maru" and dakuten "tenten",
    going by the function name) are kept.

    # Arguments
        file_path [str]: the path to the record file
        multiply [number]: factor applied to every kept image
        dict [dict]: mapping from label to list of images; it is updated in
            place

    # Returns
        [dict]: the same `dict` object, with the scaled images appended under
            their label
    """
    record_size = 2052
    total_bytes = os.stat(file_path).st_size
    # Ceiling division: a trailing partial record is still visited, and
    # struct.unpack then raises struct.error on the short read.
    record_count = -(-total_bytes // record_size)

    with open(file_path, 'rb') as stream:
        for index in range(record_count):
            stream.seek(index * record_size)
            fields = struct.unpack('>H2sH6BI4H4B4x2016s4x',
                                   stream.read(record_size))

            try:
                pixels = np.asarray(
                    Image.frombytes('F', (64, 63), fields[18], 'bit', 4))
                label = normalize_char(jis0201_to_unicode(fields[3]))

                if label not in ('゚', '゙'):
                    continue

                scaled = pixels * multiply
                bucket = dict.get(label)
                if bucket is None:
                    # First image for this label (or the slot held None).
                    bucket = dict[label] = []
                bucket.append(scaled)
            except KeyError:
                # A record that raises KeyError while being decoded is
                # skipped silently.
                continue

    return dict
예제 #2
0
def etl3_loader(file_path):
    """Load an ETL3 file, and return numpy image file and labels

    Every complete record in the file is read. A record is 6 * 3936 bits
    long; a trailing partial record, if any, is ignored. The per-label counts
    are printed before returning.

    @NOTE: the image has limited pixel ranges (it is decoded with the 4-bit
    'bit' raw decoder); to view an image, scale it up (the original note
    suggests multiplying it with 30).

    # Arguments
        file_path [str]: the path to ETL3 file

    # Returns
        [numpy array]: image X, in numpy array form (dtype uint8)
        [list of str]: the list of corresponding labels
    """
    # Size of one record in bits: 3936 characters of 6 bits each.
    record_bits = 6 * 3936
    # ConstBitStream.pos is a *bit* offset while st_size is in bytes, so the
    # file size has to be converted to bits before the two can be compared.
    total_bits = os.stat(file_path).st_size * 8
    record_count = total_bits // record_bits

    # The image payload is 21888 bits long. The 'bytes:n' token counts
    # bytes, so it must be 21888 / 8 = 2736; asking for more would run past
    # the end of the stream on the last records of the file.
    # NOTE(review): this assumes r[18] x r[19] 4-bit pixels fit in 2736 bytes
    # (e.g. 72 x 76) - confirm against the ETL3 format specification.
    record_format = (
        '2*uint:36,uint:8,pad:28,uint:8,pad:28,4*uint:6,pad:12,'
        '15*uint:36,pad:1008,bytes:2736')

    X, y = [], []
    uniqueLabel = {}
    each_file = bitstring.ConstBitStream(filename=file_path)

    for skip in range(record_count):
        each_file.pos = skip * record_bits
        r = each_file.readlist(record_format)

        # r[18] and r[19] give the image size; r[-1] is the raw image data.
        iF = Image.frombytes('F', (r[18], r[19]), r[-1], 'bit', 4)
        X.append(np.asarray(iF))

        label = normalize_char(str(jis0201_to_unicode(r[2])))
        y.append(label)
        uniqueLabel[label] = uniqueLabel.get(label, 0) + 1

    print(uniqueLabel)

    return np.asarray(X, dtype=np.uint8), y
예제 #3
0
def etl7_loader(file_path, verbose=0):
    """Load an ETL7 file, and return numpy image file and labels

    The file is read as consecutive 2052-byte records. Records whose decoding
    raises a KeyError are skipped. The per-label counts are printed before
    returning.

    @NOTE: the image has limited pixel ranges, to view an image, multiply it
    with 16.

    # Arguments
        file_path [str]: the path to the ETL7 file
        verbose [int]: when greater than 0, print a warning for each skipped
            record

    # Returns
        [numpy array]: image X, in numpy array form (dtype uint8)
        [list of str]: the list of corresponding labels
    """
    record_size = 2052
    images, labels = [], []
    label_counts = {}

    total_bytes = os.stat(file_path).st_size
    # Ceiling division: a trailing partial record is still visited, and
    # struct.unpack then raises struct.error on the short read.
    record_count = -(-total_bytes // record_size)

    with open(file_path, 'rb') as stream:
        for index in range(record_count):
            stream.seek(index * record_size)
            fields = struct.unpack('>H2sH6BI4H4B4x2016s4x',
                                   stream.read(record_size))

            try:
                pixels = np.asarray(
                    Image.frombytes('F', (64, 63), fields[18], 'bit', 4))
                label = normalize_char(jis0201_to_unicode(fields[3]))
            except KeyError as error:
                if verbose > 0:
                    print(':WARNING: {}'.format(error))
                continue
            else:
                images.append(pixels)
                labels.append(label)
                label_counts[label] = label_counts.get(label, 0) + 1

    print(label_counts)
    return np.asarray(images, dtype=np.uint8), labels
예제 #4
0
def etl6_loader(file_path, verbose=0):
    """Load an ETL6 file, and return numpy image file and labels

    The file is read as consecutive 2052-byte records. Records whose decoding
    raises a KeyError are skipped. The per-label counts are printed before
    returning.

    When the module-level flag IS_DEBUG is truthy, every image labelled with
    one of the two mark characters compared below is additionally written to
    disk three times: scaled by 28, and thresholded at 60 and at 70.

    ## NOTE: the image has limited pixel ranges, to view an image, multiply it
    with 28.

    # Arguments
        file_path [str]: the path to ETL6 file
        verbose [int]: when greater than 0, print a warning for each skipped
            record

    # Returns
        [numpy array]: image X, in numpy array form (dtype uint8)
        [list of str]: the list of corresponding labels
    """
    X, y = [], []
    file_size = os.stat(file_path).st_size
    skip = 0
    uniqueLabel = {}

    with open(file_path, 'rb') as f:
        while skip * 2052 < file_size:
            f.seek(skip * 2052)
            s = f.read(2052)
            r = struct.unpack('>H2sH6BI4H4B4x2016s4x', s)

            try:
                iF = np.asarray(Image.frombytes('F', (64, 63), r[18], 'bit',
                                                4))
                label = normalize_char(jis0201_to_unicode(r[3]))
                X.append(iF)
                y.append(label)

                uniqueLabel[label] = uniqueLabel.get(label, 0) + 1

                if IS_DEBUG and (label == '゚' or label == '゙'):
                    iFNew = iF * 28

                    # Common part of the three debug file names.
                    prefix = ('C:\\Users\\ABC\\Desktop\\deletetemp\\GenDataHWBB\\synthesizedKana\\'
                              + '6_' + label + '_' + str(skip))

                    cv2.imwrite(prefix + '_02_test_grey' + '.png', iFNew)

                    binary = iFNew < 60
                    binaryInt = binary.astype(int) * 255
                    cv2.imwrite(prefix + '_03_test_binary_60' + '.png',
                                binaryInt)

                    # The threshold-70 image must be built from its own mask;
                    # reusing the threshold-60 mask would just write the same
                    # picture twice.
                    binary2 = iFNew < 70
                    binaryInt2 = binary2.astype(int) * 255
                    cv2.imwrite(prefix + '_03_test_binary_70' + '.png',
                                binaryInt2)

            except KeyError as error:
                if verbose > 0:
                    print(':WARNING: {}'.format(error))
                skip += 1
                continue

            skip += 1
    print(uniqueLabel)
    return np.asarray(X, dtype=np.uint8), y