Ejemplo n.º 1
0
def get_image_size():
    """Resize every image under the module-level ``dictionary`` path to
    300x300 grayscale, overwriting each source file in place.

    NOTE: despite its name this function resizes images rather than
    returning a size; it relies on the global ``dictionary`` path.
    """
    for image_path in get_all_files(dictionary):
        grayscale = Image.open(image_path).convert('L')
        resized = resizeimage.resize_cover(grayscale, [300, 300])
        resized.save(image_path)
def get_text_value_from_generated_images(generated_image_dir,
                                         dictionary_dir=None):
    """Map each generated image's (left, upper, width, height) box to the
    character predicted for that image.

    ``dictionary_dir`` falls back to the project's bundled dictionary when
    falsy.  Images that raise OSError are skipped (best effort).
    """
    dictionary_dir = dictionary_dir or os.path.join(
        root, 'tokenization', 'gerber_tokenization', 'data', 'dictionary')

    coordinate_to_char = {}

    generated_files = get_all_files(generated_image_dir)
    progress = tqdm(generated_files,
                    total=len(os.listdir(generated_image_dir)))

    for image_file in progress:
        try:
            predicted = get_predicate_by_image(image_file)
            if not predicted:
                continue

            left, upper, width, height = get_coordination_from_filename(
                image_file)
            # 'dot' is the dictionary's encoding for a literal period.
            if predicted == 'dot':
                predicted = '.'
            coordinate_to_char[(left, upper, width, height)] = predicted
        except OSError:
            # Unreadable image: skip and keep going.
            pass

    return coordinate_to_char
def find_similarity_from_hash_dir(image, dir_name):
    """Look up *image*'s label via a dhash dictionary built from *dir_name*.

    The dictionary is cached to a pickle file whose name is the SHA-384
    digest of the concatenated dictionary file listing, so the cache is
    invalidated automatically whenever the set of dictionary files changes.

    Returns whatever ``find_image_label`` returns for *image*.
    """
    all_files = ''.join(get_all_files(dir_name)).encode()
    hash_fname = '{}.pickle'.format(hashlib.sha384(all_files).hexdigest())

    # Bug fix: the cache used to be probed at os.path.join('data', fname)
    # but read/written at plain fname, so the pickle was rebuilt on every
    # call.  Probe the same path that is actually used.
    if os.path.exists(hash_fname):
        with open(hash_fname, 'rb') as f:
            hash_dict = pickle.load(f)
    else:
        hash_dict = build_dict_hash(dir_name)
        with open(hash_fname, 'wb') as f:
            pickle.dump(hash_dict, f)

    return find_image_label(image, hash_dict)
def build_dict_hash(dict_path):
    """Build a mapping of dhash value -> character name for every ``.png``
    under *dict_path*.

    The character name is the file's base name with the ``.png`` extension
    stripped.  When two files produce the same hash, the first one seen
    wins.  Files whose hash is falsy are skipped.
    """
    char_dict_hash = {}
    for f_path in get_all_files(dict_path):
        # Bug fix: the old test (f_path[-3:] == 'png') also matched names
        # like 'foopng' and then [:-4] stripped the wrong 4 characters.
        if not f_path.endswith('.png'):
            continue
        chr_image = Image.open(f_path).convert('L')
        represent_hash = dhash.dhash_int(chr_image)
        if not represent_hash:
            continue
        chr_name = os.path.basename(f_path)[:-4]
        # setdefault keeps the first name registered for a given hash.
        char_dict_hash.setdefault(represent_hash, chr_name)
    return char_dict_hash
def find_similarity_from_tree_dir(image, dir_name, diff_threshold=5):
    """Judge *image*'s similarity against a BK-tree of dictionary dhashes,
    allowing up to *diff_threshold* differing bits.

    The (names, hashes, tree) triple is cached to a pickle file keyed by
    the SHA-384 digest of the dictionary file listing, so the cache is
    invalidated when the set of dictionary files changes.
    """
    all_files = ''.join(get_all_files(dir_name)).encode()
    tree_fname = '{}.pickle'.format(hashlib.sha384(all_files).hexdigest())

    # Bug fix: existence used to be probed at os.path.join('data', fname)
    # while the pickle was read/written at plain fname, so the cache never
    # took effect.  Probe the path that is actually used.
    if os.path.exists(tree_fname):
        with open(tree_fname, 'rb') as f:
            _chr_n, _hash_l, _bk_t = pickle.load(f)
    else:
        _chr_n, _hash_l, _bk_t = build_dict_tree(dir_name)
        with open(tree_fname, 'wb') as f:
            pickle.dump((_chr_n, _hash_l, _bk_t), f)

    return judge_image_similarity(image, _chr_n, _hash_l, _bk_t,
                                  diff_threshold)
def build_dict_tree(dict_path):
    """Collect (character names, dhash values) for every ``.png`` under
    *dict_path* and build a Hamming-distance BK-tree over the hashes.

    Returns a ``(chr_name, hash_list, bk_tree)`` triple.  ``bk_tree`` is
    an empty BKTree (rather than the old ``None``) when no usable PNGs are
    found, so callers can query it unconditionally.
    """
    hash_list = []
    chr_name = []
    for f_path in get_all_files(dict_path):
        # Bug fix: the old test (f_path[-3:] == 'png') also matched names
        # like 'foopng'; require a real '.png' extension.
        if not f_path.endswith('.png'):
            continue
        chr_image = Image.open(f_path).convert('L')
        represent_hash = dhash.dhash_int(chr_image)
        if not represent_hash:
            continue
        hash_list.append(represent_hash)
        chr_name.append(os.path.basename(f_path)[:-4])

    # Bug fix: the tree used to be rebuilt inside the loop on every file
    # (O(n^2)); build it once from the completed hash list.
    bk_tree = pybktree.BKTree(pybktree.hamming_distance, hash_list)
    return chr_name, hash_list, bk_tree
Ejemplo n.º 7
0
if __name__ == '__main__':

    # NOTE(review): this script block is truncated by the scrape -- the
    # final print( call is cut off mid-statement, so it cannot run as-is.
    # Code kept byte-identical; only comments added.

    # order_path = '/Users/weilei/project/fastprint/08_data/2019新单/7月/(0.02平米) 07-29 2 4S8RY04IA0.zip'
    # unpack_zip_file_path = unzip_order_list(order_path)

    # Hard-coded customer/month selection and machine-specific data roots.
    cust_code = '12 S7CT/'
    month = ''
    order_dir = '/Users/weilei/project/fastprint/08_data/2019新单/' + cust_code + month
    unzip_dir = '/Users/weilei/project/fastprint/08_data/2019新单解压/'
    dest_path = '/Users/weilei/project/fastprint/08_data/2019新单按客户_文件类型分类/'
    dest_path_file_type_order_name = '/Users/weilei/project/fastprint/08_data/2019新单按文件类型_客户分类/'

    # Destination for order archives that fail to unzip.
    unzip_zip_file_path = '/Users/weilei/project/fastprint/08_data/2019新单无法解压的订单/' + cust_code + month

    order_zip_list = file_utilities.get_all_files(order_dir)

    for order_zip_path in order_zip_list:

        # Skip macOS Finder metadata files.
        if order_zip_path.endswith('.DS_Store'):
            continue

        if file_utilities.is_zip_file(order_zip_path) is False:
            print('error: ' + order_zip_path + ' is not a zip file. ')
            continue

        unzip_foler_path = unzip_order_list(order_zip_path, unzip_dir)

        # An empty string signals that the archive could not be extracted.
        if unzip_foler_path == '':
            print(
                'error: Can not unzip current zip file, folder is emppty. order_zip_path is '
Ejemplo n.º 8
0

def remove_invisible_in_html(html_content):
    """Return *html_content* with every non-printable character removed
    (per ``str.isprintable``); spaces are printable and kept."""
    return ''.join(filter(str.isprintable, html_content))


def get_tokenization_from_html(filename):
    """Extract the visible text of the HTML file *filename* as a list of
    ``(0, line_index, text)`` tuples, one per non-blank line, with HTML
    comments and non-printable characters removed.
    """
    encoding = get_file_encoding(filename)
    soup = BeautifulSoup(open(filename, encoding=encoding), 'html.parser')
    text = remove_comment_in_html(soup.text)

    tokens = []
    for index, raw_line in enumerate(text.split('\n')):
        if not raw_line.strip():
            continue
        tokens.append((0, index, remove_invisible_in_html(raw_line)))

    return tokens


if __name__ == '__main__':
    from utilities.file_utilities import get_all_files

    # Dump the tokenization of every HTML file under tests/ to a result
    # file, one (flag, line_index, text) tuple per line.
    with open('extract-htlm-content-result.txt', 'w', encoding='utf-8') as f:
        for file in get_all_files('tests'):
            for line in get_tokenization_from_html(file):
                f.write(str(line) + '\n')


    # NOTE(review): everything below reads like an unrelated order-counting
    # script spliced in by the scrape; it uses names (os, file_utilities)
    # that are not imported in this fragment.  Code left byte-identical.
    order_dir = '/Users/weilei/project/fastprint/08_data/2019新单/'
    unzip_dir = '/Users/weilei/project/fastprint/08_data/2019新单解压/'
    dest_path = '/Users/weilei/project/fastprint/08_data/2019新单按客户_文件类型分类/'
    dest_path_file_type_order_name = '/Users/weilei/project/fastprint/08_data/2019新单按文件类型_客户分类/'

    unzip_zip_file_path = '/Users/weilei/project/fastprint/08_data/2019新单无法解压的订单/'

    # Count the .zip orders per customer directory and print a summary.
    cust_code_list = os.listdir(order_dir)
    for cust_code in cust_code_list:
        if cust_code.endswith('.DS_Store'):
            continue

        cust_order_dir = order_dir + cust_code
        number = 0
        all_order_list = file_utilities.get_all_files(cust_order_dir)

        for order_name in all_order_list:
            if order_name.lower().endswith('.zip'):
                number = number + 1
        print(cust_code + ' order num is ' + str(number))

    dest_path = '/Users/weilei/project/fastprint/08_data/2019新单按客户_文件类型分类/'

    order_folder_list = os.listdir(dest_path)

    for order_folder in order_folder_list:
        # NOTE(review): this test reuses 'cust_code' left over from the
        # previous loop instead of checking 'order_folder' -- looks like a
        # bug; left untouched in this comment-only pass.
        if cust_code.endswith('.DS_Store'):
            continue

        # NOTE(review): the call below is cut off mid-expression by the
        # scrape, and the except/return that follow belong to a different,
        # truncated function.  This fragment does not parse as-is.
        order_file_list = file_utilities.get_all_files(dest_path +
        except OSError:
            pass

    return file_counterpart_value


if __name__ == '__main__':

    import os
    import gerber
    from constants.path_manager import root
    from gerber.render import GerberCairoContext

    # For each gerber file found under the data path, emit its grouped
    # connected primitives into a per-file output directory.
    for i, file in enumerate(  # 452488-1.0DrillDrawing.gdo
            get_all_files(
                os.path.join(
                    root,
                    'tokenization/gerber_tokenization/data/452599.gdo'))):
        another_gerber = file  # another_gerber file path

        print('jhhhhh', another_gerber)
        # NOTE(review): 'suffix' is actually the base name without its
        # extension (first '.'-separated piece of the last path component),
        # not an extension suffix.
        suffix = another_gerber.split('/')[-1].split('.')[
            0]  # suffix=drill  format(suffix)=drill
        generate_all_group_primitives(another_gerber,
                                      './group-connected-{}'.format(suffix))
    # NOTE(review): the triple-quoted block below is never closed within
    # this fragment -- the source appears truncated here.
    '''
    gm2_dir = os.path.join(root, 'tokenization/gerber_tokenization/data/TZ7.820.1780A.GM2')
    gerber_obj = gerber.read(gm2_dir)
    ctx = GerberCairoContext()
    gerber_obj.render(ctx)
    ctx.dump('test-gerber-{}.png'.format(1))
    print('generate end!')
Ejemplo n.º 11
0
import os


class GImage:
    """Wrap an image file so near-duplicates can be detected via dhash.

    NOTE(review): ``__hash__`` is the exact dhash while ``__eq__`` is a
    fuzzy comparison (difference < 30), so set-based deduplication only
    compares images whose exact hashes collide -- confirm this asymmetry
    between hash and equality is intended.
    """

    def __init__(self, filename):
        self.filename = filename
        self.image = Image.open(filename)
        self._dhash = None  # perceptual hash, computed lazily then cached

    def _hash_int(self):
        # Cache the dhash: the original recomputed it on every __hash__ /
        # __eq__ call, re-scanning the image each time.
        if self._dhash is None:
            self._dhash = dhash.dhash_int(self.image)
        return self._dhash

    def __hash__(self):
        return self._hash_int()

    def __eq__(self, other):
        # Bug fix: the original compared the *signed* difference
        # (a - b < 30), which made equality asymmetric -- any image whose
        # hash was much smaller compared equal to everything.  Use the
        # absolute difference so a == b implies b == a.
        return abs(self._hash_int() - other._hash_int()) < 30


# Deduplicate the grouped-connected PNGs by perceptual hash and copy the
# unique ones into the project's 'unlabelled' data directory.
all_files = list(
    get_all_files(
        '/Users/mqgao/PycharmProjects/auto-pcb-ii/tokenization/gerber_tokenization/group-connected-old'
    ))
ic(len(all_files))

all_unique_files = {GImage(path) for path in all_files
                    if path.endswith('.png')}
ic(len(all_unique_files))

unlabelled_dir = os.path.join(root, 'tokenization', 'gerber_tokenization',
                              'data', 'unlabelled')
for unique_image in all_unique_files:
    source_path = unique_image.filename
    destination = os.path.join(unlabelled_dir, source_path.split('/')[-1])
    shutil.copyfile(source_path, destination)
assert len(get_numerics_str('tests-left_upper-N#N1dot0N#N-N#N0dot11N#N-width-N#N1dot013N#N-height-N#Nminus2dot0N#N.png'
                            )
           ) == 4
assert get_coordination_from_filename('N#Nminus0dot123N#N') == [-0.123]
assert get_coordination_from_filename('tests-left_upper-N#N1dot0N#N-N#N0dot11N#N-width-N#N1dot013N#N-height-N#Nminus2dot'
                                      '0N#N.png') == [1.0, 0.11, 1.013, -2.0]
"""

if __name__ == '__main__':
    from constants.path_manager import root
    from gerber.render import GerberCairoContext
    from gerber_handlers import gerber_path_constants

    r_path = gerber_path_constants.CLIENT_6V29U2G7A0
    for i, file in enumerate(
            get_all_files(os.path.join(r_path, 'cs'))):
        another_gerber = file  # another_gerber file path
        suffix = another_gerber.split('/')[-1].split('.')[0]  # suffix=drill  format(suffix)=drill
        # './group-connected-{}'.format(suffix)=./group-connected-dirll
        # try:
        generate_all_group_primitives(another_gerber, './group-connected-{}'.format(suffix), 1)

        # generate_all_group_primitives(another_gerber, format(suffix))
    # another_gerber = '/Users/mqgao/PycharmProjects/auto-pcb-ii/tokenization/gerber_tokenization/data/dirll.art'
    # except Exception as e:
    # pass
    '''
    root_dir = os.path.join(root, 'tokenization/gerber_tokenization/data/art/')
    dir_list = os.listdir(root_dir)
    for i in range(0, len(dir_list)):
        img_path = os.path.join(root_dir, dir_list[i])