def _add_images_doc(self, path: str, import_path: str, url_prefix: str):
        """
        Create an info dictionary file (JSON) for every image below ``path``.

        Every file of ``path`` whose name does not end with ``.json`` gets a
        sibling ``<name>.json`` unless that file already exists. The written
        dictionary has the keys ``id`` (file name without extension), ``url``
        (``url_prefix`` followed by the part of the real file path that comes
        after ``import_path``) and ``path`` (the file path as listed).
        Sub directories are processed recursively.

        @param {str} path - directory to process
        @param {str} import_path - root directory; its real path is cut off
            the front of each file path to build the url (files are assumed
            to live below it)
        @param {str} url_prefix - prefix put in front of the relative url
        """
        _import_path = os.path.realpath(import_path)

        # Process the files of the current directory
        _file_list = FileTool.get_filelist(path,
                                           regex_str=r'^((?!\.json$).)*$',
                                           is_fullname=True)
        for _file in _file_list:
            _ext = FileTool.get_file_ext(_file)
            if _ext:
                _json_file = _file[0:-len(_ext)] + 'json'
            else:
                # Without an extension "-len(_ext)" is 0 and the slice would
                # be empty, which made every such file share the name "json"
                _json_file = _file + '.json'

            if os.path.exists(_json_file):
                # Info file already exists, nothing to do
                continue

            # Build and write the info dictionary
            _file_name = FileTool.get_file_name_no_ext(_file)
            _url = os.path.realpath(_file)[len(_import_path):].replace(
                '\\', '/').lstrip('/')
            _url = '%s/%s' % (url_prefix, _url)
            _image_doc = {'id': _file_name, 'url': _url, 'path': _file}
            _json_str = json.dumps(_image_doc, ensure_ascii=False)
            with open(_json_file, 'wb') as _fid:
                _fid.write(_json_str.encode(encoding='utf-8'))

        # Process the sub directories
        _sub_dir_list = FileTool.get_dirlist(path)
        for _sub_dir in _sub_dir_list:
            self._add_images_doc(_sub_dir, import_path, url_prefix)
    def rename_file_to_num(cls, path: str, start_index: int = 1) -> int:
        """
        Rename the files below ``path`` to zero padded (10 characters) running numbers.

        The files of the directory itself are numbered first, then every sub
        directory is processed recursively, continuing with the next number.
        The original extension is kept; a file without extension gets the
        bare number as its name.

        The renaming is done in two passes through temporary names, so a file
        that already carries one of the target names (e.g. from an earlier
        run) is never overwritten by another file.

        @param {str} path - directory to process
        @param {int} start_index=1 - first number to use

        @returns {int} - the next unused number
        """
        import uuid

        _start_index = start_index
        _path = os.path.realpath(path)
        _file_list = FileTool.get_filelist(_path, is_fullname=False)

        # Pass 1: move every file to a unique temporary name
        _tmp_prefix = '.rename_tmp_%s_' % uuid.uuid4().hex
        _pending = []
        for _seq, _file in enumerate(_file_list):
            _tmp_name = '%s%d' % (_tmp_prefix, _seq)
            os.rename(os.path.join(_path, _file),
                      os.path.join(_path, _tmp_name))
            _pending.append((_tmp_name, FileTool.get_file_ext(_file)))

        # Pass 2: give the files their final numbered names
        for _tmp_name, _ext in _pending:
            _new_name = StringTool.fill_fix_string(str(_start_index), 10, '0')
            if _ext:
                # Only append the dot when there is an extension to follow it
                _new_name = _new_name + '.' + _ext
            os.rename(os.path.join(_path, _tmp_name),
                      os.path.join(_path, _new_name))
            _start_index += 1

        # Process the sub directories
        _sub_dir_list = FileTool.get_dirlist(_path)
        for _sub_dir in _sub_dir_list:
            _start_index = cls.rename_file_to_num(_sub_dir,
                                                  start_index=_start_index)

        # Return the next unused number
        return _start_index
Ejemplo n.º 3
0
    def labelimg_rename_filename(cls, path: str, fix_len: int = 10):
        """
        Rename the files of a labelimg directory to zero padded running numbers.

        Image files and their annotation files (same name with ``.xml``) are
        renamed together, and the ``filename`` / ``path`` elements inside the
        annotation file are updated to the new image name. Numbers whose
        image name (or, for annotated images, annotation name) already exists
        in the directory are skipped.

        @param {str} path - directory whose files are renamed
        @param {int} fix_len=10 - length of the new file name (without extension)
        """
        _path = os.path.realpath(path)
        _files = FileTool.get_filelist(path=_path, is_fullname=False)
        _index = 1
        for _file in _files:
            _file_ext = FileTool.get_file_ext(_file)
            if _file_ext == 'xml':
                # Annotation files are renamed together with their image
                continue

            _file_no_ext = FileTool.get_file_name_no_ext(_file)
            _old_xml = os.path.join(_path, _file_no_ext + '.xml')
            _has_xml = os.path.exists(_old_xml)

            # Find the next free file name
            while True:
                _new_name = StringTool.fill_fix_string(str(_index),
                                                       fix_len,
                                                       '0',
                                                       left=True)
                if _file_ext:
                    _new_file = _new_name + '.' + _file_ext
                else:
                    _new_file = _new_name
                # Advance exactly once per candidate (a second increment on
                # collision used to skip every other number)
                _index += 1
                if os.path.exists(os.path.join(_path, _new_file)):
                    # Image name already taken
                    continue
                if _has_xml and os.path.exists(
                        os.path.join(_path, _new_name + '.xml')):
                    # Renaming the annotation would overwrite the annotation
                    # of another image
                    continue

                break

            # Rename the image
            os.rename(os.path.join(_path, _file),
                      os.path.join(_path, _new_file))
            if _has_xml:
                # Rename the annotation file as well
                _xml_file = os.path.join(_path, _new_name + '.xml')
                os.rename(_old_xml, _xml_file)

                # Point the annotation content at the renamed image
                _tree = ET.parse(_xml_file)
                _root = _tree.getroot()
                _root.find('filename').text = _new_file
                _root.find('path').text = os.path.join(_path, _new_file)
                _tree.write(_xml_file,
                            encoding='utf-8',
                            method="xml",
                            xml_declaration=None)
Ejemplo n.º 4
0
    def load_predef_by_file(self, file: str, encoding: str = 'utf-8'):
        """
        Load a predefined template from a file and hand it to load_predef_by_config.

        Only files with the extension ``json`` (case-insensitive) are supported.

        @param {str} file - name of the file to load
        @param {str} encoding='utf-8' - encoding of the file

        @throws {AttributeError} - when the file extension is not supported
        """
        _file_type = FileTool.get_file_ext(file).lower()
        if _file_type != 'json':
            raise AttributeError('Not support file type [%s]!' % _file_type)

        with open(file, 'r', encoding=encoding) as _reader:
            _predef = json.loads(_reader.read())

        # Load the parsed configuration into this object
        self.load_predef_by_config(_predef)
Ejemplo n.º 5
0
    def import_images(self, path: str, pipeline: str, encoding: str = 'utf-8'):
        """
        Import the images of a directory into the search database.

        For every non-``.json`` file of ``path`` the sibling info dictionary
        file ``<name>.json`` is read and passed, together with the image
        bytes, to ``_image_to_search_db``. Files without an info file are
        skipped. A failure of one image is logged and does not stop the
        import of the remaining images.

        @param {str} path - directory holding the images and their info files:
            image file, e.g. "abc.jpg"
            matching json info file, e.g. "abc.json", written as a standard json string
            note: the json may contain a "collection" entry naming the
            collection the image belongs to
        @param {str} pipeline - id of the processing pipeline
        @param {str} encoding='utf-8' - encoding of the json files
        """
        _pipeline_obj = self._get_pipeline(pipeline)
        _file_list = FileTool.get_filelist(path,
                                           regex_str=r'^((?!\.json$).)*$',
                                           is_fullname=True)
        for _file in _file_list:
            try:
                # Locate the info dictionary of the image
                _ext = FileTool.get_file_ext(_file)
                if _ext:
                    _json_file = _file[0:-len(_ext)] + 'json'
                else:
                    # "-len('')" is 0, the slice would drop the whole path
                    _json_file = _file + '.json'

                if not os.path.exists(_json_file):
                    self.log_debug(
                        'Json file not exists, not imported: [%s]!' % _file)
                    continue

                with open(_json_file, 'r', encoding=encoding) as _fid:
                    _image_doc = json.loads(_fid.read())

                # Import the image
                _collection = _image_doc.get('collection', '')
                with open(_file, 'rb') as _fid:
                    self._image_to_search_db(_fid.read(),
                                             _image_doc,
                                             _pipeline_obj,
                                             init_collection=_collection)

                # Log the result
                self.log_debug('image [%s] imported success' % _file)
            except Exception:
                # Best effort per image; KeyboardInterrupt / SystemExit are
                # deliberately not swallowed
                self.log_debug('image [%s] import error: %s' %
                               (_file, traceback.format_exc()))
Ejemplo n.º 6
0
    def _get_pic_file_list(cls, input_path: str) -> list:
        """
        Collect the jpg / jpeg files below a directory (recursively).

        @param {str} input_path - directory to scan

        @returns {list} - files of the directory itself first, followed by
            the files of each sub directory
        """
        _jpeg_exts = ('jpg', 'jpeg')

        # Files of the directory itself, filtered by (case-insensitive) extension
        _pics = [
            _name
            for _name in FileTool.get_filelist(input_path, is_fullname=True)
            if FileTool.get_file_ext(_name).lower() in _jpeg_exts
        ]

        # Append the result of every sub directory
        for _child in FileTool.get_dirlist(input_path):
            _pics += cls._get_pic_file_list(_child)

        return _pics
Ejemplo n.º 7
0
    def labelimg_del_not_rgb_pic(cls, path: str):
        """
        Delete every image whose mode is not RGB, together with its annotation file.
        (works around the "image_size must contain 3 elements[4]" error)

        Sub directories are processed recursively. Every file that is not an
        ``xml`` file is opened as an image.

        @param {str} path - directory to process
        """
        _path = os.path.realpath(path)

        # Process all sub directories first
        for _dir in FileTool.get_dirlist(path=_path, is_fullpath=True):
            cls.labelimg_del_not_rgb_pic(_dir)

        # Check the images of this directory
        _files = FileTool.get_filelist(path=_path, is_fullname=False)
        for _file in _files:
            _file_ext = FileTool.get_file_ext(_file)
            if _file_ext == 'xml':
                # Annotation files are not images
                continue

            _img_file = os.path.join(_path, _file)

            # Read the mode inside "with" so the handle is closed even when
            # the file cannot be opened as an image, and before any deletion
            with open(_img_file, 'rb') as _fp:
                _mode = Image.open(_fp).mode

            if _mode == 'RGB':
                continue

            # Not RGB: remove the image and its annotation file
            _xml_file = os.path.join(
                _path,
                FileTool.get_file_name_no_ext(_file) + '.xml')
            print('delete %s' % _img_file)
            FileTool.remove_file(_img_file)
            if os.path.exists(_xml_file):
                FileTool.remove_file(_xml_file)
Ejemplo n.º 8
0
    def UploadFile(cls,
                   upload_type: str,
                   note: str,
                   interface_seq_id: str,
                   methods=['POST']):
        """
        Upload a file (single file upload)  (/api/Qa/UploadFiles/<upload_type>/<note>/<interface_seq_id>)

        The uploaded file ("file" field of the request) is checked against
        the UploadFileConfig record of ``upload_type`` (size limit, allowed
        extensions), renamed by the configured rename template (if any),
        saved below the loader's execute path and finally passed to the
        configured "upload_after" plugin function. When that function
        reports a status other than '00000' the saved file is removed again.

        Placeholders supported in the rename template:
            {$datetime=<strftime format>$} - current time
            {$uuid=<n>$} - str(uuid.uuid<n>())
            {$random=<min>,<max>$} - random integer between both numbers,
                left padded with zeros to the width of <max> (the '=' is optional)
            {$file_ext=$} - extension of the uploaded file
            {$file_name=<start>[,<end>]$} - slice of the uploaded file name

        @param {str} upload_type - file type, must be configured in the UploadFileConfig table
        @param {str} note - note of the file
        @param {str} interface_seq_id - client sequence id, echoed back so
            the client can match asynchronous calls
        @param {list} methods=['POST'] - not read in this function; presumably
            consumed by the route registration (TODO confirm), never mutated here

        @return {str} - json string of the answer
            status : processing status
                00000 - success
                10001 - no file uploaded
                10002 - upload type not configured
                10003 - upload too large
                10004 - file extension not allowed
                20001 - exception while processing
                other - status reported by the upload_after function
            msg : description of the status
            answer_type: 'text' or 'json', tells whether answers is a text array or a json object
            answers : answer content
            url : url of the uploaded file, including url path and file name
        """
        _ret_json = {
            'interface_seq_id': interface_seq_id,
            'status': '00000',
            'msg': 'success',
            'answer_type': 'text',
            'answers': [],
            'url': ''
        }
        _qa_loader = RunTool.get_global_var('QA_LOADER')
        try:
            if 'file' not in request.files or request.files[
                    'file'].filename == '':
                _ret_json['status'] = '10001'
                _ret_json['msg'] = 'No file upload!'
                return jsonify(_ret_json)

            # Get the configuration of the upload type
            _upload_config = UploadFileConfig.get_or_none(
                UploadFileConfig.upload_type == upload_type)
            if _upload_config is None:
                _ret_json['status'] = '10002'
                _ret_json['msg'] = 'upload type not exists!'
                return jsonify(_ret_json)

            # Check the upload size
            if _upload_config.size > 0:
                if request.content_length > _upload_config.size * 1024 * 1024:
                    _ret_json['status'] = '10003'
                    _ret_json['msg'] = 'upload file size to large!'
                    return jsonify(_ret_json)

            # The file name is controlled by the client: keep only its last
            # path component so it cannot escape the save directory
            _file = request.files['file']
            _old_filename = os.path.basename(_file.filename.replace('\\', '/'))
            if _old_filename in ('', '.', '..'):
                _ret_json['status'] = '10001'
                _ret_json['msg'] = 'No file upload!'
                return jsonify(_ret_json)

            # Check whether the file type is allowed
            _file_ext = FileTool.get_file_ext(_old_filename)
            # NOTE(security): eval of a configuration value; it runs arbitrary
            # code if the UploadFileConfig table can be written by untrusted users
            _allow_ext = eval(_upload_config.exts.upper())
            if len(_allow_ext) > 0 and _file_ext.upper() not in _allow_ext:
                _ret_json['status'] = '10004'
                _ret_json['msg'] = 'Type [%s] not allow upload [.%s] file!' % (
                    upload_type, _file_ext)
                return jsonify(_ret_json)

            # Build the new file name
            def _replace_var_fun(m):
                # Replace one "{$...$}" placeholder of the rename template;
                # unknown placeholders are returned unchanged
                _match_str = m.group(0)
                _value = None
                if _match_str.startswith('{$datetime='):
                    # Current time in the given strftime format
                    _key = _match_str[11:-2]
                    _value = datetime.datetime.now().strftime(_key)
                elif _match_str.startswith('{$uuid='):
                    # uuid of the given version
                    # NOTE(security): eval of a configuration value
                    _key = _match_str[7:-2]
                    _value = eval('str(uuid.uuid%s())' % _key)
                elif _match_str.startswith('{$random'):
                    # Random integer between two numbers, left padded with
                    # zeros to the width of the larger number
                    _key = _match_str[8:-2].lstrip('=')
                    # NOTE(security): eval of a configuration value
                    _args = eval('(%s)' % _key)
                    _value = StringTool.fill_fix_string(
                        str(random.randint(*_args)), len(str(_args[1])), '0')
                elif _match_str.startswith('{$file_ext='):
                    # Extension of the uploaded file
                    _value = _file_ext
                elif _match_str.startswith('{$file_name='):
                    # Slice of the uploaded file name
                    _key = _match_str[12:-2]
                    # NOTE(security): eval of a configuration value
                    _args = eval('(%s)' % _key)
                    if not isinstance(_args, tuple):
                        # "(3)" evaluates to a plain int, not a 1-tuple
                        _args = (_args, )
                    if len(_args) > 1:
                        _value = _old_filename[_args[0]:_args[1]]
                    else:
                        _value = _old_filename[_args[0]:]

                if _value is not None:
                    return str(_value)
                else:
                    return _match_str

            if _upload_config.rename == '':
                _new_filename = _old_filename
            else:
                # flags must be passed by keyword: the 4th positional
                # argument of re.sub is "count"
                _new_filename = re.sub(r'\{\$.+?\$\}', _replace_var_fun,
                                       _upload_config.rename, flags=re.M)

            # Build the url and the save path
            if _upload_config.url != '':
                _ret_json['url'] = '%s/%s' % (_upload_config.url,
                                              _new_filename)

            _save_path = os.path.realpath(
                os.path.join(_qa_loader.execute_path, _upload_config.save_path,
                             _new_filename))

            # Create the target directory
            FileTool.create_dir(os.path.split(_save_path)[0], exist_ok=True)

            # Save the file
            _file.save(_save_path)

            # Post processing after the upload
            # NOTE(security): eval of a configuration value
            _after = eval(_upload_config.after)
            if len(_after) > 0:
                _after_fun = _qa_loader.plugins['upload_after'][_after[0]][
                    _after[1]]
                _status, _msg, _answers = _after_fun(upload_type, note,
                                                     _new_filename, _save_path,
                                                     _ret_json['url'],
                                                     **_after[2])
                _ret_json['status'] = _status
                _ret_json['msg'] = _msg
                if len(_answers) > 0 and isinstance(_answers[0], dict):
                    _ret_json['answer_type'] = 'json'
                    _ret_json['answers'] = _answers[0]
                else:
                    _ret_json['answers'] = _answers
                if _ret_json['status'] != '00000':
                    # Post processing failed, remove the saved file
                    FileTool.remove_file(_save_path)
                    if _qa_loader.logger:
                        _qa_loader.logger.debug(
                            'remove upload file [dest:%s][source:%s] when after deal error[%s]: %s'
                            % (_new_filename, _old_filename, _status, _msg))
        except Exception:
            if _qa_loader.logger:
                _qa_loader.logger.error('Exception: %s' %
                                        traceback.format_exc(),
                                        extra={'callFunLevel': 1})
            _ret_json = {
                'interface_seq_id': interface_seq_id,
                'status': '20001',
                'msg': '上传文件异常'
            }

        return jsonify(_ret_json)
Ejemplo n.º 9
0
    def labelimg_crop_pic_by_flags(cls,
                                   path: str,
                                   dest_path: str,
                                   copy_no_flag_pic: bool = True,
                                   with_sub_dir: bool = True,
                                   fix_len: int = 10):
        """
        Crop every annotated region of the images below ``path`` into its own picture.

        The images are taken from ``cls._get_pic_file_list(path)``. For an
        image with an annotation file (same name with ``.xml``) every
        ``object`` element is cropped by its bounding box and saved as JPEG.
        All written pictures are named with one zero padded running number.

        @param {str} path - directory to process
        @param {str} dest_path - directory the cropped pictures are stored in
        @param {bool} copy_no_flag_pic=True - copy images without annotation file unchanged
        @param {bool} with_sub_dir=True - keep the source directory structure below dest_path
        @param {int} fix_len=10 - length of the new picture names (without extension)

        @returns {iter_list} - progress information returned by yield:
            [number of processed files int, total number of files int, success bool]
            [-1, -1, False] is yielded when processing failed
        """
        try:
            # Get all images to process
            _file_list = cls._get_pic_file_list(path)
            _total = len(_file_list)
            _deal_num = 0

            # Report the progress first
            if _total == 0:
                yield [_deal_num, _total, True]
                return

            # Create the destination directory
            FileTool.create_dir(dest_path, exist_ok=True)

            # Process the images one by one
            _rename_index = 1
            _src_path = os.path.realpath(path)
            _dest_root = os.path.realpath(dest_path)
            _dest_path = _dest_root
            for _file in _file_list:
                # Current progress
                yield [_deal_num, _total, True]

                # Destination directory of this image
                _file_path, _filename = os.path.split(_file)
                if with_sub_dir:
                    # Mirror the sub directory of the source
                    _dest_path = os.path.join(
                        _dest_root,
                        os.path.realpath(_file_path)[len(_src_path):].strip(
                            '/\\'))
                    FileTool.create_dir(_dest_path, exist_ok=True)

                # Annotation file of this image
                _ext = FileTool.get_file_ext(_filename)
                _xml_file = os.path.join(_file_path,
                                         _filename[0:-len(_ext)] + 'xml')

                if not os.path.exists(_xml_file):
                    # No annotation file
                    if copy_no_flag_pic:
                        # Copy the image unchanged
                        shutil.copy(
                            _file,
                            os.path.join(
                                _dest_path,
                                StringTool.fill_fix_string(
                                    str(_rename_index), fix_len, '0') + '.' +
                                _ext))
                        _rename_index += 1

                    # Next image
                    _deal_num += 1
                    continue

                # Load the image into memory
                with open(_file, 'rb') as _fid:
                    _file_bytes = _fid.read()
                    _image = Image.open(BytesIO(_file_bytes))

                # Process the annotations
                _tree = ET.parse(_xml_file)
                _root = _tree.getroot()

                for _member in _root.findall('object'):
                    # Bounding box, looked up by tag; falls back to the fifth
                    # child used before when no "bndbox" element exists
                    _box = _member.find('bndbox')
                    if _box is None:
                        _box = _member[4]

                    _crop_image = _image.crop(
                        (int(_box[0].text), int(_box[1].text),
                         int(_box[2].text), int(_box[3].text)))

                    _crop_image.save(os.path.join(
                        _dest_path,
                        StringTool.fill_fix_string(str(_rename_index), fix_len,
                                                   '0') + '.' + _ext),
                                     format='JPEG')

                    _rename_index += 1

                # Next image
                _deal_num += 1

            # Report the final result
            yield [_total, _total, True]
        except Exception:
            # "except Exception" lets GeneratorExit pass: yielding after the
            # consumer closed the generator would raise a RuntimeError
            print('labelimg_crop_pic_by_flags error: %s\r\n%s' %
                  (path, traceback.format_exc()))
            yield [-1, -1, False]
Ejemplo n.º 10
0
    def labelimg_pic_deal(cls, path: str):
        """
        Make the pictures of a labelimg directory compatible with TFRecord.
        1. convert png / gif pictures to jpg (the original file is removed)
        2. delete pictures whose mode is not RGB, together with their annotation file
        (works around the "image_size must contain 3 elements[4]" error)
        3. make sure "filename" and "path" of the annotation file point at the picture

        Sub directories are processed recursively. Every file that is not an
        ``xml`` file is opened as an image.

        @param {str} path - directory to process
        """
        _path = os.path.realpath(path)

        # Process all sub directories first
        for _dir in FileTool.get_dirlist(path=_path, is_fullpath=True):
            cls.labelimg_pic_deal(_dir)

        # Check the pictures of this directory
        _files = FileTool.get_filelist(path=_path, is_fullname=False)
        for _file in _files:
            _file_ext = FileTool.get_file_ext(_file)
            if _file_ext == 'xml':
                # Annotation files are handled together with their picture
                continue

            _img_file = os.path.join(_path, _file)
            _file_no_ext = FileTool.get_file_name_no_ext(_file)
            _xml_file = os.path.join(_path, _file_no_ext + '.xml')

            if _file_ext in ('png', 'gif'):
                # Convert the picture to jpg; "with" closes the source even
                # when the conversion fails
                _jpg_file = os.path.join(_path, _file_no_ext + '.jpg')
                with open(_img_file, 'rb') as _fp:
                    Image.open(_fp).convert('RGB').save(_jpg_file)

                # Remove the original; the annotation is fixed further down
                FileTool.remove_file(_img_file)
                _img_file = _jpg_file

            # Open the picture to check its mode
            with open(_img_file, 'rb') as _fp:
                _mode = Image.open(_fp).mode

            if _mode != 'RGB':
                # Not RGB: delete the picture and its annotation file
                print('delete %s' % _img_file)
                FileTool.remove_file(_img_file)
                if os.path.exists(_xml_file):
                    FileTool.remove_file(_xml_file)
                continue

            # Check the annotation file against the real picture name (the
            # name is not forced to ".jpg" for pictures that were not
            # converted, e.g. ".jpeg" files)
            if os.path.exists(_xml_file):
                _img_name = os.path.basename(_img_file)
                _tree = ET.parse(_xml_file)
                _root = _tree.getroot()
                if (_root.find('filename').text != _img_name
                        or _root.find('path').text != _img_file):
                    _root.find('filename').text = _img_name
                    _root.find('path').text = _img_file
                    _tree.write(_xml_file,
                                encoding='utf-8',
                                method="xml",
                                xml_declaration=None)
Ejemplo n.º 11
0
    def labelimg_copy_flags_pics(cls,
                                 input_path: str,
                                 output_path: str,
                                 use_mapping: bool = False,
                                 mapping: dict = None):
        """
        Copy pictures and their annotation files into one directory per class.

        The annotation files are taken from
        ``cls._get_labelimg_annotation_file_list(input_path)``. Each picture
        is copied once, into the directory of the first of its objects that
        is not filtered out; the annotation file (with updated "filename" and
        "path") is written next to it. A numeric suffix is added when the
        picture name already exists in the target directory.

        @param {str} input_path - directory of the pictures
        @param {str} output_path - output directory
        @param {bool} use_mapping=False - whether to translate classes with mapping
        @param {dict} mapping=None - mapping.json dictionary, only read when
            use_mapping is True; the keys 'info_key_dict', 'set_by_info'
            ('class_name', 'info_tag'), 'class_int' and 'class' are used

        @returns {iter_list} - progress information returned by yield:
            [number of processed files int, total number of files int, success bool]
            [-1, -1, False] is yielded when processing failed
        """
        try:
            # Get all annotation files to process
            _file_list = cls._get_labelimg_annotation_file_list(input_path)
            _total = len(_file_list)
            _deal_num = 0

            # Report the progress first
            if _total == 0:
                yield [_deal_num, _total, True]
                return

            # Create the output directory
            FileTool.create_dir(output_path, exist_ok=True)

            # Process the annotation files one by one
            for _file in _file_list:
                # Current progress
                yield [_deal_num, _total, True]

                _tree = ET.parse(_file)
                _root = _tree.getroot()

                _annotations = dict()
                _annotations['filename'] = _root.find('filename').text
                _annotations['file_path'] = os.path.join(
                    os.path.split(_file)[0], _annotations['filename'])

                # The info dictionary is only needed in mapping mode; reading
                # mapping unconditionally failed for the default mapping=None
                _info_dict = dict()
                if use_mapping:
                    _info_dict = ExtendLib.get_info_dict(
                        _annotations['file_path'], mapping['info_key_dict'])

                # Process the objects one by one
                _copy_path = ''  # directory the picture was copied to
                _is_copy = False  # whether the picture was copied already
                _new_xml_name = ''  # name of the new annotation file
                for _member in _root.findall('object'):
                    _member_class = _member[0].text
                    if use_mapping:
                        # Translate the class with the mapping
                        _set_by_info = mapping['set_by_info']
                        if (_member_class == _set_by_info['class_name']
                                and _set_by_info['info_tag'] in _info_dict):
                            # Take the real class from the info dictionary
                            _member_class = _info_dict[
                                _set_by_info['info_tag']]
                            _member[0].text = _member_class

                        # Skip classes that are not wanted (progress is
                        # counted per file, not per object)
                        if _member_class not in mapping['class_int']:
                            continue

                        _save_class_path = os.path.join(
                            output_path, mapping['class'][_member_class])
                    else:
                        # Plain class name
                        _save_class_path = os.path.join(
                            output_path, _member_class)

                    # Copy the picture once
                    if not _is_copy:
                        # Avoid overwriting a picture of the same name
                        _file_name = FileTool.get_file_name_no_ext(
                            _annotations['filename'])
                        _file_ext = FileTool.get_file_ext(
                            _annotations['filename'])
                        _rename_num = 1
                        _new_file_name = '%s.%s' % (_file_name, _file_ext)
                        _new_xml_name = '%s.xml' % (_file_name, )
                        while os.path.exists(
                                os.path.join(_save_class_path,
                                             _new_file_name)):
                            _new_file_name = '%s_%d.%s' % (
                                _file_name, _rename_num, _file_ext)
                            _new_xml_name = '%s_%d.xml' % (_file_name,
                                                           _rename_num)
                            _rename_num += 1

                        # Create the class directory and copy the picture
                        FileTool.create_dir(_save_class_path, exist_ok=True)
                        shutil.copyfile(
                            _annotations['file_path'],
                            os.path.join(_save_class_path, _new_file_name))

                        # Update file name and path inside the annotation
                        _root.find('filename').text = _new_file_name
                        _root.find('path').text = os.path.join(
                            _save_class_path, _new_file_name)

                        # Remember where the picture went: later objects may
                        # belong to a different class directory
                        _copy_path = _save_class_path
                        _is_copy = True

                if _is_copy:
                    # Write the annotation next to the copied picture
                    _tree.write(os.path.join(_copy_path, _new_xml_name),
                                encoding='utf-8',
                                method="xml",
                                xml_declaration=None)

                # Next annotation file
                _deal_num += 1

            # Report the final result
            yield [_total, _total, True]
        except Exception:
            # "except Exception" lets GeneratorExit pass: yielding after the
            # consumer closed the generator would raise a RuntimeError
            print('labelimg_copy_flags_pics error: %s\r\n%s' %
                  (input_path, traceback.format_exc()))
            yield [-1, -1, False]
Ejemplo n.º 12
0
    def _clean_file_path(cls, path: str, class_path: str):
        """
        清理当前目录文件

        @param {str} path - 要处理的目录地址
        @param {str} class_path - 类目录
        """
        # 处理自身目录,先获取商品信息
        _info = dict()
        _info_file = os.path.join(path, 'info.json')
        if os.path.exists(_info_file):
            with open(_info_file, 'rb') as f:
                _eval = str(f.read(), encoding='utf-8')
                _info = eval(_eval)

            # 判断是否不处理
            _shop_name = _info['店名']
            # if _info['款式'] == '挂件' and _info['挂件类型'] == '':
            #     return

            # 遍历文件进行处理
            _product_num = FileTool.get_dir_name(path)
            _files = FileTool.get_filelist(path)
            _order = 1
            for _file in _files:
                _file_ext = FileTool.get_file_ext(_file).lower()
                if _file_ext not in ['jpg', 'jpeg', 'png', 'bmp']:
                    # 不是合适的文件类型
                    continue

                # 判断是否有括号
                if _file.find('(') >= 0:
                    FileTool.remove_file(_file)
                    continue

                # 判断是否匹配上要删除的图片大小
                if _shop_name in DEL_SHOP_PIC_SIZE.keys() and os.path.getsize(
                        _file) in DEL_SHOP_PIC_SIZE[_shop_name]:
                    FileTool.remove_file(_file)
                    continue

                # 修改文件名
                if not FileTool.get_file_name(_file).startswith(_product_num):
                    os.rename(
                        _file,
                        os.path.join(
                            path, '%s_%s_%d.%s' %
                            (_product_num, 'main' if _file.find('主图') >= 0
                             or _file.find('main') >= 0 else 'detail', _order,
                             _file_ext)))

                # 下一个文件
                _order += 1

            # 移动文件夹到指定的分类目录
            _class_path = _info['款式']
            if _class_path in PROP_TYPE_TRAN_DICT.keys():
                _class_path = PROP_TYPE_TRAN_DICT[_info['款式']]
            shutil.move(path,
                        os.path.join(class_path, _class_path, _product_num))

        # 处理完成,返回
        return
Ejemplo n.º 13
0
    def labelimg_copy_flags_pics(cls,
                                 input_path: str,
                                 output_path: str,
                                 is_cc: bool = False):
        """
        Copy pictures and their annotation files into one directory per class.

        The annotation files are taken from
        ``cls._get_labelimg_annotation_file_list(input_path)``. Each picture
        is copied once, into the directory of the first of its objects that
        is not filtered out; the annotation file is stored next to it
        (rewritten when a class name was changed, copied unchanged
        otherwise). A numeric suffix is added when the picture name already
        exists in the target directory.

        @param {str} input_path - directory of the pictures
        @param {str} output_path - output directory
        @param {bool} is_cc=False - whether this is a CC project; if so the
            class '翡翠' is replaced by the style read from the info.json
            beside the annotation file, and classes that are not in
            USE_CLASS_TEXT_LIST are skipped

        @returns {iter_list} - progress information returned by yield:
            [number of processed files int, total number of files int, success bool]
            [-1, -1, False] is yielded when processing failed
        """
        try:
            # Get all annotation files to process
            _file_list = cls._get_labelimg_annotation_file_list(input_path)
            _total = len(_file_list)
            _deal_num = 0

            # Report the progress first
            if _total == 0:
                yield [_deal_num, _total, True]
                return

            # Create the output directory
            FileTool.create_dir(output_path, exist_ok=True)

            # Process the annotation files one by one
            for _file in _file_list:
                # Current progress
                yield [_deal_num, _total, True]

                _tree = ET.parse(_file)
                _root = _tree.getroot()

                _annotations = dict()
                _annotations['filename'] = _root.find('filename').text
                _annotations['file_path'] = os.path.join(
                    os.path.split(_file)[0], _annotations['filename'])

                # Process the objects one by one
                _copy_path = ''  # directory the picture was copied to
                _is_copy = False  # whether the picture was copied already
                _is_change_class = False  # whether a class name was changed
                _new_xml_name = ''  # name of the new annotation file
                for _member in _root.findall('object'):
                    _member_class = _member[0].text
                    if is_cc:
                        # Class translation of the CC project
                        if _member_class == '翡翠':
                            # The real class comes from the product information
                            _info_file = os.path.join(
                                os.path.split(_file)[0], 'info.json')
                            with open(_info_file, 'rb') as f:
                                _eval = str(f.read(), encoding='utf-8')
                                # NOTE(security): eval executes whatever the
                                # info file contains; only use this on info
                                # files from a trusted source
                                _info = eval(_eval)

                            if _info['款式'] == '挂件':
                                # Pendant: use the second level class
                                _member_class = _info['挂件类型']
                            else:
                                # First level class
                                _member_class = _info['款式']

                            # Change the class name in the annotation
                            _member[0].text = _member_class
                            _is_change_class = True

                        # Skip classes that are not wanted (progress is
                        # counted per file, not per object)
                        if _member_class not in USE_CLASS_TEXT_LIST:
                            continue

                        _save_class_path = os.path.join(
                            output_path, cls._cc_get_class_text(_member_class))
                    else:
                        # Plain class name
                        _save_class_path = os.path.join(
                            output_path, _member_class)

                    # Copy the picture once
                    if not _is_copy:
                        # Avoid overwriting a picture of the same name
                        _file_name = FileTool.get_file_name_no_ext(
                            _annotations['filename'])
                        _file_ext = FileTool.get_file_ext(
                            _annotations['filename'])
                        _rename_num = 1
                        _new_file_name = '%s.%s' % (_file_name, _file_ext)
                        _new_xml_name = '%s.xml' % (_file_name, )
                        while os.path.exists(
                                os.path.join(_save_class_path,
                                             _new_file_name)):
                            _new_file_name = '%s_%d.%s' % (
                                _file_name, _rename_num, _file_ext)
                            _new_xml_name = '%s_%d.xml' % (_file_name,
                                                           _rename_num)
                            _rename_num += 1

                        # Create the class directory and copy the picture
                        FileTool.create_dir(_save_class_path, exist_ok=True)
                        shutil.copyfile(
                            _annotations['file_path'],
                            os.path.join(_save_class_path, _new_file_name))

                        # Remember where the picture went: later objects may
                        # belong to a different class directory
                        _copy_path = _save_class_path
                        _is_copy = True

                if _is_copy:
                    # Store the annotation next to the copied picture
                    _new_xml_file = os.path.join(_copy_path, _new_xml_name)
                    if _is_change_class:
                        # The annotation content was changed, write it out
                        _tree.write(_new_xml_file,
                                    encoding='utf-8',
                                    method="xml",
                                    xml_declaration=None)
                    else:
                        shutil.copyfile(_file, _new_xml_file)

                # Next annotation file
                _deal_num += 1

            # Report the final result
            yield [_total, _total, True]
        except Exception:
            # "except Exception" lets GeneratorExit pass: yielding after the
            # consumer closed the generator would raise a RuntimeError
            print('labelimg_copy_flags_pics error: %s\r\n%s' %
                  (input_path, traceback.format_exc()))
            yield [-1, -1, False]