Ejemplo n.º 1
0
def FindInvalidImg(path, delflag=0, exclude_files='', exclude_exts=''):
    '''Find files that OpenCV cannot load as an image.

    Every file returned by GatherFiles(path, ...) is passed to cv2.imread;
    a file is reported when cv2.imread returns None for it.

    @param path ---- directory to search
    @param delflag --- delete flag: 0 keeps the reported files, 1 removes
                       each reported file from disk
    @param exclude_files --- comma separated file names to skip,
                             e.g. "db.json,num.txt"
    @param exclude_exts ---- comma separated extensions to skip,
                             e.g. ".json,.txt"
    @return list of the reported file paths, e.g.
        ['/temp/a.jpg','/temp/a/b.jpg']

    Example:
        files=FindInvalidImg('./temp',0,"db.json,num.txt",".json,.txt")
    '''
    print('FindInvalidImg(%s,delflag=%d)' % (path, delflag))
    print('GatherFiles(%s,exts=,exclude_files="%s",exclude_exts="%s")' %
          (path, exclude_files, exclude_exts))
    candidates = GatherFiles(path,
                             exclude_files=exclude_files,
                             exclude_exts=exclude_exts)
    total = len(candidates)
    progress = ProcessBar.ShowProcess(100, 'Inv Search', '', 'OK')
    print('Search results:')

    unreadable = []
    for index, candidate in enumerate(candidates):
        if cv2.imread(candidate) is None:
            unreadable.append(candidate)
            if delflag == 1:
                os.remove(candidate)
        # Refresh the progress bar only every 50th file.
        if not index % 50:
            progress.show_process(int(index * 100 / total))
    progress.show_process(100)
    return unreadable
Ejemplo n.º 2
0
 def GetSameFiles(self):
     '''Collect groups of files whose 'fsize' and 'md5' features are equal.

     The files come from funs.GatherFiles(self.path, exts=self.exts); each
     one is described by self.get_feature(), whose result is stored as a
     row with the columns 'sfile', 'fsize' and 'md5' (presumably path,
     size and MD5 digest -- confirm against get_feature).

     @return list with one pandas Series of 'sfile' values for every
             (fsize, md5) group that contains more than one row
     '''
     # Feature table, one row per gathered file.
     table = pd.DataFrame(columns=['sfile', 'fsize', 'md5'])
     print('收集文件列表')
     candidates = funs.GatherFiles(self.path, exts=self.exts)
     print('计算文件特征')
     progress = ProcessBar.ShowProcess(100, '', '', infoDone='Done')
     total = len(candidates)
     for row, candidate in enumerate(candidates):
         table.loc[row] = self.get_feature(candidate)
         # Refresh the progress bar only every 50th file.
         if not row % 50:
             progress.show_process(int(row * 100 / total))
     progress.show_process(100)
     # Keep only the groups that hold at least two files.
     return [
         members.loc[:, 'sfile']
         for _, members in table.groupby(['fsize', 'md5'])
         if members.shape[0] > 1
     ]
Ejemplo n.º 3
0
def FindGIFFiles(path, delflag=0, unzipflag=0):
    '''Find the GIF files below a directory.

    A file counts as GIF when imghdr.what() reports 'gif' for it.

    @param path -------- directory to search
    @param delflag ----- delete flag: 0 keeps the GIF files, 1 removes
                         every GIF file that was found
    @param unzipflag --- extraction flag: 0 does nothing, 1 calls gif2png
                         on the file with the file's own directory as the
                         target (per the original note the frames are
                         named sfile_{n}.png)
    @return list of the GIF file paths, e.g.
        ['/temp/a.gif','/temp/b.gif']

    Example:
        files=FindGIFFiles('/temp',0,0)
    '''
    print('FindGIFFIles(%s,delflag=%d,unzipflag=%d)' %
          (path, delflag, unzipflag))
    print('GatherFiles(%s,exts="")' % (path))
    candidates = GatherFiles(path)
    total = len(candidates)
    progress = ProcessBar.ShowProcess(100, 'GIF Search', '', 'OK')

    found = []
    for index, candidate in enumerate(candidates):
        if imghdr.what(candidate) == 'gif':
            found.append(candidate)
            # Extract before deleting so the source still exists.
            if unzipflag == 1:
                gif2png(candidate, os.path.dirname(candidate))
            if delflag == 1:
                os.remove(candidate)
        # Refresh the progress bar only every 50th file.
        if not index % 50:
            progress.show_process(int(index * 100 / total))
    progress.show_process(100)
    return found
Ejemplo n.º 4
0
 def DelSameFiles(self):
     '''Delete duplicate files, keeping the first file of every group.

     The groups come from self.GetSameFiles(); in each group every entry
     after the first one is removed from disk with os.remove.
     '''
     duplicate_groups = self.GetSameFiles()
     print('删除相同文件')
     progress = ProcessBar.ShowProcess(100, '', '', infoDone='Done')
     group_count = len(duplicate_groups)
     for index, group in enumerate(duplicate_groups):
         # The first entry of the group is the copy that stays.
         redundant = group[1:]
         for redundant_file in redundant:
             os.remove(redundant_file)
         progress.show_process(int(index * 100 / group_count))
     progress.show_process(100)
Ejemplo n.º 5
0
def CreateThumbs(path,
                 ori_name,
                 thumb_name,
                 exts='',
                 exclude_files='',
                 exclude_exts='',
                 width=100,
                 height=100):
    '''Create thumbnails for the files of an album.

    The source files are gathered from "<path>/<ori_name>" with
    funs.GatherFilesEx. The thumbnail path of a file is its own path with
    "/<ori_name>/" replaced by "/<thumb_name>/"; video thumbnails get an
    extra ".jpg" suffix. A thumbnail that already exists is not rebuilt.

    @param path album root directory
    @param ori_name name of the directory holding the original files
    @param thumb_name name of the directory receiving the thumbnails
    @param exts forwarded to funs.GatherFilesEx
    @param exclude_files forwarded to funs.GatherFilesEx
    @param exclude_exts forwarded to funs.GatherFilesEx
    @param width thumbnail width
    @param height thumbnail height
    @return None; a message is printed when the original directory
            does not exist
    '''
    ori_path = '%s/%s' % (path, ori_name)  # directory of the originals
    if not os.path.exists(ori_path):
        print('%s not exists!' % (ori_path))
        return

    # Gather the original files.
    ori_files = []
    funs.GatherFilesEx(ori_path,
                       ori_files,
                       exts=exts,
                       exclude_files=exclude_files,
                       exclude_exts=exclude_exts)
    files_num = len(ori_files)

    # Supported video extensions. A tuple is required here: testing
    # membership in the plain string '.mp4' is a substring test and would
    # also match '' (no extension), '.m' or '.mp'.
    video_exts = ('.mp4',)

    pbar = ProcessBar.ShowProcess()
    rep_ori_name = '/%s/' % (ori_name)  # marker of the original directory
    rep_thumb_name = '/%s/' % (thumb_name)  # marker of the thumb directory
    for i, src_file in enumerate(ori_files):
        dst_file = src_file.replace(rep_ori_name, rep_thumb_name)
        is_video = os.path.splitext(src_file)[1].lower() in video_exts
        if is_video:
            # Video thumbnails are stored as "<name>.<ext>.jpg"; check that
            # real target so an existing thumbnail is not rebuilt each run.
            dst_file = '%s.jpg' % (dst_file)
        if os.path.exists(src_file) and not os.path.exists(dst_file):
            if is_video:
                CreateThumb_vedio(src_file, dst_file, width, height)
            else:
                CreateThumb_img(src_file, dst_file, width, height)
        # Refresh the progress bar only every 10th file.
        if i % 10 == 0:
            pbar.show_process(int(i * 100 / files_num))
    pbar.show_process(100)
Ejemplo n.º 6
0
 def MoveSameFilesTo(self, to_path):
     '''Move duplicate files to another directory tree.

     The groups come from self.GetSameFiles(). The first file of every
     group stays in place; each remaining file is moved to to_path,
     keeping the part of its path that follows self.path.

     @param to_path root directory receiving the duplicates
     '''
     files_same = self.GetSameFiles()
     print('迁移相同文件:', to_path)
     pb = ProcessBar.ShowProcess(100, '', '', infoDone='Done')
     nGroups = len(files_same)
     src_path_len = len(self.path)
     for i, files_sub in enumerate(files_same):
         # The first entry of the group is the copy that stays.
         for src_file in files_sub[1:]:
             # Re-root the file: drop the self.path prefix, prepend to_path.
             dst_file = '%s%s' % (to_path, src_file[src_path_len:])
             dst_path = os.path.split(dst_file)[0]
             os.makedirs(dst_path, exist_ok=True)
             print('src:', src_file)
             print('dst:', dst_file)
             shutil.move(src_file, dst_file)
         # Truncate after the division so an int percentage is passed,
         # as every other progress update does.
         pb.show_process(int(i * 100 / nGroups))
     pb.show_process(100)
Ejemplo n.º 7
0
def Predicts(path):
    '''Classify the images below a directory and print the results.

    The .jpg/.jpeg/.png files returned by funs.GatherFiles are loaded at
    224x224, preprocessed and passed one by one to a ResNet50 model built
    with the 'imagenet' weights. For each image the file path and the top
    three decoded predictions are printed.

    @param path directory to search for images
    @return None
    '''
    # Gather the image files.
    files = funs.GatherFiles(path, exts='.jpg,.jpeg,.png')
    pb = ProcessBar.ShowProcess(100)
    model = ResNet50(weights='imagenet')
    nFiles = len(files)
    for i, sfile in enumerate(files):
        img = image.load_img(sfile, target_size=(224, 224))
        if img is None:
            print('img.load_img(%s)=None' % (sfile))
            continue
        # Turn the image into a batch holding a single sample.
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        preds = model.predict(x)
        # decode_predictions returns one list of
        # (class, description, probability) tuples per sample in the batch.
        print(sfile)
        print('Predicted:', decode_predictions(preds, top=3)[0])
        pb.show_process(int(i * 100 / nFiles))
    pb.show_process(100)
Ejemplo n.º 8
0
def FindExtNotMatchFiles(path, exts, bFix=False):
    '''Find files whose extension differs from the type imghdr detects.

    NOTE: imghdr.what(sfile) can return None for image files that are
    still valid, so this comparison is unreliable.
    Recommendation: stop using this function.

    @param path directory to search
    @param exts extension filter forwarded to GatherFiles
    @param bFix when True, rename each mismatching file so that its
                extension equals the detected type; files whose type
                could not be detected are reported but never renamed
    @return list of (file path, detected extension) tuples; the detected
            extension is '.' when imghdr.what() returned None
    '''
    print('FindExtNotMatchFiles(%s,exts=%s,bFix=%d)' % (path, exts, bFix))
    rets = []
    print('GatherFiles(%s,exts=%s)' % (path, exts))
    files = GatherFiles(path, exts)
    nFiles = len(files)
    pb = ProcessBar.ShowProcess(100, 'Not Match Search', '', 'OK')
    for i, sfile in enumerate(files):
        base, ext1 = os.path.splitext(sfile)
        detected = imghdr.what(sfile)
        ext2 = '.' if detected is None else '.' + detected
        # '.jpg' and '.jpeg' name the same format and count as a match.
        if ext1 != ext2 and not (ext1 == '.jpg' and ext2 == '.jpeg'):
            rets.append((sfile, ext2))
            # Without a detected type the rename target would be "<name>.",
            # which strips the extension of a possibly valid file.
            if bFix and detected is not None:
                shutil.move(sfile, '%s%s' % (base, ext2))
        # Refresh the progress bar only every 50th file.
        if i % 50 == 0:
            pb.show_process(int(i * 100 / nFiles))
    pb.show_process(100)
    return rets
Ejemplo n.º 9
0
 def __init__(self, path, exts=''):
     '''Store the search settings and build the helper objects.

     @param path directory to search
     @param exts extension filter, e.g. ".jpg,.png"
     '''
     # Search directory and extension filter.
     self.path, self.exts = path, exts
     # Progress bar.
     self.pb = ProcessBar.ShowProcess(100, '', '', '')
     # Statistics about the search directory.
     self.info_path = funs.PathStat(path)