def test_duplication2(self):
    """Duplication.detect is expected to return 0 for yrc_16.png vs yrc_5_po.png."""
    first_path = os.path.join(DIR, 'test/image/yrc_16.png')
    second_path = os.path.join(DIR, 'test/image/yrc_5_po.png')
    first = SuspiciousImage(first_path)
    second = SuspiciousImage(second_path)

    result = Duplication().detect(first, second)

    self.assertEqual(0, result)
def test_photopick(self):
    """PhotoPick.detect is expected to return 1 for yrc_16.png and 0 for graph.png."""
    wanted = [1, 0]
    paths = [
        os.path.join(DIR, 'test/image/yrc_16.png'),
        os.path.join(DIR, 'test/image/graph.png'),
    ]
    images = [SuspiciousImage(path) for path in paths]

    got = PhotoPick().detect(images)

    # Compare the first two predictions individually, in order.
    for position in (0, 1):
        self.assertEqual(wanted[position], got[position])
def test_duplication(self):
    """Duplication.detect is expected to return 1 for yrc_16.png vs yrc_16_60degree.png.

    The detector's result image is saved to test/image/duplication_result.jpg
    before the assertion runs.
    """
    source = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_16.png'))
    rotated = SuspiciousImage(
        os.path.join(DIR, 'test/image/yrc_16_60degree.png'))
    checker = Duplication()

    result = checker.detect(source, rotated)
    result_path = os.path.join(DIR, 'test/image/duplication_result.jpg')
    checker.save_image(result_path)

    self.assertEqual(1, result)
def test_cutpaste(self):
    """CutPaste.detect is expected to return 0 for yrc_1000_505_cp.png and 1 for yrc_16.png."""
    # TODO: re-train a model
    wanted = [0, 1]
    paths = [
        os.path.join(DIR, 'test/image/yrc_1000_505_cp.png'),
        os.path.join(DIR, 'test/image/yrc_16.png'),
    ]
    images = [SuspiciousImage(path) for path in paths]

    got = CutPaste().detect(images)
    print(got)

    # Compare the first two predictions individually, in order.
    for position in (0, 1):
        self.assertEqual(wanted[position], got[position])
def extraction(PMC_DIRs, from_where='from_pdf'):
    """Extract sub-images for each article directory and sort them with PhotoPick.

    For every directory in ``PMC_DIRs``:

    1. If the directory contains at least one ``*.pdf``, the first match is
       passed to ``extract_img_from_pdf`` together with the
       ``<PMC_DIR>/<from_where>`` directory.
    2. All ``<PMC_DIR>/<from_where>/subimg_cut/*.png`` files are loaded as
       ``SuspiciousImage`` objects and classified by ``PhotoPick.detect``.
    3. Files predicted ``1`` are copied to ``<PMC_DIR>/photo``; files
       predicted ``0`` are copied to ``<PMC_DIR>/<from_where>/other``.
       Any other prediction value is ignored.

    Args:
        PMC_DIRs: Iterable of article directory paths.
        from_where: Name of the sub-directory that holds the extraction
            output (defaults to ``'from_pdf'``).

    Returns:
        None. Results are written to disk.
    """
    detector = PhotoPick()
    for PMC_DIR in PMC_DIRs:
        print(PMC_DIR)

        # from PDF files
        pdf_files = glob.glob(os.path.join(PMC_DIR, '*.pdf'))
        if pdf_files:
            # Only the first PDF found in the directory is processed.
            # NOTE(review): mkdir is a project helper; it is assumed to
            # create the directory and return its path -- confirm.
            SAVE_DIR = mkdir(os.path.join(PMC_DIR, from_where))
            extract_img_from_pdf(pdf_files[0], SAVE_DIR)

        # NOTE: extraction from JPEG figures (Dismantler) is currently disabled.

        subimgs_path = glob.glob(
            os.path.join(PMC_DIR, from_where, 'subimg_cut', '*.png'))
        # Both output directories are created even when no sub-image exists.
        photo_DIR = mkdir(os.path.join(PMC_DIR, 'photo'))
        other_DIR = mkdir(os.path.join(PMC_DIR, from_where, 'other'))

        suspicious_imgs = [
            SuspiciousImage(img_path) for img_path in subimgs_path
        ]
        if not suspicious_imgs:
            print("No images")
            continue

        pred = detector.detect(suspicious_imgs)
        for img_path, label in zip(subimgs_path, pred):
            if label == 1:
                shutil.copy(img_path, photo_DIR)
            elif label == 0:
                shutil.copy(img_path, other_DIR)
def test_paintout1(self):
    """Clipping.detect is expected to return 1 for yrc_5_po.png.

    The detector's result image is saved to test/image/paintout_result.jpg
    before the assertion runs.
    """
    image = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_5_po.png'))
    checker = Clipping()

    result = checker.detect(image)
    result_path = os.path.join(DIR, 'test/image/paintout_result.jpg')
    checker.save_image(result_path)

    self.assertEqual(1, result)
def test_copymove(self):
    """CopyMove.detect is expected to return 1 for yrc_7_cm.png.

    The detector's result image is saved to test/image/copymove_result.jpg
    before the assertion runs.
    """
    image = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_7_cm.png'))
    checker = CopyMove()

    result = checker.detect(image)
    result_path = os.path.join(DIR, 'test/image/copymove_result.jpg')
    checker.save_image(result_path)

    self.assertEqual(1, result)
def test_paintout0(self):
    """Clipping.detect is expected to return 0 for yrc_16.png."""
    image_path = os.path.join(DIR, 'test/image/yrc_16.png')
    image = SuspiciousImage(image_path)

    result = Clipping().detect(image)

    self.assertEqual(0, result)
def test_copymove2(self):
    """CopyMove.detect is expected to return 0 for yrc_16.png."""
    image_path = os.path.join(DIR, 'test/image/yrc_16.png')
    image = SuspiciousImage(image_path)

    result = CopyMove().detect(image)

    self.assertEqual(0, result)
def detection(post_id):
    """Run the manipulation detectors on an uploaded file and store the results.

    The ``File`` record with primary key ``post_id`` is loaded and its
    ``zip`` field is interpreted either as a zip archive of images (which is
    extracted) or as a single image. Every image is checked with the
    clipping and copy-move detectors, and every pair of images is checked
    with the duplication detector. Result images are written below
    ``media/results/<slot>``; one ``Suspicious`` record is saved per image
    and one ``SuspiciousDuplication`` record per detected pair.

    Args:
        post_id: Primary key of the ``File`` record. ``post_id % 10`` selects
            the working directories, which are wiped and recreated on each
            call.

    Returns:
        ``0`` when the upload is neither a ``.zip`` nor a supported image
        type; otherwise the tuple ``(number of images, number of detected
        duplicate pairs)``.
    """
    image_exts = ['jpg', 'png', 'tif', 'JPG', 'JPEG', 'TIF']

    def to_media_url(path):
        # Keep only the part of the path after the last 'media/'.
        return path.split('media/')[-1]

    # --- Unpack the upload ------------------------------------------------
    upload = File.objects.get(pk=post_id)
    p_id = post_id % 10
    OUT_DIR = os.path.join(BASE_DIR, 'media', 'images', str(p_id))
    if os.path.exists(OUT_DIR):
        shutil.rmtree(OUT_DIR)
    os.makedirs(OUT_DIR)

    upload_ext = upload.zip.path.split('.')[-1]
    if upload_ext == 'zip':
        with zipfile.ZipFile(os.path.join(BASE_DIR,
                                          upload.zip.path)) as existing_zip:
            existing_zip.extractall(OUT_DIR)
        # TODO: fix -- only files exactly one directory below OUT_DIR are
        # picked up.
        images_path = [
            p for p in glob.glob(os.path.join(OUT_DIR, '*', '*'))
            if p.split('.')[-1] in image_exts
        ]
        print([to_media_url(p) for p in images_path])
    elif upload_ext in image_exts:
        images_path = [upload.zip.path]
    else:
        return 0
    print('loaded')

    RESULT_DIR = os.path.join(BASE_DIR, 'media', 'results', str(p_id))
    if os.path.exists(RESULT_DIR):
        shutil.rmtree(RESULT_DIR)
    os.makedirs(RESULT_DIR)

    # NOTE: PDF extraction, figure splitting (Dismantler) and the PhotoPick
    # pre-filter are currently disabled; every collected image is analysed.
    suspicious_images = [
        SuspiciousImage(path, nfeatures=5000) for path in images_path
    ]
    print(suspicious_images)
    len_sus = len(suspicious_images)

    # --- Detection --------------------------------------------------------
    # NOTE: the Noise and CutPaste detectors are currently disabled.
    detector_cl = Clipping(min_area=100)
    detector_cm = CopyMove(min_kp=20,
                           min_match=20,
                           min_key_ratio=0.75,
                           flags=0)
    detector_du = Duplication(min_kp=20,
                              min_match=20,
                              min_key_ratio=0.75,
                              flags=0)

    for img in suspicious_images:
        dsize = (img.w, img.h)

        # Clipping check #
        pred = detector_cl.detect(img)
        img.clipping = pred
        # Compare by value: `is 1` tests object identity, not equality.
        if pred == 1:
            img.area_ratio = detector_cl.ratio_
            img.cl_img = cv.resize(detector_cl.image_, dsize=dsize)

        # Copy-move check #
        pred = detector_cm.detect(img)
        img.copymove = pred
        if pred == 1:
            img.mask_ratio = detector_cm.mask_ratio_
            img.cm_img = cv.resize(detector_cm.image_, dsize=dsize)
    print('detected')

    # --- Persist per-image results ---------------------------------------
    for img in suspicious_images:
        nameroot = img.name
        dsize = (img.w, img.h)

        suspicious = Suspicious()
        suspicious.post_id = p_id
        suspicious.w, suspicious.h = dsize
        suspicious.name = img.name
        suspicious.size = img.size

        file_name = os.path.join(RESULT_DIR, '{}.jpg'.format(nameroot))
        cv.imwrite(file_name, cv.resize(img.mat, dsize=dsize))
        suspicious.original = to_media_url(file_name)

        file_name = os.path.join(RESULT_DIR, '{}_no.jpg'.format(nameroot))
        cv.imwrite(file_name, img.no_img)
        suspicious.no_img = to_media_url(file_name)

        # NOTE(review): the branches below test truthiness while the
        # detection loop above only stores result images for a prediction
        # equal to 1 -- confirm the detectors only ever return 0 or 1.
        suspicious.clipping = img.clipping
        if img.clipping:
            file_name = os.path.join(RESULT_DIR,
                                     '{}_cl.jpg'.format(nameroot))
            cv.imwrite(file_name, img.cl_img)
            suspicious.cl_img = to_media_url(file_name)
        else:
            suspicious.cl_img = suspicious.original
        # Ratios are stored as integer percentages.
        suspicious.area_ratio = int(img.area_ratio * 100)

        suspicious.copymove = img.copymove
        if suspicious.copymove:
            file_name = os.path.join(RESULT_DIR,
                                     '{}_cm.jpg'.format(nameroot))
            cv.imwrite(file_name, img.cm_img)
            suspicious.cm_img = to_media_url(file_name)
        else:
            suspicious.cm_img = suspicious.original
        suspicious.mask_ratio = int(img.mask_ratio * 100)

        # The key image is cropped by `gap` pixels on every side and then
        # resized to the image size.
        file_name = os.path.join(RESULT_DIR, '{}_cp.jpg'.format(nameroot))
        cv.imwrite(
            file_name,
            cv.resize(img.keyimg[img.gap:-img.gap, img.gap:-img.gap],
                      dsize=dsize))
        suspicious.cp_img = to_media_url(file_name)
        suspicious.save()
    print('saved')

    # --- Duplication check: every unordered pair of images ----------------
    n_dp = 0
    for i, img in enumerate(suspicious_images):
        for other in suspicious_images[i + 1:]:
            pred = detector_du.detect(other, img)
            if pred == 1:
                file_name = os.path.join(
                    RESULT_DIR,
                    '{}_{}_duplication.jpg'.format(other.name, img.name))
                detector_du.save_image(file_name)

                duplication = SuspiciousDuplication()
                duplication.post_id = p_id
                duplication.name1 = other.name
                duplication.name2 = img.name
                duplication.du_img = to_media_url(file_name)
                duplication.mask_ratio = int(detector_du.mask_ratio_ * 100)
                duplication.save()
                n_dp += 1

    return len_sus, n_dp
def detection(MODEL_DIR,
              REPORT_DIR,
              PMC_DIRs,
              log_file_name,
              wave_thres=488,
              gap=32):
    """Run all detectors over a batch of article directories and write reports.

    For every directory in ``PMC_DIRs`` the images in ``photo/*.png`` are
    checked for clipping, copy-move, noise, cut-paste and pairwise
    duplication. Result images go to ``<PMC_DIR>/output``, a per-image CSV
    report is written to ``<PMC_DIR>/report_<basename>.csv``, and the plots
    are produced via ``plot_report`` / ``plot_duplication_report``. A summary
    row per directory is collected and finally written as CSV to
    ``os.path.join(DIR, log_file_name)``.

    Args:
        MODEL_DIR: Directory containing ``noise_oneclass_42.sav``,
            ``cutpaste_svm_uci_200.sav`` and
            ``cutpaste_svm_uci_200.sav-param.npz``.
        REPORT_DIR: Passed through to the plotting helpers.
        PMC_DIRs: Iterable of article directory paths.
        log_file_name: File name of the summary CSV, created under the
            module-level ``DIR``.
        wave_thres: Minimum height and width of ``img.gray`` for an image to
            be included in the cut-paste check.
        gap: Forwarded to ``SuspiciousImage``.

    Returns:
        None. All results are written to disk.
    """
    log_columns = [
        'PMCID', 'Extracted', 'Suspicious', 'Clipping', 'CopyMove',
        'Duplication', 'CutPaste', 'Noise'
    ]

    # Detectors #
    detector_cl = Clipping()
    detector_no = Noise(
        model_name=os.path.join(MODEL_DIR, 'noise_oneclass_42.sav'))
    detector_cm = CopyMove(min_kp=20, min_match=20, min_key_ratio=0.75)
    detector_du = Duplication(min_kp=20, min_match=20, min_key_ratio=0.75)
    detector_cp = CutPaste(
        model_name=os.path.join(MODEL_DIR, 'cutpaste_svm_uci_200.sav'),
        param_name=os.path.join(MODEL_DIR,
                                'cutpaste_svm_uci_200.sav-param.npz'),
    )

    log = []
    for PMC_DIR in PMC_DIRs:
        # The output directory is wiped and recreated on every run.
        OUT_DIR = os.path.join(PMC_DIR, 'output')
        if os.path.exists(OUT_DIR):
            shutil.rmtree(OUT_DIR)
        os.makedirs(OUT_DIR)
        PMCID = os.path.basename(PMC_DIR)

        # Load images #
        images_path = glob.glob(os.path.join(PMC_DIR, 'photo', '*.png'))
        if not images_path:
            print(PMC_DIR, "No images.")
            log.append([PMCID, 0, 0, 0, 0, 0, 0, 0])
            continue
        suspicious_images = [
            SuspiciousImage(path,
                            hist_eq=True,
                            algorithm='orb',
                            nfeatures=2000,
                            gap=gap) for path in images_path
        ]
        len_sus = len(suspicious_images)

        # Report #
        report = pd.DataFrame(0,
                              index=[img.name for img in suspicious_images],
                              columns=[
                                  'Clipping',
                                  'area_ratio',
                                  'CopyMove',
                                  'mask_ratio',
                                  'CutPaste',
                                  'proba',
                                  'Duplication',
                              ])

        for img in suspicious_images:
            imgname = img.name

            # Paint-out (blown-out highlights, clipping) check #
            pred = detector_cl.detect(img)
            report.loc[imgname, 'Clipping'] = pred
            img.clipping = pred
            # Compare by value: `is 1` tests object identity, not equality.
            if pred == 1:
                ratio = detector_cl.ratio_
                report.loc[imgname, 'area_ratio'] = ratio
                img.area_ratio = ratio
                file_name = os.path.join(OUT_DIR,
                                         '{}_clipping.jpg'.format(imgname))
                detector_cl.save_image(file_name)
                img.cl_img = detector_cl.image_

            # Copy-move check #
            pred = detector_cm.detect(img)
            report.loc[imgname, 'CopyMove'] = pred
            img.copymove = pred
            if pred == 1:
                ratio = detector_cm.mask_ratio_
                report.loc[imgname, 'mask_ratio'] = ratio
                img.mask_ratio = ratio
                file_name = os.path.join(OUT_DIR,
                                         '{}_copymove.jpg'.format(imgname))
                detector_cm.save_image(file_name)
                img.cm_img = detector_cm.image_

        # Noise check #
        report['Noise'] = detector_no.detect(suspicious_images)
        report['dist'] = detector_no.dist_
        noise_column = report['Noise']
        for i, img in enumerate(suspicious_images):
            # Positional access: the report is indexed by image name.
            img.noise = noise_column.iloc[i]

        # Cut-paste check (only images at least wave_thres x wave_thres) #
        enough_size_images = [
            img for img in suspicious_images
            if img.gray.shape[0] >= wave_thres
            and img.gray.shape[1] >= wave_thres
        ]
        if enough_size_images:
            results = detector_cp.detect(enough_size_images)
            probas = detector_cp.proba_
            for i, img in enumerate(enough_size_images):
                img.cutpaste = results[i]
                img.prob = probas[i]
        # Images that were too small keep whatever cutpaste/prob values
        # SuspiciousImage already carries.
        cutpaste_col = report.columns.get_loc('CutPaste')
        proba_col = report.columns.get_loc('proba')
        for i, img in enumerate(suspicious_images):
            report.iloc[i, cutpaste_col] = img.cutpaste
            report.iloc[i, proba_col] = img.prob

        # Duplication check: every unordered pair of images #
        # NOTE: the check against flipped images is currently disabled.
        result_imgnames = []
        result_imgarrs = []
        result_ratios = []
        for i, img in enumerate(suspicious_images):
            imgname = img.name
            for other in suspicious_images[i + 1:]:
                pred = detector_du.detect(other, img)
                report.loc[imgname, 'Duplication'] += pred
                if pred == 1:
                    file_name = os.path.join(
                        OUT_DIR,
                        '{}_{}_duplication.jpg'.format(other.name, imgname))
                    detector_du.save_image(file_name)
                    result_imgnames.append([other.name, imgname])
                    result_imgarrs.append(detector_du.image_)
                    result_ratios.append(detector_du.mask_ratio_)

        # Output report #
        len_output = len(glob.glob(os.path.join(OUT_DIR, '*.jpg')))
        print(PMC_DIR, len_output)
        report.to_csv(
            os.path.join(PMC_DIR,
                         'report_{}.csv'.format(os.path.basename(PMC_DIR))))
        # NOTE(review): only cells equal to exactly 1 are summed, so an
        # image with several duplicate partners (Duplication > 1) does not
        # contribute to the summary -- confirm this is intended.
        summary = report[report == 1].sum()
        log.append([
            PMCID, len_sus, len_output, summary['Clipping'],
            summary['CopyMove'], summary['Duplication'], summary['CutPaste'],
            summary['Noise']
        ])

        for i, img in enumerate(suspicious_images):
            plot_report(img, len_sus, i, PMCID, REPORT_DIR)
        if result_ratios:
            plot_duplication_report(result_imgnames, result_imgarrs,
                                    result_ratios, PMCID, REPORT_DIR)

    # Passing the columns to the constructor also works for an empty log
    # (no directories given); assigning .columns afterwards would raise.
    log = pd.DataFrame(log, columns=log_columns)
    log.to_csv(os.path.join(DIR, log_file_name))