Esempio n. 1
0
 def test_duplication2(self):
     """Duplication.detect is expected to return 0 for yrc_16 vs yrc_5_po."""
     first = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_16.png'))
     second = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_5_po.png'))
     checker = Duplication()
     # 0 is the "not duplicated" label asserted for this image pair.
     self.assertEqual(0, checker.detect(first, second))
Esempio n. 2
0
 def test_photopick(self):
     """PhotoPick.detect should label yrc_16.png as 1 and graph.png as 0."""
     relative_paths = ('test/image/yrc_16.png', 'test/image/graph.png')
     images = [
         SuspiciousImage(os.path.join(DIR, rel)) for rel in relative_paths
     ]
     picker = PhotoPick()
     labels = picker.detect(images)
     # Check the two predictions one by one, in input order.
     self.assertEqual(1, labels[0])
     self.assertEqual(0, labels[1])
Esempio n. 3
0
 def test_duplication(self):
     """Duplication.detect should return 1 for yrc_16 vs yrc_16_60degree.

     The detector's result image is also written to
     test/image/duplication_result.jpg before the assertion runs.
     """
     source_path = os.path.join(DIR, 'test/image/yrc_16.png')
     rotated_path = os.path.join(DIR, 'test/image/yrc_16_60degree.png')
     result_path = os.path.join(DIR, 'test/image/duplication_result.jpg')

     source = SuspiciousImage(source_path)
     rotated = SuspiciousImage(rotated_path)
     checker = Duplication()
     outcome = checker.detect(source, rotated)
     checker.save_image(result_path)
     self.assertEqual(1, outcome)
Esempio n. 4
0
 def test_cutpaste(self):
     """CutPaste.detect should return [0, 1] for the two sample images."""
     # TODO: retrain the model
     relative_paths = ('test/image/yrc_1000_505_cp.png',
                       'test/image/yrc_16.png')
     images = [
         SuspiciousImage(os.path.join(DIR, rel)) for rel in relative_paths
     ]
     checker = CutPaste()
     labels = checker.detect(images)
     print(labels)
     # Predictions are compared element-wise, in input order.
     self.assertEqual(0, labels[0])
     self.assertEqual(1, labels[1])
Esempio n. 5
0
def extraction(PMC_DIRs, from_where='from_pdf'):
    """Extract sub-images from each article directory and sort them.

    For every directory in ``PMC_DIRs`` the first ``*.pdf`` found (if any) is
    passed to ``extract_img_from_pdf``. The PNG files under
    ``<PMC_DIR>/<from_where>/subimg_cut`` are then classified with
    ``PhotoPick``; files predicted ``1`` are copied to ``<PMC_DIR>/photo`` and
    files predicted ``0`` to ``<PMC_DIR>/<from_where>/other``.

    Args:
        PMC_DIRs: Iterable of article directory paths.
        from_where: Name of the sub-directory that holds the extraction
            output (and that ``subimg_cut`` is read from).
    """
    detector = PhotoPick()

    for PMC_DIR in PMC_DIRs:
        print(PMC_DIR)

        # Extract from PDF: only the first PDF in the directory is processed.
        pdf_files = glob.glob(os.path.join(PMC_DIR, '*.pdf'))
        if pdf_files:
            SAVE_DIR = mkdir(os.path.join(PMC_DIR, from_where))
            extract_img_from_pdf(pdf_files[0], SAVE_DIR)

        # NOTE: extraction from standalone JPEG figures is currently disabled.

        subimgs_path = glob.glob(
            os.path.join(PMC_DIR, from_where, 'subimg_cut', '*.png'))
        # Both destination directories are created even when nothing is found.
        # NOTE(review): assumes mkdir() returns the directory path -- confirm.
        photo_DIR = mkdir(os.path.join(PMC_DIR, 'photo'))
        other_DIR = mkdir(os.path.join(PMC_DIR, from_where, 'other'))

        if not subimgs_path:
            print("No images")
            continue

        suspicious_imgs = [
            SuspiciousImage(img_path) for img_path in subimgs_path
        ]
        pred = detector.detect(suspicious_imgs)
        for img_path, label in zip(subimgs_path, pred):
            if label == 1:
                shutil.copy(img_path, photo_DIR)
            elif label == 0:
                shutil.copy(img_path, other_DIR)
Esempio n. 6
0
 def test_paintout1(self):
     """Clipping.detect should return 1 for yrc_5_po.png.

     The detector's result image is saved to test/image/paintout_result.jpg
     before the assertion runs.
     """
     sample = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_5_po.png'))
     checker = Clipping()
     outcome = checker.detect(sample)
     result_path = os.path.join(DIR, 'test/image/paintout_result.jpg')
     checker.save_image(result_path)
     self.assertEqual(1, outcome)
Esempio n. 7
0
 def test_copymove(self):
     """CopyMove.detect should return 1 for yrc_7_cm.png.

     The detector's result image is saved to test/image/copymove_result.jpg
     before the assertion runs.
     """
     sample = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_7_cm.png'))
     checker = CopyMove()
     outcome = checker.detect(sample)
     result_path = os.path.join(DIR, 'test/image/copymove_result.jpg')
     checker.save_image(result_path)
     self.assertEqual(1, outcome)
Esempio n. 8
0
 def test_paintout0(self):
     """Clipping.detect should return 0 for yrc_16.png."""
     sample = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_16.png'))
     checker = Clipping()
     outcome = checker.detect(sample)
     self.assertEqual(0, outcome)
Esempio n. 9
0
 def test_copymove2(self):
     """CopyMove.detect should return 0 for yrc_16.png."""
     sample = SuspiciousImage(os.path.join(DIR, 'test/image/yrc_16.png'))
     checker = CopyMove()
     outcome = checker.detect(sample)
     self.assertEqual(0, outcome)
Esempio n. 10
0
def detection(post_id):
    """Run clipping, copy-move and duplication checks on an uploaded file.

    The ``File`` record identified by ``post_id`` must reference either a zip
    archive (extracted under ``media/images/<slot>``) or a single image.
    Every image is checked with ``Clipping`` and ``CopyMove``; the per-image
    results are stored as ``Suspicious`` rows and the rendered result images
    are written to ``media/results/<slot>``. Each unordered pair of images is
    then checked with ``Duplication`` and every hit is stored as a
    ``SuspiciousDuplication`` row.

    ``<slot>`` is ``post_id % 10``, so posts whose ids share the last digit
    reuse (and wipe) the same working directories.

    Args:
        post_id: Integer primary key of the ``File`` record to analyse.

    Returns:
        ``0`` when the upload is neither a zip archive nor a supported image;
        otherwise the tuple ``(number_of_images, number_of_duplicated_pairs)``.
    """
    image_exts = ('jpg', 'png', 'tif', 'JPG', 'JPEG', 'TIF')

    def media_url(path):
        # Keep only the part of the path that follows the last 'media/'.
        return path.split('media/')[-1]

    # --- Unpack the upload ---
    file = File.objects.get(pk=post_id)
    p_id = post_id % 10
    OUT_DIR = os.path.join(BASE_DIR, 'media', 'images', str(p_id))
    if os.path.exists(OUT_DIR):
        shutil.rmtree(OUT_DIR)
    os.makedirs(OUT_DIR)

    extension = file.zip.path.split('.')[-1]
    if extension == 'zip':
        with zipfile.ZipFile(os.path.join(BASE_DIR,
                                          file.zip.path)) as existing_zip:
            existing_zip.extractall(OUT_DIR)
        # TODO: fix -- only files exactly one directory below OUT_DIR are
        # picked up.
        images_path = [
            p for p in glob.glob(os.path.join(OUT_DIR, '*', '*'))
            if p.split('.')[-1] in image_exts
        ]
        images_url = [media_url(p) for p in images_path]
        print(images_url)
    elif extension in image_exts:
        images_path = [file.zip.path]
    else:
        return 0
    print('loaded')

    RESULT_DIR = os.path.join(BASE_DIR, 'media', 'results', str(p_id))
    if os.path.exists(RESULT_DIR):
        shutil.rmtree(RESULT_DIR)
    os.makedirs(RESULT_DIR)

    # NOTE: PDF extraction, figure dismantling and the PhotoPick filter are
    # not part of this pipeline; every loaded image is analysed as-is.
    suspicious_images = [
        SuspiciousImage(path, nfeatures=5000) for path in images_path
    ]

    print(suspicious_images)
    len_sus = len(suspicious_images)

    # --- Detection ---
    # The noise and cut-paste detectors are not used here.
    detector_cl = Clipping(min_area=100)
    detector_cm = CopyMove(min_kp=20,
                           min_match=20,
                           min_key_ratio=0.75,
                           flags=0)
    detector_du = Duplication(min_kp=20,
                              min_match=20,
                              min_key_ratio=0.75,
                              flags=0)

    for img in suspicious_images:
        dsize = (img.w, img.h)

        # Clipping check. Compare by value: an identity test against the
        # literal 1 fails for non-builtin integer types.
        pred = detector_cl.detect(img)
        img.clipping = pred
        if pred == 1:
            img.area_ratio = detector_cl.ratio_
            img.cl_img = cv.resize(detector_cl.image_, dsize=dsize)

        # Copy-move check.
        pred = detector_cm.detect(img)
        img.copymove = pred
        if pred == 1:
            img.mask_ratio = detector_cm.mask_ratio_
            img.cm_img = cv.resize(detector_cm.image_, dsize=dsize)
    print('detected')

    # --- Persist per-image results ---
    for img in suspicious_images:
        nameroot = img.name
        dsize = (img.w, img.h)

        suspicious = Suspicious()
        suspicious.post_id = p_id
        suspicious.w, suspicious.h = dsize
        suspicious.name = img.name
        suspicious.size = img.size
        file_name = os.path.join(RESULT_DIR, '{}.jpg'.format(nameroot))
        cv.imwrite(file_name, cv.resize(img.mat, dsize=dsize))
        suspicious.original = media_url(file_name)

        # NOTE(review): img.no_img is not set in this function; presumably
        # SuspiciousImage provides it -- confirm.
        file_name = os.path.join(RESULT_DIR, '{}_no.jpg'.format(nameroot))
        cv.imwrite(file_name, img.no_img)
        suspicious.no_img = media_url(file_name)

        suspicious.clipping = img.clipping
        if img.clipping:
            file_name = os.path.join(RESULT_DIR, '{}_cl.jpg'.format(nameroot))
            cv.imwrite(file_name, img.cl_img)
            suspicious.cl_img = media_url(file_name)
        else:
            # No clipping result image: fall back to the original.
            suspicious.cl_img = suspicious.original
        # NOTE(review): area_ratio / mask_ratio are only assigned above when
        # the matching check fired; presumably SuspiciousImage initialises
        # them -- confirm.
        suspicious.area_ratio = int(img.area_ratio * 100)

        suspicious.copymove = img.copymove
        if suspicious.copymove:
            file_name = os.path.join(RESULT_DIR, '{}_cm.jpg'.format(nameroot))
            cv.imwrite(file_name, img.cm_img)
            suspicious.cm_img = media_url(file_name)
        else:
            suspicious.cm_img = suspicious.original
        suspicious.mask_ratio = int(img.mask_ratio * 100)

        # Key-point image with the `gap` border trimmed on every side.
        file_name = os.path.join(RESULT_DIR, '{}_cp.jpg'.format(nameroot))
        cv.imwrite(
            file_name,
            cv.resize(img.keyimg[img.gap:-img.gap, img.gap:-img.gap],
                      dsize=dsize))
        suspicious.cp_img = media_url(file_name)

        suspicious.save()
    print('saved')

    # --- Duplication check over every unordered pair of images ---
    n_dp = 0
    for i, img in enumerate(suspicious_images):
        imgname = img.name
        for other in suspicious_images[i + 1:]:
            pred = detector_du.detect(other, img)
            if pred == 1:
                file_name = os.path.join(
                    RESULT_DIR,
                    '{}_{}_duplication.jpg'.format(other.name, imgname))
                detector_du.save_image(file_name)
                suspicious = SuspiciousDuplication()
                suspicious.post_id = p_id
                suspicious.name1 = other.name
                suspicious.name2 = imgname
                suspicious.du_img = media_url(file_name)
                suspicious.mask_ratio = int(detector_du.mask_ratio_ * 100)
                suspicious.save()
                n_dp += 1

    return len_sus, n_dp
Esempio n. 11
0
def detection(MODEL_DIR,
              REPORT_DIR,
              PMC_DIRs,
              log_file_name,
              wave_thres=488,
              gap=32):
    """Run every detector over the photos of each article directory.

    For each directory in ``PMC_DIRs`` the PNG files under ``photo`` are
    loaded and checked with the clipping, copy-move, noise, cut-paste and
    duplication detectors. Result images go to ``<PMC_DIR>/output`` (which is
    wiped first), a per-image table is written to
    ``<PMC_DIR>/report_<PMCID>.csv`` and report figures are produced through
    ``plot_report`` / ``plot_duplication_report``. A one-row-per-directory
    summary is finally written to ``<DIR>/<log_file_name>``.

    Args:
        MODEL_DIR: Directory that contains the trained model files.
        REPORT_DIR: Destination passed to the report plotting helpers.
        PMC_DIRs: Iterable of article directory paths.
        log_file_name: File name of the summary CSV, created under ``DIR``.
        wave_thres: Minimum height and width (in pixels of ``img.gray``) an
            image needs in order to be passed to the cut-paste detector.
        gap: Forwarded to ``SuspiciousImage`` as its ``gap`` argument.
    """
    log_columns = [
        'PMCID', 'Extracted', 'Suspicious', 'Clipping', 'CopyMove',
        'Duplication', 'CutPaste', 'Noise'
    ]

    # Detectors #
    detector_cl = Clipping()
    detector_no = Noise(
        model_name=os.path.join(MODEL_DIR, 'noise_oneclass_42.sav'))
    detector_cm = CopyMove(min_kp=20, min_match=20, min_key_ratio=0.75)
    detector_du = Duplication(min_kp=20, min_match=20, min_key_ratio=0.75)
    detector_cp = CutPaste(
        model_name=os.path.join(MODEL_DIR, 'cutpaste_svm_uci_200.sav'),
        param_name=os.path.join(MODEL_DIR,
                                'cutpaste_svm_uci_200.sav-param.npz'),
    )

    log = []
    for PMC_DIR in PMC_DIRs:
        # Start from an empty output directory on every run.
        OUT_DIR = os.path.join(PMC_DIR, 'output')
        if os.path.exists(OUT_DIR):
            shutil.rmtree(OUT_DIR)
        os.makedirs(OUT_DIR)

        PMCID = os.path.basename(PMC_DIR)

        # Load images #
        images_path = glob.glob(os.path.join(PMC_DIR, 'photo', '*.png'))
        if not images_path:
            print(PMC_DIR, "No images.")
            log.append([PMCID, 0, 0, 0, 0, 0, 0, 0])
            continue
        suspicious_images = [
            SuspiciousImage(path,
                            hist_eq=True,
                            algorithm='orb',
                            nfeatures=2000,
                            gap=gap) for path in images_path
        ]
        len_sus = len(suspicious_images)

        # Report: one row per image, every cell starts at 0. #
        report = pd.DataFrame(0,
                              index=[img.name for img in suspicious_images],
                              columns=[
                                  'Clipping',
                                  'area_ratio',
                                  'CopyMove',
                                  'mask_ratio',
                                  'CutPaste',
                                  'proba',
                                  'Duplication',
                              ])

        for img in suspicious_images:
            imgname = img.name

            # Paint-out (blown-out highlights, clipping) check #
            # Compare by value: an identity test against the literal 1 fails
            # for non-builtin integer types.
            pred = detector_cl.detect(img)
            report.loc[imgname, 'Clipping'] = pred
            img.clipping = pred
            if pred == 1:
                ratio = detector_cl.ratio_
                report.loc[imgname, 'area_ratio'] = ratio
                img.area_ratio = ratio
                file_name = os.path.join(OUT_DIR,
                                         '{}_clipping.jpg'.format(imgname))
                detector_cl.save_image(file_name)
                img.cl_img = detector_cl.image_

            # Copy-move check #
            pred = detector_cm.detect(img)
            report.loc[imgname, 'CopyMove'] = pred
            img.copymove = pred
            if pred == 1:
                ratio = detector_cm.mask_ratio_
                report.loc[imgname, 'mask_ratio'] = ratio
                img.mask_ratio = ratio
                file_name = os.path.join(OUT_DIR,
                                         '{}_copymove.jpg'.format(imgname))
                detector_cm.save_image(file_name)
                img.cm_img = detector_cm.image_

        # Noise check (runs on the whole batch at once) #
        report['Noise'] = detector_no.detect(suspicious_images)
        report['dist'] = detector_no.dist_
        noise_column = report['Noise']
        for i, img in enumerate(suspicious_images):
            # The index holds image names, so look rows up by position.
            img.noise = noise_column.iloc[i]

        # Cut-paste check: only images at least wave_thres on both sides #
        enough_size_images = [
            img for img in suspicious_images if img.gray.shape[0] >= wave_thres
            and img.gray.shape[1] >= wave_thres
        ]
        if enough_size_images:
            results = detector_cp.detect(enough_size_images)
            probas = detector_cp.proba_
            for img, result, proba in zip(enough_size_images, results,
                                          probas):
                img.cutpaste = result
                img.prob = proba
        # NOTE(review): images below the size threshold never get
        # cutpaste/prob assigned here; presumably SuspiciousImage initialises
        # them -- confirm.
        for i, img in enumerate(suspicious_images):
            report.iloc[i, 4] = img.cutpaste  # column 'CutPaste'
            report.iloc[i, 5] = img.prob  # column 'proba'

        # Duplication check over every unordered pair of images #
        result_imgnames = []
        result_imgarrs = []
        result_ratios = []
        for i, img in enumerate(suspicious_images):
            imgname = img.name
            for other in suspicious_images[i + 1:]:
                pred = detector_du.detect(other, img)
                report.loc[imgname, 'Duplication'] += pred
                if pred == 1:
                    file_name = os.path.join(
                        OUT_DIR, '{}_{}_duplication.jpg'.format(
                            other.name, imgname))
                    detector_du.save_image(file_name)
                    result_imgnames.append([other.name, imgname])
                    result_imgarrs.append(detector_du.image_)
                    result_ratios.append(detector_du.mask_ratio_)

        # Output report #
        len_output = len(glob.glob(os.path.join(OUT_DIR, '*.jpg')))
        print(PMC_DIR, len_output)
        report.to_csv(
            os.path.join(PMC_DIR, 'report_{}.csv'.format(PMCID)))

        # Per-column sum of the cells that are exactly 1.
        summary = report[report == 1].sum()
        log.append([
            PMCID, len_sus, len_output, summary['Clipping'],
            summary['CopyMove'], summary['Duplication'], summary['CutPaste'],
            summary['Noise']
        ])

        for i, img in enumerate(suspicious_images):
            plot_report(img, len_sus, i, PMCID, REPORT_DIR)

        if result_ratios:
            plot_duplication_report(result_imgnames, result_imgarrs,
                                    result_ratios, PMCID, REPORT_DIR)

    # Passing the columns to the constructor also works when no directory
    # was processed, whereas assigning them afterwards would raise.
    log = pd.DataFrame(log, columns=log_columns)
    log.to_csv(os.path.join(DIR, log_file_name))