Example #1
0
def check_blank(orig_link_path): 
    print "\noriginal link: ", orig_link_path 
    # create image from original link
    orig_img_name = 'orig.png'
    try:
        for line in run_command([PHANTOMJS, RASTERIZE, orig_link_path, orig_img_name]):
            print (line)
        timeit.default_timer # from last call
        start = timeit.default_timer()
         # compare two images
        img1 = cv2.imread(orig_img_name, 0)
        img1_size = os.path.getsize(orig_img_name)
        # calculate OCR value from original image
        ocr = blank.ocr(orig_img_name)
        # remove temporary images
        os.remove(orig_img_name)
        execution_time = timeit.default_timer() - start
        print "execution time: ", execution_time
        # store in CSV and MongoDB resulting timstamp in ms, execution time in sec, 
        # original URL, wayback machine URL, wayback timestamp, SIFT features total count, 
        # matched features count, resulting message, OCR count, file sizes
        import datetime
        current_time =  datetime.datetime.utcnow()
        return ocr
    except Exception, e:
        print "Error:", e, " Please check if OCR tool is running!" 
Example #2
0
def compare_screenshots_sift_ext2(orig_link_path, wayback_link_path, ocr_flag, psnr_flag): 
    # create screenshot from Wayback using timestamp
    print "\noriginal link: ", orig_link_path, ", wayback link: ", wayback_link_path, ", ocr flag: ", ocr_flag, ", psnr flag: ", psnr_flag 
    import datetime
    current_file_id =  timeit.default_timer()
    # create image from original link
    orig_img_name = 'qa/orig/' + str(current_file_id) + '.png'
    # create image from Wayback link
    wayback_img_name = 'wayback.png'
    try:
        # Set the timer for five seconds for file creation due to programm hanging 
        # if file could not be created from the link by internet
        t = Timer(FILE_CREATION_TIME, create_file(orig_link_path, orig_img_name))
        t.start() 
        tw = Timer(FILE_CREATION_TIME, create_file(wayback_link_path, wayback_img_name))
        tw.start() 
        if os.path.exists(orig_img_name):
            start = timeit.default_timer()
            # compare two images
            img1 = cv2.imread(orig_img_name, 0)
            img1_size = os.path.getsize(orig_img_name)
            img2 = cv2.imread(wayback_img_name, 0)
            img2_size = os.path.getsize(wayback_img_name)
            print "extract features ..."
            fc1, fc2, mc, msg = compare_screenshots.compare_ext(img1, img2, 'sift', ts1, ts2)
            ocr = 0
            if str2bool(ocr_flag):
                # calculate OCR value from original image
                print "perform OCR analysis ..."
                ocr = blank.ocr(orig_img_name)
            psnr_similarity = "None"
            psnr_threshold = ts_psnr
            psnr_msg = ""
            if str2bool(psnr_flag):
               # compare images using imagemagick tool and PSNR metric 
               print "perform PSNR analysis ..."
               psnr_similarity = "DIFFERENT"
               psnr_similarity, psnr_threshold, psnr_msg = compare_psnr(orig_img_name, wayback_img_name)
            # remove temporary images
            os.remove(wayback_img_name)
            execution_time = timeit.default_timer() - start
            print "execution time: ", execution_time
            # store in CSV and MongoDB resulting timstamp in ms, execution time in sec, 
            # original URL, wayback machine URL, wayback timestamp, SIFT features total count, 
            # matched features count, resulting message, OCR count, file sizes, PSNR value
            # and original file
            wayback_url_list = wayback_link_path.split("/")
            wayback_timestamp = wayback_url_list[-2]
            print "wayback timestamp: ", wayback_timestamp
            import datetime
            current_time =  datetime.datetime.utcnow()
            orig_img_name = 'http://127.0.0.1:8000/' + orig_img_name
            #print "original link: ", orig_link
            store_in_file(current_time, execution_time, orig_link_path, wayback_link_path, wayback_timestamp, fc1, fc2, mc, msg, ocr, img1_size, img2_size, psnr_similarity, psnr_threshold, psnr_msg, orig_img_name)
            insert_mongo(current_time, execution_time, orig_link_path, wayback_link_path, wayback_timestamp, fc1, fc2, mc, msg, ocr, img1_size, img2_size, psnr_similarity, psnr_threshold, psnr_msg, orig_img_name)
            return mc, fc1, ocr, psnr_similarity
        else:
           print "Warning: original file could not be retrieved from internet!"
    except Exception, e:
        print "Error:", e, " Please check if Wayback machine and MongoDB are running!" 
def check_blank(orig_link_path):
    print "\noriginal link: ", orig_link_path
    # create image from original link
    orig_img_name = 'orig.png'
    try:
        for line in run_command(
            [PHANTOMJS, RASTERIZE, orig_link_path, orig_img_name]):
            print(line)
        timeit.default_timer  # from last call
        start = timeit.default_timer()
        # compare two images
        img1 = cv2.imread(orig_img_name, 0)
        img1_size = os.path.getsize(orig_img_name)
        # calculate OCR value from original image
        ocr = blank.ocr(orig_img_name)
        # remove temporary images
        os.remove(orig_img_name)
        execution_time = timeit.default_timer() - start
        print "execution time: ", execution_time
        # store in CSV and MongoDB resulting timstamp in ms, execution time in sec,
        # original URL, wayback machine URL, wayback timestamp, SIFT features total count,
        # matched features count, resulting message, OCR count, file sizes
        import datetime
        current_time = datetime.datetime.utcnow()
        return ocr
    except Exception, e:
        print "Error:", e, " Please check if OCR tool is running!"
Example #4
0
def compare_images_by_path_and_link_ext(orig_link_path, wayback_link_name, ocr_flag, ts1=60, ts2=30, psnr_flag=False, psnr_th=None): 
    # create screenshot from Wayback using timestamp
    print "\noriginal link: ", orig_link_path, ", wayback link: ", wayback_link_name, ", ocr flag: ", ocr_flag, ", high ts1: ", ts1, ", low ts2: ", ts2 
    
    # create screenshot from Wayback using timestamp
    wayback_link = urllib.unquote_plus(wayback_link_name)
    
    # create image from Wayback link
    wayback_img_name = "wayback.png"
    convert_url_to_file(wayback_link, wayback_img_name)
    print "wayback file: ", wayback_img_name
    try:
        timeit.default_timer 
        start = timeit.default_timer()
         # compare two images
        img1 = cv2.imread(orig_link_path, 0)
        img1_size = os.path.getsize(orig_link_path)
        img2 = cv2.imread(wayback_img_name, 0)
        img2_size = os.path.getsize(wayback_img_name)
        print "extract features ..."
        fc1, fc2, mc, msg = compare_screenshots.compare_ext(img1, img2, 'sift', ts1, ts2)
        ocr = 0
        if str2bool(ocr_flag):
            # calculate OCR value from original image
            print "perform OCR analysis ..."
            ocr = blank.ocr(orig_link_path)
        psnr_similarity = "DIFFERENT"
        psnr_msg = ""
        if str2bool(psnr_flag):
            # compare images using imagemagick tool and PSNR metric 
            print "perform PSNR analysis ..."
            if psnr_th is None:
                psnr_th = th_psnr
            psnr_similarity, psnr_threshold, psnr_msg = compare_psnr(orig_link_path, wayback_img_name)
        # remove temporary images
   #     os.remove(wayback_img_name)
        execution_time = timeit.default_timer() - start
        print "execution time: ", execution_time
        # store in CSV and MongoDB resulting timstamp in ms, execution time in sec, 
        # original URL, wayback machine URL, wayback timestamp, SIFT features total count, 
        # matched features count, resulting message, OCR count, file sizes
        import datetime
        current_time =  datetime.datetime.utcnow()
        wayback_url_list = wayback_link.split("/")
        wayback_timestamp = wayback_url_list[-2]
        print "wayback timestamp: ", wayback_timestamp
        #print "original link: ", orig_link
        store_in_file(current_time, execution_time, orig_link_path, wayback_link, wayback_timestamp, fc1, fc2, mc, msg, ocr, img1_size, img2_size, psnr_similarity, psnr_threshold, psnr_msg, orig_link_path)
        insert_mongo(current_time, execution_time, orig_link_path, wayback_link, wayback_timestamp, fc1, fc2, mc, msg, ocr, img1_size, img2_size, psnr_similarity, psnr_threshold, psnr_msg, orig_link_path)
        print "result message: ", msg
        return msg
    except Exception, e:
        print "Error:", e, " Please check if Wayback machine and MongoDB are running!" 
def compare_images_by_path_and_link_ext(orig_link_path,
                                        wayback_link_name,
                                        ocr_flag,
                                        ts1=60,
                                        ts2=30,
                                        psnr_flag=False,
                                        psnr_th=None):
    # create screenshot from Wayback using timestamp
    print "\noriginal link: ", orig_link_path, ", wayback link: ", wayback_link_name, ", ocr flag: ", ocr_flag, ", high ts1: ", ts1, ", low ts2: ", ts2

    # create screenshot from Wayback using timestamp
    wayback_link = urllib.unquote_plus(wayback_link_name)

    # create image from Wayback link
    wayback_img_name = "wayback.png"
    convert_url_to_file(wayback_link, wayback_img_name)
    print "wayback file: ", wayback_img_name
    try:
        timeit.default_timer
        start = timeit.default_timer()
        # compare two images
        img1 = cv2.imread(orig_link_path, 0)
        img1_size = os.path.getsize(orig_link_path)
        img2 = cv2.imread(wayback_img_name, 0)
        img2_size = os.path.getsize(wayback_img_name)
        print "extract features ..."
        fc1, fc2, mc, msg = compare_screenshots.compare_ext(
            img1, img2, 'sift', ts1, ts2)
        ocr = 0
        if str2bool(ocr_flag):
            # calculate OCR value from original image
            print "perform OCR analysis ..."
            ocr = blank.ocr(orig_link_path)
        psnr_similarity = "DIFFERENT"
        psnr_msg = ""
        if str2bool(psnr_flag):
            # compare images using imagemagick tool and PSNR metric
            print "perform PSNR analysis ..."
            if psnr_th is None:
                psnr_th = th_psnr
            psnr_similarity, psnr_threshold, psnr_msg = compare_psnr(
                orig_link_path, wayback_img_name)
        # remove temporary images
#     os.remove(wayback_img_name)
        execution_time = timeit.default_timer() - start
        print "execution time: ", execution_time
        # store in CSV and MongoDB resulting timstamp in ms, execution time in sec,
        # original URL, wayback machine URL, wayback timestamp, SIFT features total count,
        # matched features count, resulting message, OCR count, file sizes
        import datetime
        current_time = datetime.datetime.utcnow()
        wayback_url_list = wayback_link.split("/")
        wayback_timestamp = wayback_url_list[-2]
        print "wayback timestamp: ", wayback_timestamp
        #print "original link: ", orig_link
        store_in_file(current_time, execution_time, orig_link_path,
                      wayback_link, wayback_timestamp, fc1, fc2, mc, msg, ocr,
                      img1_size, img2_size, psnr_similarity, psnr_threshold,
                      psnr_msg, orig_link_path)
        insert_mongo(current_time, execution_time, orig_link_path,
                     wayback_link, wayback_timestamp, fc1, fc2, mc, msg, ocr,
                     img1_size, img2_size, psnr_similarity, psnr_threshold,
                     psnr_msg, orig_link_path)
        print "result message: ", msg
        return msg
    except Exception, e:
        print "Error:", e, " Please check if Wayback machine and MongoDB are running!"
def compare_screenshots_sift_ext2(orig_link_path, wayback_link_path, ocr_flag,
                                  psnr_flag):
    # create screenshot from Wayback using timestamp
    print "\noriginal link: ", orig_link_path, ", wayback link: ", wayback_link_path, ", ocr flag: ", ocr_flag, ", psnr flag: ", psnr_flag
    import datetime
    current_file_id = timeit.default_timer()
    # create image from original link
    orig_img_name = 'qa/orig/' + str(current_file_id) + '.png'
    # create image from Wayback link
    wayback_img_name = 'wayback.png'
    try:
        # Set the timer for five seconds for file creation due to programm hanging
        # if file could not be created from the link by internet
        t = Timer(FILE_CREATION_TIME, create_file(orig_link_path,
                                                  orig_img_name))
        t.start()
        tw = Timer(FILE_CREATION_TIME,
                   create_file(wayback_link_path, wayback_img_name))
        tw.start()
        if os.path.exists(orig_img_name):
            start = timeit.default_timer()
            # compare two images
            img1 = cv2.imread(orig_img_name, 0)
            img1_size = os.path.getsize(orig_img_name)
            img2 = cv2.imread(wayback_img_name, 0)
            img2_size = os.path.getsize(wayback_img_name)
            print "extract features ..."
            fc1, fc2, mc, msg = compare_screenshots.compare_ext(
                img1, img2, 'sift', ts1, ts2)
            ocr = 0
            if str2bool(ocr_flag):
                # calculate OCR value from original image
                print "perform OCR analysis ..."
                ocr = blank.ocr(orig_img_name)
            psnr_similarity = "None"
            psnr_threshold = ts_psnr
            psnr_msg = ""
            if str2bool(psnr_flag):
                # compare images using imagemagick tool and PSNR metric
                print "perform PSNR analysis ..."
                psnr_similarity = "DIFFERENT"
                psnr_similarity, psnr_threshold, psnr_msg = compare_psnr(
                    orig_img_name, wayback_img_name)
            # remove temporary images
            os.remove(wayback_img_name)
            execution_time = timeit.default_timer() - start
            print "execution time: ", execution_time
            # store in CSV and MongoDB resulting timstamp in ms, execution time in sec,
            # original URL, wayback machine URL, wayback timestamp, SIFT features total count,
            # matched features count, resulting message, OCR count, file sizes, PSNR value
            # and original file
            wayback_url_list = wayback_link_path.split("/")
            wayback_timestamp = wayback_url_list[-2]
            print "wayback timestamp: ", wayback_timestamp
            import datetime
            current_time = datetime.datetime.utcnow()
            orig_img_name = 'http://127.0.0.1:8000/' + orig_img_name
            #print "original link: ", orig_link
            store_in_file(current_time, execution_time, orig_link_path,
                          wayback_link_path, wayback_timestamp, fc1, fc2, mc,
                          msg, ocr, img1_size, img2_size, psnr_similarity,
                          psnr_threshold, psnr_msg, orig_img_name)
            insert_mongo(current_time, execution_time, orig_link_path,
                         wayback_link_path, wayback_timestamp, fc1, fc2, mc,
                         msg, ocr, img1_size, img2_size, psnr_similarity,
                         psnr_threshold, psnr_msg, orig_img_name)
            return mc, fc1, ocr, psnr_similarity
        else:
            print "Warning: original file could not be retrieved from internet!"
    except Exception, e:
        print "Error:", e, " Please check if Wayback machine and MongoDB are running!"