Ejemplo n.º 1
0
def can_run_ocr():
    """Report whether pytesseract can communicate with Tesseract.

    Returns:
        True when ``pytesseract.get_tesseract_version()`` succeeds, False
        when it raises ``pytesseract.TesseractNotFoundError``.
    """
    try:
        pytesseract.get_tesseract_version()
    except pytesseract.TesseractNotFoundError:
        return False
    else:
        return True
Ejemplo n.º 2
0
def ensure_ocr_enabled():
    """Reject the request with HTTP 400 when Tesseract cannot be found.

    Raises:
        HTTPException: with status code 400 when
            ``pytesseract.get_tesseract_version()`` raises
            ``TesseractNotFoundError``.
    """
    try:
        pytesseract.get_tesseract_version()
    # The except clause must name the exception class itself. The original
    # called it (``TesseractNotFoundError()``), which makes Python raise a
    # TypeError while matching instead of entering this handler.
    except pytesseract.TesseractNotFoundError:
        raise HTTPException(
            status_code=400,
            detail="OCR(with Tesseract) is not enabled/installed on the server",
        )
Ejemplo n.º 3
0
 def fullWindow(self):
     """Set up the capture window, show it, and probe Tesseract.

     Applies the icon, title, window flags (frameless, stays on top, dialog)
     and a style sheet with a fully transparent background and a green
     border, shows the window maximized, then calls
     ``showTesseractError()`` if querying the Tesseract version raises.
     """
     self.setWindowIcon(QIcon('icon.png'))
     self.setWindowTitle('Capture')

     overlay_flags = Qt.WindowFlags(
         Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint | Qt.Dialog)
     self.setWindowFlags(overlay_flags)
     self.showMaximized()

     overlay_style = ("background-color: rgba(255,255,255,0.0); "
                      "border: 3px solid rgb(16, 229, 125);")
     self.setStyleSheet(overlay_style)
     self.show()

     # Same reach as a bare ``except``: whatever the probe raises is routed
     # to showTesseractError().
     try:
         pytesseract.get_tesseract_version()
     except BaseException:
         self.showTesseractError()
Ejemplo n.º 4
0
    def __init__(self, editor, software_version):
        """Initialise GUI state, point pytesseract at Tesseract, build the GUI.

        Args:
            editor: object providing ``get_profile`` and a ``settings``
                mapping with a ``PREFERENCES`` section.
            software_version: stored unchanged on ``self.software_version``.
        """
        self.logger = get_logger("gui")
        self.editor = editor
        self.captured_map_coords = None

        self.profile = self.editor.get_profile('')
        self.profile.aircraft = "hornet"

        self.exit_quick_capture = False
        self.values = None
        self.capturing = False
        self.capture_key = self.editor.settings.get("PREFERENCES",
                                                    "capture_key")
        self.software_version = software_version

        # Fall back to a plain "tesseract" command when no path is configured.
        preferences = self.editor.settings['PREFERENCES']
        tesseract_cmd = preferences.get('tesseract_path', "tesseract")
        self.logger.info(f"Tesseract path is: {tesseract_cmd}")
        pytesseract.pytesseract.tesseract_cmd = tesseract_cmd

        # Capturing is only offered when the Tesseract binary can be found.
        try:
            found_version = pytesseract.get_tesseract_version()
        except pytesseract.pytesseract.TesseractNotFoundError:
            found_version = None
            status_text = "Status: Tesseract not found"
            button_disabled = True
        else:
            status_text = "Status: Not capturing"
            button_disabled = False
        self.tesseract_version = found_version
        self.capture_status = status_text
        self.capture_button_disabled = button_disabled

        self.logger.info(f"Tesseract version is: {self.tesseract_version}")
        self.window = self.create_gui()
Ejemplo n.º 5
0
 def __init__(self) -> None:
     """Initialise the object and make sure Tesseract is reachable.

     Points pytesseract at the hard-coded Windows install path and prints
     the Tesseract version. If Tesseract is not found, shows an info box
     and terminates the process with exit status 0.

     Raises:
         SystemExit: when pytesseract cannot find the Tesseract binary.
     """
     super().__init__()
     self.flagSem = Semaphore(1)
     pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract'
     try:
         print(pytesseract.get_tesseract_version())
     except pytesseract.pytesseract.TesseractNotFoundError:
         # Korean UI text: title "tesseract error", message "The tesseract
         # OCR program could not be detected."
         messagebox.showinfo(title="tesseract 에러", message="tesseract OCR 프로그램을 감지 할 수 없습니다.")
         # ``exit`` is injected by the ``site`` module and is missing under
         # ``python -S`` and in some frozen builds; raising SystemExit ends
         # the process with the same status without relying on it.
         raise SystemExit(0)
Ejemplo n.º 6
0
    def __init__(self, editor, software_version):
        """Initialise GUI state, read DCS options, set up Tesseract and hotkeys.

        Args:
            editor: object exposing a ``settings`` mapping with a
                ``PREFERENCES`` section.
            software_version: stored unchanged on ``self.software_version``.

        Reading the DCS ``options.lua`` file is best-effort: any failure is
        logged and ``self.scaled_dcs_gui`` keeps its default of False.
        """
        self.logger = get_logger("gui")
        self.editor = editor
        self.captured_map_coords = None
        self.profile = Profile('')
        self.profile.aircraft = "hornet"
        self.exit_quick_capture = False
        self.values = None
        self.capturing = False
        self.capture_key = try_get_setting(self.editor.settings, "capture_key",
                                           "ctrl+t")
        self.quick_capture_hotkey = try_get_setting(self.editor.settings,
                                                    "quick_capture_hotkey",
                                                    "ctrl+alt+t")
        self.enter_aircraft_hotkey = try_get_setting(self.editor.settings,
                                                     "enter_aircraft_hotkey",
                                                     "ctrl+shift+t")
        self.software_version = software_version
        self.is_focused = True
        self.scaled_dcs_gui = False
        self.selected_wp_type = "WP"

        try:
            with open(
                    f"{self.editor.settings.get('PREFERENCES', 'dcs_path')}\\Config\\options.lua",
                    "r") as f:
                # The file body is "options = {...}"; strip the assignment so
                # only the Lua table is decoded.
                dcs_settings = lua.decode(f.read().replace("options = ", ""))
                self.scaled_dcs_gui = dcs_settings["graphics"]["scaleGui"]
        # KeyError covers an options file without the graphics/scaleGui
        # entries; previously that escaped this handler and aborted start-up.
        except (FileNotFoundError, ValueError, TypeError, KeyError):
            self.logger.error("Failed to decode DCS settings", exc_info=True)

        # Fall back to a plain "tesseract" command when no path is configured.
        tesseract_path = self.editor.settings['PREFERENCES'].get(
            'tesseract_path', "tesseract")
        self.logger.info(f"Tesseract path is: {tesseract_path}")
        pytesseract.pytesseract.tesseract_cmd = tesseract_path
        # Capturing is only offered when the Tesseract binary can be found.
        try:
            self.tesseract_version = pytesseract.get_tesseract_version()
            self.capture_status = "Status: Not capturing"
            self.capture_button_disabled = False
        except pytesseract.pytesseract.TesseractNotFoundError:
            self.tesseract_version = None
            self.capture_status = "Status: Tesseract not found"
            self.capture_button_disabled = True

        self.logger.info(f"Tesseract version is: {self.tesseract_version}")
        self.window = self.create_gui()
        keyboard.add_hotkey(self.quick_capture_hotkey,
                            self.toggle_quick_capture)
        # An empty string means no aircraft-entry hotkey is registered.
        if self.enter_aircraft_hotkey != '':
            keyboard.add_hotkey(self.enter_aircraft_hotkey,
                                self.enter_coords_to_aircraft)
Ejemplo n.º 7
0
def ocr(filename, genre):
    """OCR every image of a document and append the text to its transcript.

    The images are read from ``target_cleaned/<filename>/`` in natural sort
    order. Each image is rotated by the angle Tesseract's orientation
    detection reports, recognised as English text, and the text is appended
    to ``target_cleaned/<filename>/<filename>.txt``.

    Args:
        filename: name of the document directory (and transcript stem).
        genre: passed through to the rendered template.

    Returns:
        The rendered ``spellcheck.html`` template.
    """
    pytesseract.pytesseract.tesseract_cmd = r'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
    print(pytesseract.get_tesseract_version())

    page_dir = os.path.join(target_cleaned, filename)
    transcript_name = filename + ".txt"
    # The transcript is stored next to the images, so on a re-run it shows up
    # in the listing; skip it instead of handing a text file to the image
    # readers.
    image_files = [name for name in os.listdir(page_dir)
                   if name != transcript_name]
    image_files.sort(key=natural_keys)
    print(image_files)

    # The context manager closes the transcript even if OCR fails part-way.
    with open(os.path.join(page_dir, transcript_name), "a") as file_ptr:
        for image_name in image_files:
            print(image_name)
            image_path = os.path.join(page_dir, image_name)
            img = cv2.imread(image_path)
            with Image.open(image_path) as pil_image:
                newdata = pytesseract.image_to_osd(pil_image,
                                                   output_type=Output.DICT)
            print(newdata, newdata['rotate'], type(newdata), newdata['orientation'])
            # Rotate the image by the angle reported by Tesseract before
            # recognising it.
            img = imutils.rotate_bound(img, newdata['rotate'])
            # TODO: the rotated image could be saved here.
            text = pytesseract.image_to_string(img, lang='eng')
            print(len(text))
            file_ptr.write(text)

    return render_template("spellcheck.html", filename = filename, genre = genre)
Ejemplo n.º 8
0
def get_text(img_path):
    """Read an image, denoise it and return the text Tesseract recognises.

    Args:
        img_path: path of the image file to scan.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    print('Tesseract version:', pytesseract.get_tesseract_version())

    print('Processing...')
    # Load the image and apply the only preprocessing step currently enabled
    # (grayscale, thresholding and dilation are switched off).
    denoised = remove_noise(cv2.imread(img_path))

    # Tesseract option reference.
    #
    # 'oem' (OCR Engine Mode):
    #     0    Legacy engine only.
    #     1    Neural nets LSTM engine only.
    #     2    Legacy + LSTM engines.
    #     3    Default, based on what is available.
    #
    # 'psm' (Page Segmentation Mode):
    #     0    Orientation and script detection (OSD) only.
    #     1    Automatic page segmentation with OSD.
    #     2    Automatic page segmentation, but no OSD, or OCR.
    #     3    Fully automatic page segmentation, but no OSD. (Default)
    #     4    Assume a single column of text of variable sizes.
    #     5    Assume a single uniform block of vertically aligned text.
    #     6    Assume a single uniform block of text.
    #     7    Treat the image as a single text line.
    #     8    Treat the image as a single word.
    #     9    Treat the image as a single word in a circle.
    #     10   Treat the image as a single character.
    #     11   Sparse text. Find as much text as possible in no particular order.
    #     12   Sparse text with OSD.
    #     13   Raw line. Treat the image as a single text line.
    #
    # 'tessedit_char_blacklist' makes Tesseract ignore unwanted characters
    # (here: all digits).
    tesseract_options = r'-l ind+eng -c tessedit_char_blacklist=0123456789 --oem 3 --psm 6'
    text = pytesseract.image_to_string(denoised, config=tesseract_options)

    print('Done.')
    print('Scan results:', text)

    return text
Ejemplo n.º 9
0
    def __init__(
        self,
        custom_config=r"--oem 3 --psm 6",
        tesseract_path="",
        verbose=False,
    ):
        """Store the OCR configuration and optionally set the Tesseract path.

        Args:
            custom_config: Tesseract option string kept on
                ``self.custom_config``.
            tesseract_path: Tesseract command; an empty string leaves the
                current setting untouched.
            verbose: when true, print the detected Tesseract version.
        """
        self.custom_config = custom_config
        self.tesseract_path = tesseract_path

        # NOTE(review): this assigns ``tesseract_cmd`` on whatever the name
        # ``pytesseract`` is bound to in this file. If that is the package
        # (``import pytesseract``) rather than the inner module, the setting
        # pytesseract reads is ``pytesseract.pytesseract.tesseract_cmd`` -
        # confirm against the file's imports.
        use_custom_binary = tesseract_path != ""
        if use_custom_binary:
            pytesseract.tesseract_cmd = tesseract_path

        if verbose:
            detected_version = get_tesseract_version()
            print("Tesseract version:", detected_version)
Ejemplo n.º 10
0
def ocr(pic_name, output_path, ocr_lang='chi_sim'):
    """Run Tesseract on a downscaled image and draw the confident words.

    The image is shrunk to a quarter of its size and passed to
    ``image_to_data``. Rows with a confidence above 60 and non-blank text
    are kept, sorted top-to-bottom then left-to-right, printed, and handed
    to ``draw_boxes``.

    Args:
        pic_name: path of the image to read.
        output_path: forwarded to ``draw_boxes``.
        ocr_lang: Tesseract language code(s) used for recognition.

    Raises:
        SystemExit: with status -1 when the image cannot be read.
    """
    im = cv2.imread(pic_name)
    if im is None:
        print('Image file not exists!')
        # Same exit status as before, without relying on the site-provided
        # ``exit`` helper.
        raise SystemExit(-1)
    im = cv2.resize(im,
                    None,
                    fx=1 / 4,
                    fy=1 / 4,
                    interpolation=cv2.INTER_LINEAR)
    print('image size:', im.shape)
    print(ts.get_tesseract_version())
    # Only the word-level table is needed. The former image_to_string and
    # image_to_boxes calls each ran a full OCR pass whose result was unused.
    data = ts.image_to_data(im, lang=ocr_lang, output_type=ts.Output.DICT)
    df = pd.DataFrame(data)
    df = df[['left', 'width', 'top', 'height', 'text', 'conf']]
    # Coerce confidences to numbers first so the threshold comparison cannot
    # fail on a non-numeric column (values that do not parse are dropped).
    df = df[pd.to_numeric(df['conf'], errors='coerce') > 60]
    # Both replacements must go through the ``.str`` accessor. A plain
    # ``Series.replace`` only swaps cells that equal the pattern exactly, so
    # tabs inside the text were never stripped.
    stripped_text = df['text'].str.replace(' ', '').str.replace('\t', '')
    df = df[stripped_text.str.len() > 0]
    df = df.sort_values(['top', 'left'])
    print(df)
    draw_boxes(im, df, pic_name, output_path)
Ejemplo n.º 11
0
def main():
    """Call ``runningGame()``, print the Tesseract version, call ``gameStart()``."""
    runningGame()
    tesseract_version = pytesseract.get_tesseract_version()
    print(tesseract_version)
    gameStart()
Ejemplo n.º 12
0
'''
Created on 02-Oct-2020

@author: somsh
'''
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
import pytesseract as tess
import pdf2image 
from PIL import Image,ImageEnhance,ImageFilter

# Convert the PDF to images at 1000 DPI. The path is a raw string because the
# backslash sequences in this Windows path (\S, \e, \j, \p) are invalid
# escapes in a normal literal; the resulting value is unchanged.
pages=pdf2image.convert_from_path(r'D:\Software\eclipse\jee-2019-12\eclipse-workspace\pdf_word_convert\pdf_word_convert\pp5.pdf',1000)
for page in pages:
    # NOTE(review): every page is saved under the same name, so only the last
    # page of the PDF is left on disk and OCR'd below - confirm this is
    # intended.
    page.save('pp5.jpg','JPEG')

# Re-open the saved page and store an RGB copy for Tesseract; the handle on
# the source file is closed once the converted copy exists.
with Image.open("pp5.jpg") as source_image:
    im=source_image.convert('RGB')
# Optional enhancement steps (median filter, contrast boost, 1-bit
# conversion) are currently not applied.
im.save("enh_pp5.jpg")

tess.pytesseract.tesseract_cmd='D:/Software/Tesseract-OCR/tesseract.exe'
print(tess.get_tesseract_version())
# Recognise the page with the 'ben' language data.
text=tess.pytesseract.image_to_string('enh_pp5.jpg',lang='ben')
# The context manager closes the output file even if the write fails.
with open('pp5.txt','w',encoding="utf-8") as f:
    f.write(text)
Ejemplo n.º 13
0
import cv2
import numpy as np
import pytesseract
import argparse
import os

from imutils import resize, grab_contours
from skimage.filters import threshold_local
from pyimagesearch.transform import four_point_transform

try:
    from PIL import Image
except ImportError:
    import Image

# Report which Tesseract binary pytesseract is talking to; this runs at
# import time and raises if Tesseract cannot be found.
print("Using tesseract version:", pytesseract.get_tesseract_version())

# Process command line arguments:
ap = argparse.ArgumentParser()
# -i/--image is mandatory: the image file to parse.
ap.add_argument("-i",
                "--image",
                required=True,
                help="Path to image to be parsed")
# -p/--preprocess selects the preprocessing type and defaults to "thresh".
ap.add_argument("-p",
                "--preprocess",
                type=str,
                default="thresh",
                help="Type of preprocessing used (Default: thresh)")
ap.add_argument("-e",
                "--engine",
                type=str,
Ejemplo n.º 14
0
import cv2
import pytesseract
import tensorflow as tf
import sys
import keras


# Print the interpreter and library versions in use, one "[INFO] name=value"
# line each; ``!s`` keeps the explicit str() conversion of the original.
print(f"[INFO] python={sys.version}")
print(f"[INFO] cv2={cv2.__version__!s}")
print(f"[INFO] tensorflow={tf.__version__!s}")
print(f"[INFO] keras={keras.__version__!s}")
print(f"[INFO] tesseract={pytesseract.get_tesseract_version()!s}")
Ejemplo n.º 15
0
    def __init__(
            self,
            input_dir=None,
            out_file=None,
            *,
            files_list=None,
            task_class=PdfExtractTask,

            # Config params
            small=False,
            check_input=True,
            chunksize=None,
            saving_interval=5000,
            max_files_memory=3000,
            files_pattern='*.pdf',

            # Task_params
            ocr=False,
            ocr_image_size=None,
            ocr_lang='por',
            features='all',
            image_format='jpeg',
            image_size=None,
            **ray_params):
        """Validate the inputs and prepare the extraction state.

        Args:
            input_dir: directory with the input files; resolved to an
                absolute path when given.
            out_file: output file; resolved to an absolute path when given.
            files_list: optional explicit list of files, stored as ``Path``
                objects.
            task_class: class providing ``get_schema``; stored for later use.
            small: when true, the output-file check is skipped and the
                results buffer gets no size limit.
            check_input: when true, ``_check_input()`` is run.
            chunksize, max_files_memory, files_pattern: stored unchanged.
            saving_interval: size limit of the results buffer unless
                ``small`` is set.
            ocr, ocr_image_size, ocr_lang, features, image_format,
                image_size: collected into ``self.task_params``.
            **ray_params: stored as ``self.ray_params``; ``num_cpus`` is
                also used to set ``self.num_cpus``.
        """
        if input_dir:
            self.input_dir = Path(input_dir).resolve()
        else:
            self.input_dir = None

        if files_list:
            self.files_list = [Path(entry) for entry in files_list]
        else:
            self.files_list = None

        if out_file:
            self.out_file = Path(out_file).resolve()
        else:
            self.out_file = None

        if check_input:
            self._check_input()

        if not small:
            self._check_outfile()

        if ocr:
            # Called only for its side effect: it raises when Tesseract is
            # not found.
            get_tesseract_version()

        # Fall back to the machine's CPU count when num_cpus is missing or
        # falsy.
        requested_cpus = ray_params.get('num_cpus')
        self.num_cpus = requested_cpus or os.cpu_count()
        self.ray_params = ray_params
        self.chunksize = chunksize
        self.small = small
        self.max_files_memory = max_files_memory
        self.files_pattern = files_pattern

        self.num_skipped = None

        self.task_class = task_class
        self.task_params = dict(
            sel_features=features,
            ocr=ocr,
            ocr_lang=ocr_lang,
            ocr_image_size=ocr_image_size,
            image_format=image_format,
            image_size=image_size,
        )

        schema = self.task_class.get_schema(self.list_columns())

        # "small" runs keep everything in one unbounded results buffer.
        max_results_size = None if small else saving_interval
        self.results = Results(self.input_dir,
                               self.out_file,
                               schema,
                               max_size=max_results_size)

        self.results_queue = Queue(max_files_memory)
Ejemplo n.º 16
0
        print(f"[DEBUG] mapped parts: {parts}")
    return parts


################
#    SETUP
################

# Create the config file if it is not already present, then load it.
if not configPath.exists():
    print("No config found")
    writeConfig()
readConfig()

# Report the Tesseract version pytesseract is using.
print("[INFO] currently using tesseract: " + str(pytesseract.get_tesseract_version()))

# List the languages Tesseract has available. Other languages can be
# installed with:
# sudo apt install tesseract-ocr-[language code]
langs = pytesseract.get_languages(config="")
print(f"[INFO] following languages are availible:\n   {langs}")

# Get a numerically sorted list of all source image files and their count.
numbers = re.compile(r"(\d+)")  # captures a run of one or more digits
files = sorted(listdir(path=src_image_path), key=numericalSort)
totalNumOfImages = len(files)
# The file overview is only printed when debugging is enabled.
if enable_debug:
    print(f'[INFO] {totalNumOfImages} image files are present in "{src_image_path}"')
    print(f"[INFO] Following files are present:\n    {files}")

################
Ejemplo n.º 17
0
def get_tesseract_version() -> version.Version:
    """Return the Tesseract version reported by pytesseract, parsed.

    Only the first line of the string form of the reported version is handed
    to ``version.parse``.
    """
    reported = pytesseract.get_tesseract_version()
    first_line = str(reported).splitlines()[0]
    return version.parse(first_line)
Ejemplo n.º 18
0
def read_root():
    """Return the Tesseract version as reported by pytesseract."""
    tesseract_version = pytesseract.get_tesseract_version()
    return tesseract_version
Ejemplo n.º 19
0
import cv2  # 3.4.2
import pytesseract  # 5.0.0-alpha.20200328
from PIL import Image, ImageEnhance

# Report the OpenCV and Tesseract versions in use.
print(cv2.__version__)
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
print(pytesseract.get_tesseract_version())

# Input images; only ``rotated`` is read below.
orig_name = "imgs/test_marker.jpg"
rotated = "imgs/rotated.jpg"

# Set ``resize`` to True to preview the image at three times its size.
resize = False
# Tesseract options: restrict output to digits and '.', page segmentation
# mode 11, and a declared resolution of 72 DPI.
params = '-c tessedit_char_whitelist=0123456789. --psm 11 --dpi 72'

# NOTE(review): cv2.imread returns None when the file cannot be read, in
# which case the ``img.shape`` access below fails - confirm the file exists.
img = cv2.imread(rotated)

height, width, channels = img.shape

# Only the preview uses the resized copy; OCR below runs on the original
# ``img``.
imgResized = img
if resize:
    imgResized = cv2.resize(img, (width * 3, height * 3))
    im = Image.fromarray(imgResized)

# Show the preview and wait for a key press before running OCR.
cv2.imshow("img", imgResized)
cv2.waitKey()

data = pytesseract.image_to_string(img, config=params)
print(data)

h, w, c = img.shape
# Character boxes are requested with the 'rus' language data.
boxes = pytesseract.image_to_boxes(img, lang='rus', config=params)
Ejemplo n.º 20
0
    except (SystemError, NameError):
        trayicon = QtWidgets.QSystemTrayIcon(
            QtGui.QIcon(
                QtGui.QPixmap.fromImage(QtGui.QImage(1, 1, QtGui.QImage.Format_Mono))
            )
        )
        trayicon.show()
        trayicon.showMessage("TextShot", msg, QtWidgets.QSystemTrayIcon.NoIcon)
        trayicon.hide()


if __name__ == "__main__":
    QtCore.QCoreApplication.setAttribute(Qt.AA_DisableHighDpiScaling)
    app = QtWidgets.QApplication(sys.argv)

    # One message, shown as a notification and echoed on the console.
    tesseract_missing_msg = (
        "Tesseract is either not installed or cannot be reached.\n"
        "Have you installed it and added the install directory to your system path?"
    )

    # Refuse to start without a working Tesseract. OSError is the Python 3
    # name of EnvironmentError, so the same exceptions are caught.
    try:
        pytesseract.get_tesseract_version()
    except OSError:
        notify(tesseract_missing_msg)
        print("ERROR: " + tesseract_missing_msg)
        sys.exit()

    # Tesseract is available: create the main window, show the snipper and
    # run the Qt event loop until it exits.
    window = QtWidgets.QMainWindow()
    snipper = Snipper(window)
    snipper.show()
    exit_code = app.exec_()
    sys.exit(exit_code)
Ejemplo n.º 21
0
		#save the dataframe(pdf) data into csv
		save_to_csv(df,PARSE_DATA_CSVS+pdf_file_name_without_ext+".csv")
		print("CSV saved")

	except Exception as e:
		print('ERROR:', e, pdf_file_name_without_ext)
		traceback.print_exc()
	finally:
		print("Clean up working files...")
		shutil.rmtree(input_pdf_images_path, ignore_errors=True)
		shutil.rmtree(input_images_blocks_path, ignore_errors=True)

	end_time = time.time()

	return pdf_file_name_without_ext, end_time - begin_time

if __name__ == '__main__':

	# Diagnostics: Tesseract version and the CPU counts visible to Python.
	print('Tesseract Version:', pytesseract.get_tesseract_version())
	print('multiprocessing cpu_count:', multiprocessing.cpu_count())
	print('os cpu_count:', os.cpu_count())
	# os.sched_getaffinity exists only on some Unix platforms; without the
	# guard this purely informational line would abort the run elsewhere.
	if hasattr(os, 'sched_getaffinity'):
		print('sched_getaffinity:', len(os.sched_getaffinity(0)))
	else:
		print('sched_getaffinity:', 'unavailable on this platform')

	# Earlier single-machine alternative, kept for reference:
	#a_pool = multiprocessing.Pool(multiprocessing.cpu_count())
	#results = a_pool.map(pdf_process, state_pdfs_files)

	# Map pdf_process over the PDF files with the MPI pool executor and print
	# each result as it is yielded.
	with MPIPoolExecutor() as executor:
		results = executor.map(pdf_process, state_pdfs_files)
		for res in results:
			print(res)
Ejemplo n.º 22
0
    print(filename)
    print("Removing the MSS screenshot.")
    import os
    os.remove('mss_fullscreen.png')
except (IOError, OSError) as e:
    print bcolors.FAIL + "ERROR" + bcolors.ENDC
    print e
except:
    print("Unexpected error:", sys.exc_info()[0])
    raise
else:
    print ""
    print "Version: " + mss.__version__
    print ""
    print bcolors.BOLD + bcolors.OKGREEN + "MSS working correctly." + bcolors.ENDC

# --- Pytesseract check (Python 2 print-statement syntax) -------------------
# Print a separator line and a coloured section heading.
print "_____________________________________________________________________________________"
print ""
print bcolors.OKBLUE + "Testing Pytesseract" + bcolors.ENDC
print ""

# Query the Tesseract version. An IOError/OSError is reported as a failure
# with the error text; otherwise a success message is printed.
try:
    print "pytesseract.get_tesseract_version()"
    print pytesseract.get_tesseract_version()
except (IOError, OSError) as e:
    print bcolors.FAIL + "ERROR" + bcolors.ENDC
    print e
else:
    print("")
    print bcolors.BOLD + bcolors.OKGREEN + "Pytesseract working correctly." + bcolors.ENDC
Ejemplo n.º 23
0
def hasOCR():
    """Return True when querying the Tesseract version succeeds.

    Any exception raised by ``pytesseract.get_tesseract_version()``, of
    whatever kind, is treated as "OCR not available" and yields False.
    """
    try:
        pytesseract.get_tesseract_version()
    except BaseException:
        # Same reach as a bare ``except``.
        return False
    else:
        return True
Ejemplo n.º 24
0
def handler_tesseract():
    """Serve the Tesseract endpoint.

    GET returns a small HTML page showing the Tesseract version.

    POST accepts either uploaded files or a base64 image in the ``b64_img``
    form field, submits one OCR task per image and returns a JSON list with
    one entry per image (``result``, ``celery_id``, ``celery_st``, plus
    ``txt`` when the task already reports success and ``origin_name`` for
    uploaded files). A request with neither input, or with undecodable
    base64, yields an entry with ``result=0`` and an ``err`` message.

    Any other request method returns None, as before.
    """
    result_bucket = []

    if request.method == "GET":

        html = "<title>Tesseract</title>" \
               "<h2>Hello, Tesseract Server!</h2>" \
               "<i>Current Ver: %s </i>" % pytesseract.get_tesseract_version()

        return html

    elif request.method == "POST":
        b64_img = request.form.get("b64_img")
        files = request.files

        if not b64_img and not files:
            result_bucket.append(
                dict(result=0, err="Missing params: b64_img or files ."))

        elif files:
            # Uploaded files take precedence over b64_img.
            for fk in files:
                response = dict(origin_name=fk)
                response.update(_submit_ocr_task(BytesIO(files[fk].read())))
                result_bucket.append(response)

        elif b64_img:
            try:
                decode_data = base64.b64decode(b64_img)
            except ValueError:
                # binascii.Error is a ValueError subclass; report malformed
                # client input instead of failing the whole request.
                result_bucket.append(
                    dict(result=0, err="Invalid base64 data in b64_img."))
            else:
                result_bucket.append(_submit_ocr_task(BytesIO(decode_data)))

        return jsonify(result_bucket)


def _submit_ocr_task(im_buff):
    """Queue one OCR task for *im_buff* and describe its current state."""
    # NOTE(review): the task payload is pickled. Pickle must only be used
    # between trusted producers and workers - confirm the broker is not
    # reachable by untrusted parties.
    async_result = ocr_tesseract.apply_async(
        kwargs=dict(im_buff=im_buff),
        serializer="pickle",
    )

    response = dict()
    # The text is included only when the task already reports SUCCESS at
    # submission time; otherwise the caller gets the task id and status.
    if async_result.status == "SUCCESS":
        response["result"] = 1
        response["txt"] = async_result.result
    else:
        response["result"] = 0

    response["celery_id"] = async_result.task_id
    response["celery_st"] = async_result.status
    return response
Ejemplo n.º 25
0
def main():
    """Run the capture, OCR, solve and draw loop until interrupted.

    Requires Tesseract 4 or newer. After a one-off calibration pass on a
    captured frame, each loop iteration takes an image (from ``args.image``
    or the camera), undistorts camera frames, segments the picture into
    puzzle and word bank, recognises the characters, solves the word search
    and converts the solution to drawing coordinates. With
    ``args.everything`` set, the coordinates are also sent to the drawer.

    Exceptions raised inside an iteration are printed and the loop
    continues; Ctrl-C ends the loop.
    """
    args = parse_args()

    # Compare the first character of the version string against 4.
    if int(str(pytesseract.get_tesseract_version())[0]) < 4:
        sys.exit('Tesseract 4.0.0 or greater required!')

    # The drawer is only created in "everything" mode; later uses of
    # ``drawer`` are guarded by the same flag.
    if args.everything:
        jetson_UART = "/dev/ttyTHS1"
        drawer = drw.Drawer(jetson_UART)

    cam = cv.VideoCapture(0, cv.CAP_V4L2)
    cam.set(3, 1280)  # property 3 is the frame width (CAP_PROP_FRAME_WIDTH)
    cam.set(4, 720)  # property 4 is the frame height (CAP_PROP_FRAME_HEIGHT)

    # One-off pass: capture a reference frame, undistort it with the
    # 'calibration_dummy' parameters and crop it to the valid region.
    xyz = capture_image(cam)
    xyz_params = chessboard_calibrate('calibration_dummy', 6, 8, debug=False)
    ret, mtx, dist, rvecs, tvecs = xyz_params
    h, w = xyz.shape[:2]
    newcameramtx, roi = cv.getOptimalNewCameraMatrix(mtx, dist, (w, h), 1,
                                                     (w, h))
    xyz = cv.undistort(xyz, mtx, dist, None, newcameramtx)
    x, y, w, h = roi
    xyz = xyz[y:y + h, x:x + w]
    xy_check = i2wt.uv_to_xy(xyz, xyz_params, [], True)
    display(xy_check[0])
    print(xy_check[1])

    # NOTE(review): nothing in the loop body breaks out on success, so with
    # args.image set the same file is processed again on every iteration -
    # confirm this is intended.
    while True:
        try:
            if args.image:
                img = cv.imread(args.image)
            else:
                img = capture_image(cam)

            # Camera Calibration
            # param order ret, mtx, dist, rvecs, tvecs
            # Only frames taken from the camera are undistorted and cropped.
            if not args.image:
                params = chessboard_calibrate('calibration', 6, 8, debug=False)
                ret, mtx, dist, rvecs, tvecs = params
                h, w = img.shape[:2]
                newcameramtx, roi = cv.getOptimalNewCameraMatrix(
                    mtx, dist, (w, h), 1, (w, h))
                img = cv.undistort(img, mtx, dist, None, newcameramtx)
                x, y, w, h = roi
                img = img[y:y + h, x:x + w]

                #display(img, 'Calibration Output')

            img = remove_shadow(img)
            puzzle, bank, x_offset, y_offset = segment(img, True)

            detected_puzzle, detected_bank, char_coords = tesseract(puzzle,
                                                                    bank,
                                                                    x_offset,
                                                                    y_offset,
                                                                    debug=True,
                                                                    img=img)
            solved_word_points = permutative_solve(detected_bank,
                                                   detected_puzzle)
            print(solved_word_points)
            solved_uv_points = i2wt.wordsearch_to_uv(char_coords,
                                                     solved_word_points)

            print(
                solved_uv_points
            )  #char_coords[solved_uv_points[0][0][0]][solved_uv_points[0][0][1]][2])

            #solved_uv_points = [[[[468 ], [222]],[[470],[642]]], [[[764],[446]], [[1064],[220]]]]
            to_MSP_points = i2wt.uv_to_xy(xyz, xyz_params, solved_uv_points,
                                          False)
            display(to_MSP_points[0])
            if args.everything:
                # Each coordinate is shifted by the start offset and divided
                # by the scaling factor, then rounded to an integer before it
                # is sent to the drawer.
                scaling_factor_x = 0.22
                scaling_factor_y = 0.22
                start_offset_x = 3.75
                start_offset_y = 6.6
                drawer.read(1)
                for point_pair in to_MSP_points[1]:
                    x1 = int(
                        round((point_pair[0][0] + start_offset_x) /
                              scaling_factor_x))
                    y1 = int(
                        round((point_pair[0][1] + start_offset_y) /
                              scaling_factor_y))
                    x2 = int(
                        round((point_pair[1][0] + start_offset_x) /
                              scaling_factor_x))
                    y2 = int(
                        round((point_pair[1][1] + start_offset_y) /
                              scaling_factor_y))
                    to_draw = [(x1, y1), (x2, y2)]
                    drawer.draw(to_draw)
                    drawer.read(1)
                drawer.send(255)
                cv.destroyAllWindows()
        # Any error in one iteration is printed and the loop carries on.
        except Exception as e:
            print(e)
        # KeyboardInterrupt is not an Exception subclass, so Ctrl-C reaches
        # this handler and ends the loop.
        except (KeyboardInterrupt):
            print('See ya later!')
            if args.everything:
                drawer.cleanup()
            break
Ejemplo n.º 26
0
def extract_time(video, log):
    """Determine the start date and time of a video.

    Two sources are tried: the date/time encoded in the file name
    (``extract_date``) and the timestamp read from the first frame via OCR
    (``get_timestamp`` / ``clean_OCR_Time``). The choice is:

    * both fail: the hard-coded default date and time are returned as-is;
    * only one succeeds: its date and time are used;
    * both succeed: the OCR time is used when it is within one hour of the
      file-name time, otherwise the file-name time; the date always comes
      from the file name.

    Every result except the default is passed through ``convert_to_UTC``.

    Args:
        video: path of the video file.
        log: writable text stream that receives the decision and any error.

    Returns:
        A ``(date, time)`` tuple.
    """
    print(pytesseract.get_tesseract_version())
    threshold_error = timedelta(hours=1, minutes=0)
    ocr_time_failed = False
    file_time_failed = False
    file_name_time = None
    file_name_date = None
    default_time = timedelta(hours=9, minutes=0)
    default_date = "2020-05-28"

    try:
        file_name_date, file_name_time = extract_date(video)
        # Called only to validate the date format; it raises on a mismatch.
        datetime.strptime(file_name_date, "%Y-%m-%d")
    except Exception as e:
        log.write("ERROR in extracting the date-time from the file_name\n")
        log.write(str(e) + "\n")
        file_time_failed = True

    video_object = None
    try:
        video_object = cv2.VideoCapture(video)
        print(video)
        ret, frame = video_object.read()
        print(ret)
        ocr_time_stamp = get_timestamp(frame)
        ocr_date, ocr_time = clean_OCR_Time(ocr_time_stamp)
    except Exception as e:
        log.write(video + "ERROR in extracting the date-time from the OCR\n")
        log.write(str(e) + "\n")
        ocr_time_failed = True
    finally:
        # Release the capture handle on every path; it was previously left
        # open until garbage collection.
        if video_object is not None:
            video_object.release()

    if (file_time_failed and ocr_time_failed):
        log.write("Using a default time_stamp " + default_date + 'T' +
                  str(default_time) + "\n")
        return (default_date, default_time)

    elif ocr_time_failed:
        log.write("Using file extracted time_stamp " + file_name_date + "T" +
                  str(file_name_time) + "\n")
        file_name_date, file_name_time = convert_to_UTC(
            file_name_date, file_name_time)
        return (file_name_date, file_name_time)

    elif file_time_failed:
        log.write("Using OCR extracted time_stamp and OCR date " + ocr_date +
                  "T" + str(ocr_time) + "\n")
        ocr_date, ocr_time = convert_to_UTC(ocr_date, ocr_time)
        return (ocr_date, ocr_time)

    else:
        # Both sources are available: use the OCR time only when it agrees
        # with the file name to within the threshold.
        if abs(ocr_time - file_name_time) < threshold_error:
            log.write("Using OCR timestamp " + file_name_date + "T" +
                      str(ocr_time) + "\n")
            file_name_date, ocr_time = convert_to_UTC(file_name_date, ocr_time)
            return (file_name_date, ocr_time)
        else:
            log.write("Using file_name timestamp " + file_name_date + "T" +
                      str(file_name_time) + "\n")
            file_name_date, file_name_time = convert_to_UTC(
                file_name_date, file_name_time)
            return (file_name_date, file_name_time)
Ejemplo n.º 27
0
#!/usr/bin/env python

# Installation check: each dependency is imported right before its version is
# printed, so a failing import stops the script at the missing package.
import platform
import os

print("Platform {}".format(platform.platform()))
print("Python v {}".format(platform.python_version()))

from PyQt5 import QtCore
print("Successfully installed PyQt v. {}".format(QtCore.PYQT_VERSION_STR))

import vtk
print("Successfully installed vtk v. {}".format(vtk.vtkVersion.GetVTKSourceVersion()))

import pytesseract
# The number printed is the version returned by get_tesseract_version().
print("Successfully installed pytesseract v. {}".format(pytesseract.get_tesseract_version()))

# pycaffe is optional: report a failed import instead of aborting. Catching
# Exception rather than using a bare ``except`` lets Ctrl-C and SystemExit
# through.
try:
    import caffe
    print("Successfully installed pycaffe")
except Exception:
    print("Error: pycaffe not installed ! (python3?)")



Ejemplo n.º 28
0
    def get_pixelsize(self, debug=False):
        """
        Reads the scalebar from images of the Tecnai TEM microscopes using
        text recognition via pytesseract, falling back to manual input when
        pytesseract or the tesseract engine is not available or when the
        scale bar text cannot be read

        Parameters
        ----------
        debug : bool, optional
            enable debug mode which prints extra information and figures to
            troubleshoot any issues with calibration. The default is False.

        Returns
        -------
        pixelsize : float
            the pixelsize in calibrated (physical) units
        unit : string
            the physical unit of the pixelsize

        Raises
        ------
        ValueError
            when a manually entered pixelsize or scale bar size cannot be
            converted to a float

        """
        import re

        def manual_scalebar():
            #single manual fallback shared by every failure mode of the OCR
            manual_unit = input('give scale bar unit: ')
            manual_value = float(input('give scale bar size in '+manual_unit+': '))
            return manual_value,manual_unit

        #without a detected scale bar the pixelsize is asked for directly
        if len(self.scalebar) == 0:
            print('[WARNING] tecnai.get_pixelsize: original scale bar not found!')
            pixelsize = float(input('Please give pixelsize in nm: '))
            self.unit = 'nm'
            self.pixelsize = pixelsize
            return pixelsize,'nm'

        #find contour corners sorted left to right. findContours returns 2
        #values in OpenCV 2.x and 4.x but 3 values in 3.x; the contours are the
        #second-to-last item in every case, so index instead of unpacking
        corners = cv2.findContours(self.scalebar,cv2.RETR_LIST,cv2.CHAIN_APPROX_SIMPLE)[-2]
        corners = sorted(corners, key=lambda c: cv2.boundingRect(c)[0])

        #length in pixels between bottom left corners of vertical bars
        barlength = corners[0][7,0,0]-corners[0][1,0,0]

        if debug:
            import matplotlib.pyplot as plt
            print('\n------- DEBUGGING IMAGE CALIBRATION -------')
            print('- length:',barlength,'pixels')
            plt.figure('[DEBUG MODE] scale bar corners')
            plt.imshow(self.scalebar)
            plt.scatter(corners[0][:,0,0],corners[0][:,0,1],color='r',label='corners')
            plt.scatter(corners[0][[1,7],0,0],corners[0][[1,7],0,1],color='green',label='used for calibration')
            plt.legend()
            plt.show(block=False)

        #take the text of the databar, with a margin that scales with the
        #image width. The left bound is clamped at 0 because a negative slice
        #start would wrap around to the other side of the image
        text_left = max(0, min(corners[1][:,0,0])-int(6*self.shape[1]/1024))
        text_right = max(corners[-1][:,0,0])+int(6*self.shape[1]/1024+1)
        bartext = self.scalebar[:,text_left:text_right]
        bartext = bartext.max() - bartext

        #upscale if needed for OCR
        if self.shape[1] < 4096:
            if self.shape[1] < 2048:
                factor = 4
            else:
                factor = 2
            bartextshape = np.shape(bartext)
            bartext = cv2.resize(
                bartext,
                (factor*bartextshape[1],factor*bartextshape[0]),
                interpolation = cv2.INTER_CUBIC
            )
            bartext = cv2.erode(
                cv2.threshold(bartext,0,255,cv2.THRESH_BINARY+cv2.THRESH_OTSU)[1],
                np.ones((5,5),np.uint8)
            )
            if debug:
                print('- preprocessing text, resizing text image from',bartextshape,'to',np.shape(bartext))

        try:
            #load tesseract-OCR for reading the text
            import pytesseract

            #funnel every "engine not usable" failure into FileNotFoundError,
            #an error the text parsing below cannot raise, so the correct
            #warning is given. pytesseract signals a missing engine with its
            #own TesseractNotFoundError; an unparsable version gives ValueError
            try:
                tesseract_version = float(str(pytesseract.get_tesseract_version())[:3])
            except (ValueError, pytesseract.TesseractNotFoundError):
                raise FileNotFoundError

            #settings vary per version, so use tesseract_version to pick them
            if tesseract_version == 4.0:
                #oem 0 selects older version of tesseract which still takes the char_whitelist param
                #tessedit_char_whitelist takes list of characters it searches for (to reduce reading errors)
                #psm 7 is a mode that tells tesseract to assume a single line of text in the image
                text = pytesseract.image_to_string(
                    bartext,
                    config="--oem 0 -c tessedit_char_whitelist=0123456789pnuµm --psm 7"
                )
            else:
                #since version 4.1 char whitelist is added back
                text = pytesseract.image_to_string(
                    bartext,
                    config="-c tessedit_char_whitelist=0123456789pnuµm --psm 7"
                )

            #strip the form feed character from the recognized text
            text = text.replace('\x0c','')
            if debug:
                plt.figure('[DEBUG MODE] scale bar text')
                plt.imshow(bartext)
                plt.show(block=False)
                print('- text:',text)

            #split value and unit. The micro sign is part of the whitelist, so
            #it must be matched here too or 'µm' would be truncated to 'm'
            value = float(re.findall(r'\d+',text)[0])
            unit = re.findall(r'[a-zµ]+',text)[0]

        #give different warnings for missing installation or reading problems
        except ImportError:
            print('pytesseract not found, defaulting to manual mode')
            value,unit = manual_scalebar()
        except FileNotFoundError:
            print('[WARNING] tecnai.get_pixelsize(): tesseract OCR engine was'+
                  ' not found by pytesseract. Switching to manual mode.')
            value,unit = manual_scalebar()
        except Exception:
            #Exception instead of a bare except, so that Ctrl-C aborts the
            #call instead of being answered with an input prompt
            print('[WARNING] tecnai.get_pixelsize(): could not read scale bar'+
                  ' text, perhaps try debug=True. Switching to manual mode.')
            value,unit = manual_scalebar()

        if unit == 'um':
            unit = 'µm'

        #determine pixelsize
        pixelsize = value/barlength

        if debug:
            print('- value:',value)
            print('- unit:',unit)
            print('- 2 figures created')
            print('-------------------------------------------\n')

        print('Original scale bar: {:.3g}'.format(value),unit)
        print('Pixel size: {:.5g}'.format(pixelsize),unit)

        self.pixelsize = pixelsize
        self.unit = unit
        self.scalebarlength = value
        self.scalebarlength_px = barlength

        return pixelsize,unit
Ejemplo n.º 29
0
if numpy_installed:
    import numpy as np

if pandas_installed:
    import pandas

try:
    from PIL import Image
except ImportError:
    import Image

# Interpreter-generation flags, derived from the major version number only.
IS_PYTHON_3 = version_info[:1] >= (3, )
IS_PYTHON_2 = not IS_PYTHON_3

# Components of the version reported by get_tesseract_version().
TESSERACT_VERSION = tuple(get_tesseract_version().version)  # to skip tests

# Test assets are stored in the "data" directory next to this module.
DATA_DIR = path.join(path.dirname(path.abspath(__file__)), 'data')
TEST_JPEG = path.join(DATA_DIR, 'test.jpg')

pytestmark = pytest.mark.pytesseract  # used marker for the module
# Text type of the running interpreter; `unicode` is only looked up on Python 2.
string_type = str if IS_PYTHON_3 else unicode  # noqa: 821


@pytest.fixture(scope='session')
def test_file():
    """Return the path of the sample JPEG image (TEST_JPEG)."""
    return TEST_JPEG


@pytest.fixture(scope='session')
def test_invalid_file():
Ejemplo n.º 30
0
# OpenCV(4.1.0)                                 pip3 install opencv-python
# Python Image Library -> Pillow(6.0.0)         pip3 install pillow
# Numpy(1.16.2)                                 pip3 install numpy
# Scipy(1.2.1)                                  pip3 install scipy
# Matplotlib(3.0.3)                             pip3 install matplotlib
# Imutils(0.5.2)                                pip3 install imutils
# Tesseract(4.0.0)                              pip3 install pytesseract
# Sklearn(0.21.3)                               pip3 install -U scikit-learn
# Utils(0.21.3)                               pip3 install utils

import cv2
import platform
import PIL
import PIL.Image as Image
import numpy as np
import scipy
import matplotlib as mpl
import imutils
import pytesseract
import sklearn


# Report the version of the interpreter and of every imported dependency.
print("You are running python", platform.python_version())
print("You are running opencv", cv2.__version__)
# Image.PILLOW_VERSION was removed in Pillow 7.0.0; PIL.__version__ is the
# supported way to read the Pillow version.
print("You are running pillow", PIL.__version__)
print("You are running numpy", np.__version__)
print("You are running scipy", scipy.__version__)
print("You are running matplotlib", mpl.__version__)
print("You are running imutils", imutils.__version__)
# get_tesseract_version() queries the tesseract engine itself.
print("You are running tesseract", pytesseract.get_tesseract_version())
print("You are running sklearn", sklearn.__version__)