Exemple #1
0
def translate(text):
    """Translate text via find_words followed by check_words.

    The lookup is intended to use food_words first and then the CEDICT
    dictionary; that logic lives in the helpers called here. Each stage is
    reported through time_elapsed, whose return value is used as the
    starting point for the next measurement.
    """
    checkpoint = dt.datetime.now()

    # Stage 1: extract the words from the input text.
    candidates = find_words(text)
    checkpoint = time_elapsed("Find words", checkpoint)

    # Stage 2: look the extracted words up.
    translated = check_words(candidates)
    time_elapsed("Check words", checkpoint)

    return translated
Exemple #2
0
def translate(text):
    """Translate text via find_words followed by check_words.

    The lookup is intended to use food_words first and then the CEDICT
    dictionary; that logic lives in the helpers called here. Each stage is
    reported through time_elapsed, whose return value is used as the
    starting point for the next measurement.
    """
    checkpoint = dt.datetime.now()

    # Stage 1: extract the words from the input text.
    candidates = find_words(text)
    checkpoint = time_elapsed("Find words", checkpoint)

    # Stage 2: look the extracted words up.
    translated = check_words(candidates)
    time_elapsed("Check words", checkpoint)

    return translated
Exemple #3
0
def search(text):
    """Run a dish search for text and return the outcome as a JSON string.

    The query is handed to search_dish_name and whatever it returns is
    serialized with json.dumps.
    """
    # timing information, can delete later
    began = datetime.datetime.now()

    # Fetch the search data for this query, then report how long it took.
    payload = search_dish_name(text)
    time_elapsed("Search and translate", began)

    return json.dumps(payload)
Exemple #4
0
def search(text):
    """Run a dish search for text and return the outcome as a JSON string.

    The query is handed to search_dish_name and whatever it returns is
    serialized with json.dumps.
    """
    # timing information, can delete later
    began = datetime.datetime.now()

    # Fetch the search data for this query, then report how long it took.
    payload = search_dish_name(text)
    time_elapsed("Search and translate", began)

    return json.dumps(payload)
Exemple #5
0
def preprocess_image(path):
    """Apply the standard pre-Tesseract processing to the image at path.

    The image is opened with PIL, passed through smooth_and_grayscale and
    then binarize, and finally saved back over the same path. Each step is
    reported through time_elapsed.
    """
    checkpoint = dt.datetime.now()

    # Load with PIL and run the smoothing / grayscale step.
    image = smooth_and_grayscale(Image.open(path))
    checkpoint = time_elapsed("Smoothing", checkpoint)

    # Binarize the result of the previous step.
    image = binarize(image)
    time_elapsed("Binarization", checkpoint)

    # Overwrite the original file with the processed image.
    image.save(path)
Exemple #6
0
def preprocess_image(path):
    """Apply the standard pre-Tesseract processing to the image at path.

    The image is opened with PIL, passed through smooth_and_grayscale and
    then binarize, and finally saved back over the same path. Each step is
    reported through time_elapsed.
    """
    checkpoint = dt.datetime.now()

    # Load with PIL and run the smoothing / grayscale step.
    image = smooth_and_grayscale(Image.open(path))
    checkpoint = time_elapsed("Smoothing", checkpoint)

    # Binarize the result of the previous step.
    image = binarize(image)
    time_elapsed("Binarization", checkpoint)

    # Overwrite the original file with the processed image.
    image.save(path)
Exemple #7
0
def upload():
    """Handler for uploading an image, processing and sending to Tesseract.

    Reads the raw image bytes from request.data, writes them to a
    timestamped .png file under app.config['UPLOAD_FOLDER'], preprocesses
    that file and runs it through Tesseract. If the first pass yields no
    text, the image is smoothed and thinned and Tesseract is run once more.

    Returns:
        A redirect to the "search" endpoint carrying the extracted text, or
        a JSON string of the form {"error": ...} when the request carried no
        image data or no text could be extracted.
    """
    image_data = request.data
    if not image_data:
        # Answer with an explicit error rather than falling off the end of
        # the view and returning None.
        return json.dumps({"error": "No image data received. Please try again."})

    # timing information for performance, can delete later
    # Started before the write so "Writing file" covers the write itself.
    start = datetime.datetime.now()

    # Create a unique filename for the image. The hour (%H) and the
    # microseconds (%f) are part of the stamp so that uploads made at the
    # same minute:second of different hours, or within the same second, are
    # far less likely to overwrite each other.
    now = datetime.datetime.utcnow()
    filename = now.strftime('%Y%m%d%H%M%S%f') + '.png'
    image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    # write the image data to the file
    with open(image_path, 'wb') as f:
        f.write(image_data)
    start = time_elapsed("Writing file", start)

    # do some preprocessing on the image to optimize it for Tesseract
    preprocess_image(image_path)
    start = time_elapsed("Preprocessing", start)

    # run the image through tesseract and extract text
    # NOTE(review): this pass hard-codes "chi_sim" while the retry below
    # uses LANG -- confirm whether both should use LANG.
    text = image_file_to_string(image_path,
                                lang="chi_sim",
                                graceful_errors=True)
    text = text.strip()
    start = time_elapsed("Tesseract", start)

    if not text:
        # if Tesseract returns nothing, do some additional processing to
        # see if results improve.
        smooth_and_thin_image(image_path)

        # run through tesseract again
        text = image_file_to_string(image_path,
                                    lang=LANG,
                                    graceful_errors=True)
        text = text.strip()
        time_elapsed("Tesseract", start)

        if not text:
            # if still no text from Tesseract, send an error to the client
            error_data = {"error": "No results found. Please try again."}
            return json.dumps(error_data)

    # if text was received, redirect to search
    return redirect(url_for("search", text=text))
Exemple #8
0
def upload():
    """Handler for uploading an image, processing and sending to Tesseract.

    Reads the raw image bytes from request.data, writes them to a
    timestamped .png file under app.config['UPLOAD_FOLDER'], preprocesses
    that file and runs it through Tesseract. If the first pass yields no
    text, the image is smoothed and thinned and Tesseract is run once more.

    Returns:
        A redirect to the "search" endpoint carrying the extracted text, or
        a JSON string of the form {"error": ...} when the request carried no
        image data or no text could be extracted.
    """
    image_data = request.data
    if not image_data:
        # Answer with an explicit error rather than falling off the end of
        # the view and returning None.
        return json.dumps({"error": "No image data received. Please try again."})

    # timing information for performance, can delete later
    # Started before the write so "Writing file" covers the write itself.
    start = datetime.datetime.now()

    # Create a unique filename for the image. The hour (%H) and the
    # microseconds (%f) are part of the stamp so that uploads made at the
    # same minute:second of different hours, or within the same second, are
    # far less likely to overwrite each other.
    now = datetime.datetime.utcnow()
    filename = now.strftime('%Y%m%d%H%M%S%f') + '.png'
    image_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)

    # write the image data to the file
    with open(image_path, 'wb') as f:
        f.write(image_data)
    start = time_elapsed("Writing file", start)

    # do some preprocessing on the image to optimize it for Tesseract
    preprocess_image(image_path)
    start = time_elapsed("Preprocessing", start)

    # run the image through tesseract and extract text
    # NOTE(review): this pass hard-codes "chi_sim" while the retry below
    # uses LANG -- confirm whether both should use LANG.
    text = image_file_to_string(image_path, lang="chi_sim", graceful_errors=True)
    text = text.strip()
    start = time_elapsed("Tesseract", start)

    if not text:
        # if Tesseract returns nothing, do some additional processing to
        # see if results improve.
        smooth_and_thin_image(image_path)

        # run through tesseract again
        text = image_file_to_string(image_path, lang=LANG, graceful_errors=True)
        text = text.strip()
        time_elapsed("Tesseract", start)

        if not text:
            # if still no text from Tesseract, send an error to the client
            error_data = {"error": "No results found. Please try again."}
            return json.dumps(error_data)

    # if text was received, redirect to search
    return redirect(url_for("search", text=text))
Exemple #9
0
def search_dish_name(text):
    """Search the dishes database for text, falling back to translation.

    Args:
        text: The query, either as text or as UTF-8 encoded bytes.

    Returns:
        None when text is longer than 10 characters. Otherwise the value of
        match.get_json() when Dish.find_match finds a dish; failing that, a
        dict with a 'translation' entry and, when text has more than one
        character and similar dishes were found, a 'similar' list holding
        each similar dish's get_json_min() value.
    """
    # timing information, can delete later.
    start = dt.datetime.now()

    # Decode byte strings so the length checks below count characters
    # rather than bytes. On Python 2, bytes is str, so this also covers
    # plain str input; text that is already decoded is left alone.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    if len(text) > 10:
        # Most dish names are 3-5 characters.
        # If Tesseract returned more than 10 characters, something probably went wrong.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Input text is too long.")
        return None

    # Find a matching dish, if it exists.
    match = Dish.find_match(text)
    if match:
        # If result is found, return JSON representation of dish.
        results = match.get_json()
        time_elapsed("Dish lookup", start)
        return results

    # If no dish is found, return translation data and similar dishes, if they exist.
    results = {'translation': translate(text)}
    start = time_elapsed("Translation", start)

    # Find similar dishes and add to results.
    if len(text) > 1:
        similar_dishes = Dish.find_similar(text)
        time_elapsed("Similar dish lookup", start)
        similar_json = [dish.get_json_min() for dish in similar_dishes]

        # Only include the key when there is at least one similar dish.
        if similar_json:
            results['similar'] = similar_json

    return results
Exemple #10
0
def search_dish_name(text):
    """Search the dishes database for text, falling back to translation.

    Args:
        text: The query, either as text or as UTF-8 encoded bytes.

    Returns:
        None when text is longer than 10 characters. Otherwise the value of
        match.get_json() when Dish.find_match finds a dish; failing that, a
        dict with a 'translation' entry and, when text has more than one
        character and similar dishes were found, a 'similar' list holding
        each similar dish's get_json_min() value.
    """
    # timing information, can delete later.
    start = dt.datetime.now()

    # Decode byte strings so the length checks below count characters
    # rather than bytes. On Python 2, bytes is str, so this also covers
    # plain str input; text that is already decoded is left alone.
    if isinstance(text, bytes):
        text = text.decode('utf-8')

    if len(text) > 10:
        # Most dish names are 3-5 characters.
        # If Tesseract returned more than 10 characters, something probably went wrong.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Input text is too long.")
        return None

    # Find a matching dish, if it exists.
    match = Dish.find_match(text)
    if match:
        # If result is found, return JSON representation of dish.
        results = match.get_json()
        time_elapsed("Dish lookup", start)
        return results

    # If no dish is found, return translation data and similar dishes, if they exist.
    results = {'translation': translate(text)}
    start = time_elapsed("Translation", start)

    # Find similar dishes and add to results.
    if len(text) > 1:
        similar_dishes = Dish.find_similar(text)
        time_elapsed("Similar dish lookup", start)
        similar_json = [dish.get_json_min() for dish in similar_dishes]

        # Only include the key when there is at least one similar dish.
        if similar_json:
            results['similar'] = similar_json

    return results
Exemple #11
0
def smooth_and_thin_image(path):
    """Run the extra preprocessing pass on the image stored at path.

    The pass covers smoothing, acute angle emphasis and thinning. The image
    is opened with PIL, converted with im_to_trutharray, processed, converted
    back with trutharray_to_im and saved over the same path. Each processing
    step is reported through time_elapsed.
    """
    checkpoint = dt.datetime.now()

    # Open with PIL and convert for the array-based steps.
    bitmap = im_to_trutharray(Image.open(path))

    # Stentiford preprocessing for image thinning. The return value is not
    # used, so the helper presumably modifies the array in place -- confirm.
    smooth_and_emphasize_angles(bitmap)
    checkpoint = time_elapsed("Stentiford preprocessing", checkpoint)

    # Thinning
    thinned = thin_image(bitmap)
    time_elapsed("Thinning", checkpoint)

    # Convert back to an image and overwrite the original file.
    trutharray_to_im(thinned).save(path)
Exemple #12
0
def smooth_and_thin_image(path):
    """Run the extra preprocessing pass on the image stored at path.

    The pass covers smoothing, acute angle emphasis and thinning. The image
    is opened with PIL, converted with im_to_trutharray, processed, converted
    back with trutharray_to_im and saved over the same path. Each processing
    step is reported through time_elapsed.
    """
    checkpoint = dt.datetime.now()

    # Open with PIL and convert for the array-based steps.
    bitmap = im_to_trutharray(Image.open(path))

    # Stentiford preprocessing for image thinning. The return value is not
    # used, so the helper presumably modifies the array in place -- confirm.
    smooth_and_emphasize_angles(bitmap)
    checkpoint = time_elapsed("Stentiford preprocessing", checkpoint)

    # Thinning
    thinned = thin_image(bitmap)
    time_elapsed("Thinning", checkpoint)

    # Convert back to an image and overwrite the original file.
    trutharray_to_im(thinned).save(path)