Esempio n. 1
0
def ocr_search(request, pk):
    """Find source strings that OCR recognizes in a screenshot.

    The screenshot image is processed twice - once as a greyscale image and
    once as a 4x upscaled copy of it - and every match yielded by
    ``ocr_extract`` is mapped back to the primary key of the unit carrying
    that source string.

    Args:
        request: Request object, forwarded to ``get_screenshot``.
        pk: Screenshot identifier, forwarded to ``get_screenshot``.

    Returns:
        Whatever ``search_results`` returns: it is called with 500 when OCR
        support is unavailable, otherwise with 200 and the matching units.
    """
    obj = get_screenshot(request, pk)
    if not HAS_OCR:
        return search_results(500, obj)
    translation = obj.component.source_translation

    # Load image and convert to greyscale; the file backing the opened image
    # is released as soon as the converted copy exists.
    with Image.open(obj.image.path) as loaded_image:
        original_image = loaded_image.convert("L")

    scaled_image = None
    results = set()
    try:
        # Resize image (tesseract works best around 300dpi); resize() already
        # returns a new image, so no intermediate copy is needed.
        scaled_image = original_image.resize(
            [size * 4 for size in original_image.size], Image.BICUBIC)

        # Find all our strings (units sharing a source text collapse to a
        # single pk because the source is used as the dict key)
        sources = dict(translation.unit_set.values_list("source", "pk"))
        strings = tuple(sources)

        # Extract and match strings
        with c_locale(), PyTessBaseAPI() as api:
            for image in (original_image, scaled_image):
                results.update(
                    sources[match]
                    for match in ocr_extract(api, image, strings)
                )
    finally:
        # Close images even when the query or the OCR step raised
        original_image.close()
        if scaled_image is not None:
            scaled_image.close()

    return search_results(200, obj,
                          translation.unit_set.filter(pk__in=results))
Esempio n. 2
0
def ocr_search(request, pk):
    """Find source strings that OCR recognizes in a screenshot.

    The strings to look for come from the first translation of the
    screenshot's component. The image is processed twice - once as a
    greyscale image and once as a 4x upscaled copy of it - and every match
    yielded by ``ocr_extract`` is mapped back to the primary key of the unit
    carrying that source string.

    Args:
        request: Request object, forwarded to ``get_screenshot``.
        pk: Screenshot identifier, forwarded to ``get_screenshot``.

    Returns:
        Whatever ``search_results`` returns: it is called with 500 when OCR
        support is unavailable or the component has no translation,
        otherwise with 200 and the matching units.
    """
    obj = get_screenshot(request, pk)
    if not HAS_OCR:
        return search_results(500, obj)
    try:
        translation = obj.component.translation_set.all()[0]
    except IndexError:
        return search_results(500, obj)

    # Load image and convert to greyscale; the file backing the opened image
    # is released as soon as the converted copy exists.
    with Image.open(obj.image.path) as loaded_image:
        original_image = loaded_image.convert("L")

    scaled_image = None
    results = set()
    try:
        # Resize image (tesseract works best around 300dpi); resize() already
        # returns a new image, so no intermediate copy is needed.
        scaled_image = original_image.resize(
            [size * 4 for size in original_image.size],
            Image.BICUBIC
        )

        # Find all our strings (units sharing a source text collapse to a
        # single pk because the source is used as the dict key)
        sources = dict(translation.unit_set.values_list('source', 'pk'))
        strings = tuple(sources)

        # Extract and match strings
        with c_locale(), PyTessBaseAPI() as api:
            for image in (original_image, scaled_image):
                results.update(
                    sources[match]
                    for match in ocr_extract(api, image, strings)
                )
    finally:
        # Release image memory even when the query or the OCR step raised
        original_image.close()
        if scaled_image is not None:
            scaled_image.close()

    return search_results(
        200,
        obj,
        translation.unit_set.filter(pk__in=results)
    )
Esempio n. 3
0
import difflib

from django.contrib.auth.decorators import login_required
from django.core.exceptions import PermissionDenied
from django.http import JsonResponse
from django.utils.translation import ugettext as _
from django.views.decorators.http import require_POST
from django.views.generic import ListView, DetailView
from django.shortcuts import get_object_or_404, redirect, render

from PIL import Image

from weblate.utils.locale import c_locale

# Optional OCR support: HAS_OCR records whether tesserocr could be imported,
# so the import failure is tolerated instead of breaking this module.
try:
    # The import is performed inside the c_locale() context manager.
    # NOTE(review): presumably tesserocr requires the C locale while it is
    # being loaded - confirm against weblate.utils.locale.
    with c_locale():
        from tesserocr import PyTessBaseAPI, RIL
    HAS_OCR = True
except ImportError:
    # tesserocr is not installed (or failed to import)
    HAS_OCR = False

from weblate.screenshots.forms import ScreenshotForm
from weblate.screenshots.models import Screenshot
from weblate.trans.models import Source
from weblate.utils import messages
from weblate.utils.views import ComponentViewMixin


def try_add_source(request, obj):
    """Reject requests whose POST data lacks a purely numeric 'source'.

    Returns False when the 'source' field is missing or is not made up of
    digits only.
    """
    # NOTE(review): only this validation guard is visible here; ``obj`` is
    # not used by it.
    posted = request.POST
    has_numeric_source = 'source' in posted and posted['source'].isdigit()
    if not has_numeric_source:
        return False
Esempio n. 4
0
from django.http import JsonResponse
from django.shortcuts import get_object_or_404, redirect, render
from django.utils.translation import ugettext as _
from django.views.decorators.http import require_POST
from django.views.generic import DetailView, ListView
from PIL import Image

from weblate.screenshots.forms import ScreenshotForm
from weblate.screenshots.models import Screenshot
from weblate.trans.models import Source
from weblate.utils import messages
from weblate.utils.locale import c_locale
from weblate.utils.views import ComponentViewMixin

# Optional OCR support: HAS_OCR records whether tesserocr could be imported,
# so the import failure is tolerated instead of breaking this module.
try:
    # The import is performed inside the c_locale() context manager.
    # NOTE(review): presumably tesserocr requires the C locale while it is
    # being loaded - confirm against weblate.utils.locale.
    with c_locale():
        from tesserocr import PyTessBaseAPI, RIL
    HAS_OCR = True
except ImportError:
    # tesserocr is not installed (or failed to import)
    HAS_OCR = False


def try_add_source(request, obj):
    if 'source' not in request.POST:
        return False

    try:
        source = Source.objects.get(
            pk=int(request.POST['source']),
            component=obj.component
        )