def ocr_search(request, pk):
    """Run OCR over a screenshot and look up the source strings found in it.

    The screenshot identified by ``pk`` is loaded, converted to greyscale
    and additionally upscaled four times; both variants are passed to
    ``ocr_extract`` together with every source string of the component's
    source translation. Units whose source string was matched are handed
    to ``search_results``.

    Returns the value of ``search_results``: called with status 500 when
    ``HAS_OCR`` is false, otherwise with status 200 and the queryset of
    matched units.
    """
    obj = get_screenshot(request, pk)
    if not HAS_OCR:
        return search_results(500, obj)
    translation = obj.component.source_translation

    # Load the image and convert it to greyscale. convert() returns an
    # independent in-memory image, so the file-backed one (and its open
    # file handle) can be released as soon as the conversion is done.
    with Image.open(obj.image.path) as loaded_image:
        original_image = loaded_image.convert("L")

    scaled_image = None
    try:
        # Resize image (tesseract works best around 300dpi); resize()
        # already returns a new image, no explicit copy is needed.
        scaled_image = original_image.resize(
            tuple(size * 4 for size in original_image.size), Image.BICUBIC
        )

        # Find all our strings
        sources = dict(translation.unit_set.values_list("source", "pk"))
        strings = tuple(sources)
        results = set()

        # Extract and match strings
        with c_locale(), PyTessBaseAPI() as api:
            for image in (original_image, scaled_image):
                for match in ocr_extract(api, image, strings):
                    results.add(sources[match])
    finally:
        # Close images even when the query or the OCR run raised.
        original_image.close()
        if scaled_image is not None:
            scaled_image.close()

    return search_results(200, obj, translation.unit_set.filter(pk__in=results))
def ocr_search(request, pk):
    """Run OCR over a screenshot and look up the source strings found in it.

    The screenshot identified by ``pk`` is loaded, converted to greyscale
    and additionally upscaled four times; both variants are passed to
    ``ocr_extract`` together with every source string of the first
    translation of the screenshot's component. Units whose source string
    was matched are handed to ``search_results``.

    Returns the value of ``search_results``: called with status 500 when
    ``HAS_OCR`` is false or the component has no translation, otherwise
    with status 200 and the queryset of matched units.
    """
    obj = get_screenshot(request, pk)
    if not HAS_OCR:
        return search_results(500, obj)
    try:
        translation = obj.component.translation_set.all()[0]
    except IndexError:
        # No translation means there are no strings to match against.
        return search_results(500, obj)

    # Load the image and convert it to greyscale. convert() returns an
    # independent in-memory image, so the file-backed one (and its open
    # file handle) can be released as soon as the conversion is done.
    with Image.open(obj.image.path) as loaded_image:
        original_image = loaded_image.convert("L")

    scaled_image = None
    try:
        # Resize image (tesseract works best around 300dpi); resize()
        # already returns a new image, no explicit copy is needed.
        scaled_image = original_image.resize(
            tuple(size * 4 for size in original_image.size), Image.BICUBIC
        )

        # Find all our strings
        sources = dict(translation.unit_set.values_list('source', 'pk'))
        strings = tuple(sources)
        results = set()

        # Extract and match strings
        with c_locale(), PyTessBaseAPI() as api:
            for image in (original_image, scaled_image):
                for match in ocr_extract(api, image, strings):
                    results.add(sources[match])
    finally:
        # Release image memory even when the query or the OCR run raised.
        original_image.close()
        if scaled_image is not None:
            scaled_image.close()

    return search_results(
        200,
        obj,
        translation.unit_set.filter(pk__in=results)
    )
import difflib from django.contrib.auth.decorators import login_required from django.core.exceptions import PermissionDenied from django.http import JsonResponse from django.utils.translation import ugettext as _ from django.views.decorators.http import require_POST from django.views.generic import ListView, DetailView from django.shortcuts import get_object_or_404, redirect, render from PIL import Image from weblate.utils.locale import c_locale try: with c_locale(): from tesserocr import PyTessBaseAPI, RIL HAS_OCR = True except ImportError: HAS_OCR = False from weblate.screenshots.forms import ScreenshotForm from weblate.screenshots.models import Screenshot from weblate.trans.models import Source from weblate.utils import messages from weblate.utils.views import ComponentViewMixin def try_add_source(request, obj): if 'source' not in request.POST or not request.POST['source'].isdigit(): return False
from django.http import JsonResponse from django.shortcuts import get_object_or_404, redirect, render from django.utils.translation import ugettext as _ from django.views.decorators.http import require_POST from django.views.generic import DetailView, ListView from PIL import Image from weblate.screenshots.forms import ScreenshotForm from weblate.screenshots.models import Screenshot from weblate.trans.models import Source from weblate.utils import messages from weblate.utils.locale import c_locale from weblate.utils.views import ComponentViewMixin try: with c_locale(): from tesserocr import PyTessBaseAPI, RIL HAS_OCR = True except ImportError: HAS_OCR = False def try_add_source(request, obj): if 'source' not in request.POST: return False try: source = Source.objects.get( pk=int(request.POST['source']), component=obj.component )