Python translateString 예제들, louis.translateString Python 예제들

예제 #1

0

파일 보기

파일: views.py 프로젝트: BlissfulEscape/daisyproducer

def edit_global_words_with_missing_braille(request):
    """Edit global words that exist for only one of the two braille grades.

    On POST the submitted formset is validated and, for every form, a
    ``GlobalWord`` is created for the grade the word was missing. On any
    other method the words that have no counterpart in the other grade are
    looked up, a braille proposal is computed with liblouis for the missing
    grade, and one page of them is rendered as a formset.

    NOTE: the template context is ``locals()``, so every local name in this
    function is visible to the template.
    """

    WordFormSet = formset_factory(GlobalWordBothGradesForm, extra=0)

    if request.method == 'POST':
        formset = WordFormSet(request.POST)
        if formset.is_valid():
            for form in formset.forms:
                # Store the braille for the grade that was missing: a word
                # that originally had grade 1 gets a grade 2 entry and vice versa.
                GlobalWord.objects.create(
                    untranslated=form.cleaned_data['untranslated'], 
                    braille=form.cleaned_data['grade2'] if form.cleaned_data['original_grade'] == 1 else form.cleaned_data['grade1'],
                    grade=2 if form.cleaned_data['original_grade'] == 1 else 1,
                    type=form.cleaned_data['type'],
                    homograph_disambiguation=form.cleaned_data['homograph_disambiguation'])
            # Redirect back to this view after a successful save.
            return HttpResponseRedirect(reverse('dictionary_edit_global_words_with_missing_braille'))
        else:
            # Re-render the bound (invalid) formset.
            return render_to_response('dictionary/edit_missing_globals.html', locals(),
                                      context_instance=RequestContext(request))

    # Select every global word for which no row with the same untranslated
    # text, type and homograph disambiguation exists in a different grade.
    WORDS_WITH_MISSING_BRAILLE = """
SELECT l.* 
FROM dictionary_globalword AS l
WHERE NOT EXISTS
      (
      SELECT NULL
      FROM dictionary_globalword AS r
      WHERE
	l.untranslated = r.untranslated AND 
      	l.type = r.type AND
      	l.homograph_disambiguation = r.homograph_disambiguation AND
      	l.grade != r.grade
      )
ORDER BY l.untranslated
"""
    single_grade_words = GlobalWord.objects.raw(WORDS_WITH_MISSING_BRAILLE)
    # Initial form data: the existing braille is kept for the grade the word
    # already has; the other grade is proposed via louis.translateString.
    # Note that this is computed for all words, before pagination.
    missing_words = [{'untranslated': smart_unicode(word.untranslated),
                      'original_grade': word.grade,
                      'grade1': smart_unicode(word.braille) if word.grade == 1 else louis.translateString(getTables(1), smart_unicode(word.untranslated)),
                      'grade2': smart_unicode(word.braille) if word.grade == 2 else louis.translateString(getTables(2), smart_unicode(word.untranslated)),
                      'type' : word.type,
                      'homograph_disambiguation': smart_unicode(word.homograph_disambiguation)}
                     for word in single_grade_words]

    paginator = Paginator(missing_words, MAX_WORDS_PER_PAGE)
    # A non-numeric ?page= value falls back to the first page.
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1
    
    # An invalid page number falls back to the last page.
    try:
        words = paginator.page(page)
    except InvalidPage:
        words = paginator.page(paginator.num_pages)

    formset = WordFormSet(initial=words.object_list)
    return render_to_response('dictionary/edit_missing_globals.html', locals(),
                              context_instance=RequestContext(request))

예제 #2

0

파일 보기

파일: brailleTables.py 프로젝트: BlissfulEscape/daisyproducer

def writeWhiteListTables(words):
    """Write the four global white-list tables for the given words.

    Each table is handed to ``writeTable`` as a file name, a lazy sequence of
    ``(untranslated, braille)`` pairs and a one-argument translation callable
    built on ``louis.translateString``.
    """

    def keyed_entry(word):
        # Homographs (type 5) are keyed by their disambiguation string with
        # '|' replaced by U+250A; everything else by the untranslated word.
        if word.type == 5:
            key = smart_unicode(word.homograph_disambiguation).replace('|', unichr(0x250A))
        else:
            key = word.untranslated
        return (key, word.braille)

    def plain_entry(word):
        return (word.untranslated, word.braille)

    def translator(grade, **table_options):
        # The tables are looked up each time the returned callable is invoked.
        return lambda word: louis.translateString(getTables(grade, **table_options), word)

    writeTable('sbs-de-g1-white.mod',
               (keyed_entry(word) for word in words.filter(grade=1).iterator()),
               translator(1))
    writeTable('sbs-de-g2-white.mod',
               (keyed_entry(word)
                for word in words.filter(grade=2).filter(type__in=(0, 1, 3, 5)).iterator()),
               translator(2))
    writeTable('sbs-de-g2-name-white.mod',
               (plain_entry(word) for word in words.filter(grade=2).filter(type__in=(1,2))),
               translator(2, name=True))
    writeTable('sbs-de-g2-place-white.mod',
               (plain_entry(word) for word in words.filter(grade=2).filter(type__in=(3,4))),
               translator(2, place=True))

예제 #3

0

파일 보기

파일: brailleTables.py 프로젝트: BlissfulEscape/daisyproducer

def writeLocalTables(changedDocuments):
    """Write the per-document white-list tables for every changed document.

    For each document its local words (ordered by ``untranslated``) are split
    into four tables whose file names carry the document identifier. Every
    table is passed to ``writeTable`` together with a lazy sequence of
    ``(untranslated, braille)`` pairs and a translation callable.
    """

    def keyed_entry(word):
        # Homographs (type 5) are keyed by their disambiguation string with
        # '|' replaced by U+250A; everything else by the untranslated word.
        if word.type == 5:
            key = smart_unicode(word.homograph_disambiguation).replace('|', unichr(0x250A))
        else:
            key = word.untranslated
        return (key, word.braille)

    def plain_entry(word):
        return (word.untranslated, word.braille)

    def translator(grade, **table_options):
        # The tables are looked up each time the returned callable is invoked.
        return lambda word: louis.translateString(getTables(grade, **table_options), word)

    for document in changedDocuments:
        words = LocalWord.objects.filter(document=document).order_by('untranslated')
        writeTable('sbs-de-g1-white-%s.mod' % document.identifier,
                   (keyed_entry(word) for word in words.filter(grade=1).iterator()),
                   translator(1))
        writeTable('sbs-de-g2-white-%s.mod' % document.identifier,
                   (keyed_entry(word)
                    for word in words.filter(grade=2).filter(type__in=(0, 1, 3, 5)).iterator()),
                   translator(2))
        writeTable('sbs-de-g2-name-white-%s.mod' % document.identifier,
                   (plain_entry(word) for word in words.filter(grade=2).filter(type__in=(1,2))),
                   translator(2, name=True))
        writeTable('sbs-de-g2-place-white-%s.mod' % document.identifier,
                   (plain_entry(word) for word in words.filter(grade=2).filter(type__in=(3,4))),
                   translator(2, place=True))

예제 #4

0

파일 보기

def translate(table, word):
    """Translate ``word`` with liblouis and drop hyphenation marks from the result."""
    result = louis.translateString(table, word)
    # Hyphenation marks found their way back into the German core liblouis
    # tables. They are only needed for the actual translation to sbsform and
    # are unwanted when braille is proposed to the user for unknown words, so
    # each mark character is removed here.
    for hyphenation_mark in ('t', 'w', 'a', 'n'):
        result = result.replace(hyphenation_mark, '')
    return result

예제 #5

0

파일 보기

파일: test_conversion.py 프로젝트: Bristol-Braille/calibre-pef-plugin

 def test_grade2_text_conversion(self):
     # Translate every line of the grade 2 fixture with liblouis to get the
     # expected braille, run the converter, then compare the produced PEF.
     source_lines = self.create_text_file_for_grade2()
     expected_braille = [louis.translateString(['en-GB-g2.ctb'], line)
                         for line in source_lines]
     command = " ".join([ebook_convert, test_text_file, pef_file, "--ueb2"])
     os.system(command)
     self.assertTrue(os.path.exists(pef_file))
     self.pef_test_grade2(pef_file, expected_braille)

예제 #6

0

파일 보기

파일: Translator.py 프로젝트: Chertan/CUB_Control_Software

def translate(in_string, in_lang, grade=1):
    """Convert an input string into a list of Braille cell descriptions.

    :param in_string: The string or character to be translated
    :param in_lang: Language/Format of the input string ("ENG", "UEB" or "BKB")
    :param grade: Grade of Unified English Braille used when in_lang is "ENG" (1 or 2)
    :return: A list with one cell description per translated symbol
    """
    # Only English text goes through liblouis; every other format is used as-is
    if in_lang == "ENG":
        if grade not in (1, 2):
            # Ensures grade is valid
            raise OperationError("Input Conversation", "Translation",
                                 "Invalid Grade of Unified English Braille")
        table_name = 'en-ueb-g1.ctb' if grade == 1 else 'en-ueb-g2.ctb'
        braille = louis.translateString([table_name], in_string)
    else:
        braille = in_string

    # Braille keyboard input is looked up as a whole, not symbol by symbol
    if in_lang == "BKB":
        return [b_keyboard_to_br[braille]]

    is_ueb = in_lang in ("ENG", "UEB")
    cells = []
    for symbol in braille:
        if not is_ueb:
            # Invalid language set - should never be reached in operation as the argument
            # parser should prevent it; left here in case of abnormal circumstances
            raise OperationError("Input Conversation", "Translation",
                                 "Invalid Language of input file")
        # Converted symbol as per UEB Specification
        cells.append(ueb_to_br[symbol])
    return cells

예제 #7

0

파일 보기

파일: utils.py 프로젝트: charadani/BrailleExtender

def getTextInBraille(t = '', table = None):
	"""Return the braille rendering of ``t``, translated line by line.

	When ``t`` is empty the current text selection is used; when ``table`` is
	not given the configured translation table is used. Blank lines are kept
	as empty lines in the result.
	"""
	if not t:
		t = getTextSelection()
	if not t.strip():
		return ''
	if not table:
		table = os.path.join(brailleTables.TABLES_DIR, config.conf["braille"]["translationTable"])
	translated = []
	for line in t.split("\n"):
		line = line.rstrip()
		if not line:
			translated.append('')
		elif charToDotsInLouis:
			translated.append(louis.charToDots([table], line, louis.ucBrl))
		else:
			translated.append(louis.translateString([table], line, None, louis.dotsIO))
	joined = '\n'.join(translated)
	if charToDotsInLouis:
		return joined
	# Shift dotsIO output into the Unicode braille block (0x2800).
	# NOTE(review): the threshold is decimal 8000, not 0x8000 - confirm intent.
	return ''.join(unichr(ord(ch)-0x8000+0x2800) if ord(ch) > 8000 else ch for ch in joined)

예제 #8

0

파일 보기

def getTextInBraille(t=None, table=[]):
    """Translate ``t`` to braille, one output line per non-empty input line.

    :param t: text to translate; when falsy the current text selection is used
    :param table: list of liblouis table names or paths. An empty list, or a
        list containing "current", selects the current braille tables. Names
        without a path separator are resolved against ``brailleTables.TABLES_DIR``.
    :return: the translated lines joined with newlines, or '' when there is no text
    :raises TypeError: if ``table`` is not a list
    """
    if not isinstance(table, list):
        raise TypeError("Wrong type for table parameter: %s" % repr(table))
    if not t: t = getTextSelection()
    if not t: return ''
    if not table or "current" in table:
        tables = getCurrentBrailleTables()
    else:
        # Build a new list instead of rewriting the caller's list in place,
        # so the argument (and the shared default) is never modified.
        tables = [
            e if '\\' in e or '/' in e else "%s\\%s" % (brailleTables.TABLES_DIR, e)
            for e in table
        ]
    res = [
        louis.translateString(tables, l, mode=louis.ucBrl | louis.dotsIO)
        for l in t.split("\n") if l
    ]
    return '\n'.join(res)

예제 #9

0

파일 보기

def getTextInBraille(t=''):
    """Return ``t`` translated to braille one character at a time.

    An empty ``t`` is replaced by the current text selection. Carriage returns
    and newlines are copied through untranslated; whitespace-only text yields ''.
    """
    if t == '': t = getTextSelection()
    if t.strip() == '':
        return ''

    pieces = []
    for ch in t:
        if ch in ['\r', '\n']:
            pieces.append(ch)
            continue
        table_path = os.path.join(brailleTables.TABLES_DIR,
                                  config.conf["braille"]["translationTable"])
        pieces.append(louis.translateString([table_path], ch, None, louis.dotsIO))
    dots = "".join(pieces)

    # Move translated cells into the Unicode braille block (0x2800).
    return "".join(
        unichr(ord(cell) - 0x8000 + 0x2800) if ord(cell) > 8000 else cell
        for cell in dots
    )

예제 #10

0

파일 보기

def getTextInBraille(t=None, table=[]):
    """Translate ``t`` line by line into Unicode braille.

    :param t: text to translate; when falsy the current text selection is used
    :param table: list of liblouis tables. An empty list selects the configured
        translation table; a "current" entry is replaced by it.
    :return: the translated lines joined with newlines ('' for blank text);
        blank input lines stay blank in the output
    :raises TypeError: if ``table`` is not a list
    """
    if not isinstance(table, list):
        raise TypeError("Wrong type for table parameter: %s" % repr(table))
    if not t: t = getTextSelection()
    if not t or not t.strip(): return ''
    # Work on a copy: appending to / overwriting ``table`` itself would modify
    # the caller's list and, worse, the shared default list, which would then
    # keep the first resolved table for every later call.
    tables = list(table)
    if not tables or "current" in tables:
        currentTable = os.path.join(brailleTables.TABLES_DIR,
                                    config.conf["braille"]["translationTable"])
        if "current" in tables: tables[tables.index("current")] = currentTable
        else: tables.append(currentTable)
    nt = []
    for l in t.split("\n"):
        l = l.rstrip()
        if not l:
            nt.append('')
            continue
        # Shift the dotsIO output into the Unicode braille block (0x2800).
        nt.append(''.join([
            chr(ord(ch) - 0x8000 + 0x2800)
            for ch in louis.translateString(tables, l, mode=louis.dotsIO)
        ]))
    return '\n'.join(nt)

예제 #11

0

파일 보기

파일: testing.py 프로젝트: jeffersonbenson/TouchType

def main():
    """Read text from stdin and print its translation and back-translation.

    The text is translated with the UEB grade 2 table both to characters and
    to dots, and each result is translated back and printed.
    """
    tables = [b'./en-ueb-g2.ctb']
    print('Testing!')
    print('liblouis version: ' + louis.version())
    # Report success only after the table check has actually run.
    louis.checkTable(tables)
    print('Table Check passed!')
    # Read stdin as one string: the translation calls below are given a
    # single text, not the list of lines that readlines() returns.
    something = sys.stdin.read()
    # Mode 192 combines the ucBrl (64) and noUndefined (128) modes. The result
    # is a single string, so it is turned into a list to show individual characters.
    print("Translation in characters:")
    braille = louis.translateString(tables, something, mode=192)
    print(list(braille))
    print("Here it is in dots:")
    dots = louis.charToDots(tables, something, mode=192)
    print(list(dots))
    print("Here's a back-translation of what you typed:")
    # Returns typed phrases without contractions, for proper bash interpretation
    print("Back translation of characters:")
    backBraille = louis.backTranslateString(tables, braille, mode=1)
    print(backBraille)
    print("Back translation of the dots:")
    print(louis.dotsToChar(tables, dots))

예제 #12

0

파일 보기

파일: braille.py 프로젝트: JohnGrime/Braille3D

#
# Two options; use Python bindings for liblouis, or the internal translation
# system ("dumb"). As you might infer from the naming, the internal system is
# not very good and should only be considered for emergency purposes; it's only
# a partial implementation of the Braille system, and I stopped development
# after integrating liblouis support. However, it's quite simple and flexible
# so the functionality can be extended in future for a lightweight system that
# has no external dependencies if that is desired.
#

# Pick the translation backend according to the use_liblouis flag.
if use_liblouis:
    import louis

    tableList = ["en-ueb-g2.ctb"]
    # Forward translation of the input text with the dotsIO and ucBrl mode
    # flags combined.
    unicode_dots = louis.translateString(tableList,
                                         input_txt,
                                         typeform=None,
                                         mode=louis.dotsIO | louis.ucBrl)
    # Translate the result back with the same table and the default mode (0);
    # it is only used in the diagnostic line printed below.
    backTranslation = louis.backTranslateString(tableList,
                                                unicode_dots,
                                                typeform=None,
                                                mode=0)

    print(
        f'# input => Braille => back translation : "{input_txt}" => "{unicode_dots}" => "{backTranslation}"'
    )

else:
    # Internal fallback: import the tokeniser and translator from braille_fsm
    # and create one instance of each.
    from braille_fsm import BrailleTokeniser as Tokeniser
    from braille_fsm import BrailleTranslator as Translator

    btok, btrn = Tokeniser(), Translator()

예제 #13

0

파일 보기

파일: views.py 프로젝트: BlissfulEscape/daisyproducer

def confirm(request, grade, deferred=False):
    """Let the user confirm unconfirmed local words of the given grade.

    If conflicting words exist the request is redirected to the matching
    conflict view first. On POST the submitted formset is validated and each
    form is passed to ``update_word_tables``. Otherwise a default entry is
    created for every unconfirmed homograph that has none, and one page of
    words to confirm (optionally narrowed by the ``filter`` GET parameter)
    is rendered as a formset.

    NOTE: the template context is ``locals()``, so every local name in this
    function is visible to the template.

    :param request: the HTTP request
    :param grade: braille grade of the words to confirm
    :param deferred: whether to work on deferred words instead of regular ones
    """
    if [word for word in get_conflicting_words(grade)]:
        redirect = ('dictionary_confirm_deferred_conflicting_duplicates_g' if deferred
                        else 'dictionary_confirm_conflicting_duplicates_g') + str(grade)
        return HttpResponseRedirect(reverse(redirect))

    WordFormSet = formset_factory(ConfirmDeferredWordForm if deferred else ConfirmWordForm, extra=0)
    if request.method == 'POST':

        formset = WordFormSet(request.POST)
        if formset.is_valid():
            # FIXME: in Django 1.3+ formsets are iterable, so you can just say
            # for form in formset:
            for form in formset.forms:
                update_word_tables(form, grade, deferred)
            # FIXME: in principle we need to regenerate the liblouis tables,
            # i.e. the white lists now. However we do this asynchronously
            # (using a cron job) for now. There are several reasons for this:
            # 1) It is slow as hell if done inside a transaction. To do this
            # outside the transaction we need transaction context managers
            # (https://docs.djangoproject.com/en/1.3/topics/db/transactions/#controlling-transaction-management-in-views)
            # which are only available in Django 1.3.
            # 2) We need to serialize the table writing so they do not write
            # on top of each other. This is easy if it is done periodically.
            # 3) Of course it would be nice to use some kind of message queue
            # for this (e.g. rabbitmq and celery), but for now this poor mans
            # solution seems good enough
            # redirect to self as there might be more words
            redirect = ('dictionary_confirm_deferred_g' if deferred else 'dictionary_confirm_g') + str(grade)
            return HttpResponseRedirect(reverse(redirect))
        else:
            return render_to_response('dictionary/confirm.html', locals(),
                                      context_instance=RequestContext(request))

    # create a default for all unconfirmed homographs which have no default, i.e. no restriction word entry
    unconfirmed_homographs = set((smart_unicode(word) for
                                  word in
                                  LocalWord.objects.filter(grade=grade, type=5, isConfirmed=False, isDeferred=deferred,
                                                           document__state__sort_order=final_sort_order).values_list('untranslated', flat=True)))
    if unconfirmed_homographs:
        covered_entries = set((smart_unicode(word) for
                               word in
                               chain(
                    LocalWord.objects.filter(grade=grade, type=0, untranslated__in=unconfirmed_homographs).values_list('untranslated', flat=True),
                    GlobalWord.objects.filter(grade=grade, type=0, untranslated__in=unconfirmed_homographs).values_list('untranslated', flat=True))))

        for word in unconfirmed_homographs - covered_entries:
            # attach the default entry to the first document that contains the homograph
            document = Document.objects.filter(localword__grade=grade, localword__type=5, localword__isConfirmed=False, localword__untranslated=word)[0]
            w = LocalWord(untranslated=word,
                          braille=louis.translateString(getTables(grade), word),
                          grade=grade, type=0, document=document)
            w.save()

    # Default to an empty filter (matches every word) so that an invalid
    # filter form no longer leaves currentFilter unbound for the query below.
    currentFilter = ''
    filterform = FilterForm(request.GET)
    if filterform.is_valid():
        currentFilter = filterform.cleaned_data['filter']

    words_to_confirm = LocalWord.objects.filter(grade=grade, isConfirmed=False, isDeferred=deferred,
                                                untranslated__contains=currentFilter,
                                                document__state__sort_order=final_sort_order).order_by('untranslated', 'type').values('untranslated', 'braille', 'type', 'homograph_disambiguation', 'isLocal').distinct()
    paginator = Paginator(words_to_confirm, MAX_WORDS_PER_PAGE)
    # a non-numeric ?page= value falls back to the first page
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1

    # an invalid page number falls back to the last page
    try:
        words = paginator.page(page)
    except InvalidPage:
        words = paginator.page(paginator.num_pages)

    have_type = any((word['type']!=0 for word in words.object_list))
    have_homograph_disambiguation = any((word['homograph_disambiguation']!='' for word in words.object_list))
    formset = WordFormSet(initial=words.object_list)
    return render_to_response('dictionary/confirm.html', locals(),
                              context_instance=RequestContext(request))

예제 #14

0

파일 보기

파일: check_doctests.py 프로젝트: VFO-GROUP/liblouis

 def braille(self, txt):
     """Return ``txt`` translated by liblouis with this object's tables."""
     translated = louis.translateString(self.tables, txt)
     return translated

예제 #15

0

파일 보기

 def test_9(self):
     # The input starts with the surrogate pair D83D/DE02 and continues with plain text.
     tables = ["en-ueb-g1.ctb", "tests/test.cti"]
     expected = '"<face with tears of joy"> after'
     self.assertEqual(louis.translateString(tables, "\ud83d\ude02 after"), expected)

예제 #16

0

파일 보기

    Variant('British English, UEB grade 2', 'en-ueb-g2.ctb', 'en_GB.UTF-8@ueb2'),
]

for variant in variants:
    dest = polib.POFile()

    dest.metadata = src.metadata
    now = datetime.now(timezone.utc).strftime('%F %H:%M%z')
    dest.metadata['PO-Revision-Date'] = now

    for src_entry in valid_entries:
        # unicode.dis still uses ASCII spaces.
        if src_entry.msgid.find('  ') != -1:
            print('Warning: embedded double-space:\n' + str(src_entry.occurrences))
        translation = louis.translateString(
            ['unicode.dis', variant.table],
            src_entry.msgid
        )
        # Louis squashes newlines into spaces.  Provided there are
        # no double-spaces in the input we can assume a double-space
        # in the output was meant to be a line break.
        import re
        wrapped = []
        for para in re.split(r'  ', translation):
            wrapped.append(textwrap.fill(para, width=CANUTE_WIDTH))
            wrapped.append('\n')
            wrapped.append('\n')
        wrapped.pop()
        wrapped.pop()
        wrapped = ''.join(wrapped)
        dest_entry = polib.POEntry(
            msgid=src_entry.msgid,

예제 #17

0

파일 보기

 def test_13(self):
     # The input starts with the surrogate pair D83E/DD23 and continues with plain text.
     tables = ["en-ueb-g1.ctb", "tests/test.cti"]
     expected = '"<rolling on the floor laughing"> b'
     self.assertEqual(louis.translateString(tables, "\ud83e\udd23 b"), expected)

예제 #18

0

파일 보기

파일: brailleTables.py 프로젝트: BlissfulEscape/daisyproducer

 def write_csv(f, tables, word):
     """Write a tab-separated line for ``word`` if liblouis disagrees with its stored braille.

     Nothing is written when the translation of the untranslated word with
     ``tables`` equals the stored braille.
     """
     untranslated = smart_unicode(word.untranslated)
     stored_braille = smart_unicode(word.braille)
     translation = louis.translateString(tables, untranslated)
     if translation == stored_braille:
         return
     row = (untranslated, stored_braille, translation, word.grade,
            word.type, smart_unicode(word.homograph_disambiguation))
     f.write("%s\t%s\t%s\t%s\t%s\t%s\n" % row)

예제 #19

0

파일 보기

파일: check_doctests.py 프로젝트: prashantmitts/brailleback

 def braille(self, txt):
     """Return ``txt`` translated by liblouis with this object's tables."""
     translated = louis.translateString(self.tables, txt)
     return translated

예제 #20

0

파일 보기

 def test_1(self):
     # A lone U+1F602 (FACE WITH TEARS OF JOY) must translate to its
     # spelled-out name. The code point is written as an escape so the
     # literal cannot be damaged by a wrong file encoding.
     self.assertEqual(
         louis.translateString(["en-ueb-g1.ctb", "tests/test.cti"], "\U0001F602"),
         '"<face with tears of joy">')

예제 #21

0

파일 보기

 def test_10(self):
     # U+1F602 (FACE WITH TEARS OF JOY) surrounded by plain text. The code
     # point is written as an escape so the literal cannot be damaged by a
     # wrong file encoding.
     self.assertEqual(
         louis.translateString(["en-ueb-g1.ctb", "tests/test.cti"],
                               "before \U0001F602 after"),
         'before "<face with tears of joy"> after')

예제 #22

0

파일 보기

This is a tiny example that illustrates how you can use lxml to read
HTML and then translate only the text contained in the HTML nodes.

Similar scripts can be written for XML in general using lxml.etree.
Much finer-grained control is also possible: select specific elements
and attributes with XPath and the other methods lxml provides, and
handle each of them differently.
"""

import textwrap
import louis
from lxml import html

# Translation table and the width the braille output is wrapped to.
tableList = ["en-ueb-g2.ctb"]
lineLength = 38
fileIn = input("Please enter the input file name: ")
fileOut = input("Please enter the output file name: ")

with open(fileOut, "w") as outputFile:
    html_root = html.parse(fileIn).getroot()
    # Walk the children of the root's direct children and translate the
    # text content of each one that is not blank.
    for head_or_body in html_root:
        for elem in head_or_body:
            line = elem.xpath("string()")
            if line.strip() == "":
                continue
            translation = louis.translateString(tableList, line, 0, 0)
            wrapped = textwrap.fill(translation, lineLength)
            outputFile.write(wrapped)
            outputFile.write("\n")

print("Done.")

예제 #23

0

파일 보기

 def test_14(self):
     # U+1F923 (ROLLING ON THE FLOOR LAUGHING) surrounded by plain text. The
     # code point is written as an escape so the literal cannot be damaged by
     # a wrong file encoding.
     self.assertEqual(
         louis.translateString(["en-ueb-g1.ctb", "tests/test.cti"],
                               "a \U0001F923 b"),
         'a "<rolling on the floor laughing"> b')

예제 #24

0

파일 보기

파일: views.py 프로젝트: BlissfulEscape/daisyproducer

def check(request, document_id, grade):
    """Show the words of a document that are not yet in the dictionary.

    On POST the submitted model formset is validated, the instances are saved
    for this document and grade, and the local tables are rewritten. Otherwise
    the latest document version is filtered through XSLT, its homographs,
    names, places and remaining words are compared against the global and
    local dictionaries, and one page of the unknown ones (with a liblouis
    braille proposal each) is rendered as a formset. Local words that no
    longer occur in the document are deleted and a ``DocumentStatistic`` row
    is saved.

    NOTE: the template context is ``locals()``, so every local name in this
    function is visible to the template.

    :param request: the HTTP request
    :param document_id: primary key of the document to check (404 if unknown)
    :param grade: braille grade to check the document for
    """

    document = get_object_or_404(Document, pk=document_id)

    if request.method == 'POST':
        WordFormSet = modelformset_factory(
            LocalWord,
            form=RestrictedWordForm,
            exclude=('document', 'isConfirmed', 'isDeferred', 'grade'),
            can_delete=True)

        formset = WordFormSet(request.POST)
        if formset.is_valid():
            instances = formset.save(commit=False)
            # grade and document are excluded from the form, so set them here
            for instance in instances:
                instance.grade = grade
                instance.document = document
                instance.save()
            writeLocalTables([document])
            redirect = 'dictionary_check_g1' if grade == 1 else 'dictionary_check_g2'
            return HttpResponseRedirect(reverse(redirect, args=[document_id]))
        else:
            return render_to_response('dictionary/words.html', locals(),
                                      context_instance=RequestContext(request))

    # filter some words from the xml
    content = document.latest_version().content
    content.open()
    # strip='none': if this parameter is not set, whitespace is removed automatically for documents with a DOCTYPE declaration
    tree = etree.parse(saxon9he(content.file, os.path.join(settings.PROJECT_DIR, 'dictionary', 'xslt', 'filter.xsl'), '-strip:none', contraction=grade).stdout)
    content.close()

    # grab the homographs
    homographs = set(("|".join(homograph.xpath('text()')).lower()
                      for homograph in tree.xpath('//brl:homograph', namespaces=BRL_NAMESPACE)))
    duplicate_homographs = set((smart_unicode(word) for
                                word in
                                chain(GlobalWord.objects.filter(grade=grade).filter(type=5).filter(homograph_disambiguation__in=homographs).values_list('homograph_disambiguation', flat=True),
                                      LocalWord.objects.filter(grade=grade).filter(type=5).filter(document=document).filter(homograph_disambiguation__in=homographs).values_list('homograph_disambiguation', flat=True))))
    unknown_homographs = [{'untranslated': homograph.replace('|', ''),
                           'braille': louis.translateString(getTables(grade), homograph.replace('|', unichr(0x250A))),
                           'type': 5,
                           'homograph_disambiguation': homograph}
                          for homograph in homographs - duplicate_homographs]
    # grab names and places
    names = set((name for names in
                 (name.text.lower().split() for name in tree.xpath('//brl:name', namespaces=BRL_NAMESPACE) if name.text is not None) for name in names))
    duplicate_names = set((smart_unicode(word) for
                           word in
                           chain(GlobalWord.objects.filter(grade=grade).filter(type__in=(1,2)).filter(untranslated__in=names).values_list('untranslated', flat=True),
                                 LocalWord.objects.filter(grade=grade).filter(type__in=(1,2)).filter(document=document).filter(untranslated__in=names).values_list('untranslated', flat=True))))
    unknown_names = [{'untranslated': name,
                      'braille': louis.translateString(getTables(grade, name=True), name),
                      'type': 2,
                      'homograph_disambiguation': ''}
                     for name in names - duplicate_names]
    places = set((place for places in
                 (place.text.lower().split() for place in tree.xpath('//brl:place', namespaces=BRL_NAMESPACE) if place.text is not None) for place in places))
    duplicate_places = set((smart_unicode(word) for
                            word in
                            chain(GlobalWord.objects.filter(grade=grade).filter(type__in=(3,4)).filter(untranslated__in=places).values_list('untranslated', flat=True),
                                  LocalWord.objects.filter(grade=grade).filter(type__in=(3,4)).filter(document=document).filter(untranslated__in=places).values_list('untranslated', flat=True))))
    unknown_places = [{'untranslated': place,
                       'braille': louis.translateString(getTables(grade, place=True), place),
                       'type': 4,
                       'homograph_disambiguation': ''}
                      for place in places - duplicate_places]

    # filter homographs, names and places from the xml
    xsl = etree.parse(os.path.join(settings.PROJECT_DIR, 'dictionary', 'xslt', 'filter_names.xsl'))
    transform = etree.XSLT(xsl)
    filtered_tree = transform(tree)
    # grab the rest of the content
    content = etree.tostring(filtered_tree, method="text", encoding=unicode)
    # filter all punctuation and replace dashes by space, so we can split by space below
    content = ''.join(
        # replace Punctuation Dash and Punctuation other (except for "'") with space
        c if c == u"\u0027" or unicodedata.category(c) not in ['Pd', 'Po'] else ' '
        for c in content
        # drop all chars which are not letters, separators or select
        # punctuation which we replace with space later on
        if unicodedata.category(c) in ['Lu', 'Ll', 'Zs', 'Zl', 'Zp', 'Pd', 'Po']
        or c in ['\n', '\r'])

    # single-character tokens are ignored
    new_words = set((w.lower() for w in content.split() if len(w) > 1))
    # FIXME: We basically do a set difference manually here. This
    # would probably be better if done inside the db. However for that
    # we would have to be able to insert the new_words into the db in
    # an efficient manner, i.e. bulk insert. For a possibility on how
    # to do this in the context of Django ORM look at
    # http://ole-laursen.blogspot.com/2010/11/bulk-inserting-django-objects.html.
    # After that we could for example do a query along the lines of
    # cursor.execute("SELECT untranslated from new_words EXCEPT SELECT
    # untranslated FROM dict_words;). However MySQL doesn't seem to
    # support EXCEPT so it would be SELECT untranslated FROM new_words
    # w1 LEFT JOIN dict_words w2 ON w1.untranslated=w2.untranslated
    # WHERE w2.untranslated IS NULL;
    duplicate_words = set((smart_unicode(word) for
                           word in
                           chain(GlobalWord.objects.filter(grade=grade).filter(untranslated__in=new_words).values_list('untranslated', flat=True),
                                 LocalWord.objects.filter(grade=grade).filter(document=document).filter(untranslated__in=new_words).values_list('untranslated', flat=True))))
    unknown_words = [{'untranslated': word,
                      'braille': louis.translateString(getTables(grade), word),
                      'type' : 0,
                      'homograph_disambiguation': ''}
                     for word in new_words - duplicate_words]

    unknown_words = unknown_words + unknown_homographs + unknown_names + unknown_places
    # case-insensitive ordering by the untranslated word (stable, same order
    # as the former cmp-based sort)
    unknown_words.sort(key=lambda x: x['untranslated'].lower())

    # remove words from the local words which are no longer in the document (they might have
    # been typos that slipped in to the local words and were corrected subsequently)
    all_duplicates = duplicate_homographs | duplicate_names | duplicate_places | duplicate_words
    LocalWord.objects.filter(grade=grade, document=document).exclude(untranslated__in=all_duplicates).delete()

    paginator = Paginator(unknown_words, MAX_WORDS_PER_PAGE)
    # a non-numeric ?page= value falls back to the first page
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1

    # an invalid page number falls back to the last page
    try:
        words = paginator.page(page)
    except InvalidPage:
        words = paginator.page(paginator.num_pages)

    WordFormSet = modelformset_factory(
        LocalWord,
        form=RestrictedWordForm,
        exclude=('document', 'isConfirmed', 'isDeferred', 'grade'),
        extra=len(words.object_list), can_delete=True)

    have_type = any((word['type']!=0 for word in words.object_list))
    have_homograph_disambiguation = any((word['homograph_disambiguation']!='' for word in words.object_list))
    formset = WordFormSet(queryset=LocalWord.objects.none(), initial=words.object_list)

    # Document statistic
    stats = DocumentStatistic(document=document, grade=grade, total=len(new_words), unknown=len(unknown_words))
    # A document without any words has total == 0; report 0% instead of
    # failing with a ZeroDivisionError.
    percentage = 100.0*stats.unknown/stats.total if stats.total else 0.0
    stats.save()

    return render_to_response('dictionary/words.html', locals(),
                              context_instance=RequestContext(request))

예제 #25

0

파일 보기

 def test_8(self):
     # Plain text followed by the surrogate pair D83D/DE02 at the end of the input.
     tables = ["en-ueb-g1.ctb", "tests/test.cti"]
     expected = 'before "<face with tears of joy">'
     self.assertEqual(louis.translateString(tables, "before \ud83d\ude02"), expected)

예제 #26

0

파일 보기

 def test_4(self):
     # The surrogate pair D83D/DE02 sits between two plain-text words.
     tables = ["en-ueb-g1.ctb", "tests/test.cti"]
     expected = 'a "<face with tears of joy"> b'
     self.assertEqual(louis.translateString(tables, "a \ud83d\ude02 b"), expected)