def all_tab_delimited_formatter(self, form):
    """Return every exported field of ``form`` as one tab-delimited row.

    Values are passed through ``self.no_tabs``; optional values that are
    empty become ``u''`` so the row always has the same number of columns.
    Column order: id, grammaticality + transcription, narrow phonetic
    transcription, phonetic transcription, morpheme break, morpheme gloss,
    glosses, comments, speaker comments, context, date elicited, datetime
    entered, datetime modified, syntactic category string, elicitor,
    enterer, verifier, speaker, elicitation method, syntactic category,
    source, keywords.
    """

    def cell(text):
        # Result of no_tabs, with any falsy result replaced by u''.
        return self.no_tabs(text) or u''

    def optional(text):
        # Only hand the value to no_tabs when there is something to hand over.
        return cell(text) if text else u''

    def translated_ol(text):
        return cell(h.storageToOutputTranslateOLOnly(text)) if text else u''

    def formatted(moment, layout):
        return cell(moment.strftime(layout)) if moment else u''

    def full_name(person):
        if not person:
            return u''
        return cell(u'%s %s' % (person.firstName, person.lastName))

    def name_of(entity):
        return cell(entity.name) if entity else u''

    def gloss_cell(gloss):
        # ';' separates glosses in the column, so it is swapped for '.'
        # inside each individual gloss.
        return '%s%s' % (gloss.glossGrammaticality,
                         gloss.gloss.replace(';', '.'))

    columns = [
        str(form.id),
        self.no_tabs(u'%s%s' % (
            form.grammaticality,
            h.storageToOutputTranslate(form.transcription))),
        cell(form.narrowPhoneticTranscription),
        cell(form.phoneticTranscription),
        cell(h.storageToOutputTranslate(form.morphemeBreak, True)),
        cell(form.morphemeGloss),
        self.no_tabs(u'; '.join([gloss_cell(x) for x in form.glosses])),
        translated_ol(form.comments),
        translated_ol(form.speakerComments),
        optional(form.context),
        formatted(form.dateElicited, '%b %d, %Y'),
        formatted(form.datetimeEntered, '%b %d, %Y at %I:%M %p'),
        formatted(form.datetimeModified, '%b %d, %Y at %I:%M %p'),
        optional(form.syntacticCategoryString),
        full_name(form.elicitor),
        full_name(form.enterer),
        full_name(form.verifier),
        full_name(form.speaker),
        name_of(form.elicitationMethod),
        name_of(form.syntacticCategory),
        cell(u'%s (%s)' % (form.source.authorLastName, form.source.year))
        if form.source else u'',
        cell(u'; '.join([x.name.replace(u';', u'.') for x in form.keywords]))
        if form.keywords else u'',
    ]
    # Final safety net: no column may contain a literal tab.
    return u'\t'.join([x.replace('\t', ' ') for x in columns])
def covington_form_formatter(self, form, **kwargs):
    """Return a XeLaTeX ``examples`` environment for ``form`` (Covington).

    When the Form has both a morpheme break and a morpheme gloss, the
    output is a three-line ``\\glll`` interlinear example followed by the
    translations; otherwise the transcription and translations are simply
    put on separate lines.  Because non-ascii characters might occur in
    the Form, XeLaTeX (not just plain LaTeX) is required to process the
    document.

    Keyword arguments read: ``secondary_data`` (append the output of
    ``self.xelatex_secondary_data``) and ``reference`` (forwarded to that
    method).

    Note: h.capsToLatexSmallCaps was originally used to convert uppercase
    glosses to LaTeX smallcaps (``\\textsc{}``), but the Aboriginal Serif
    font was not rendering the smallcaps, so it was removed.  It should be
    reinstated if a font is found that renders NAPA symbols AND smallcaps.
    """
    opening = u'\n\n\\begin{examples}\n'
    closing = u'\n\\end{examples}'

    # A missing Form only yields a warning item.
    if not form:
        return u''.join(
            [opening, u'\t\\item WARNING: BAD FORM REFERENCE', closing])

    esc = self.esc_latex

    def quoted_translations():
        # Each translation is wrapped in `...' with its grammaticality
        # in front.
        return [
            u"%s`%s'" % (esc(gloss.glossGrammaticality), esc(gloss.gloss))
            for gloss in form.glosses
        ]

    parts = [opening]
    if form.morphemeBreak and form.morphemeGloss:
        # Morphological analysis present: use Covington IGT.
        parts.append(
            (u'\t\\item'
             u'\n\t\t\\glll %s%s') % (
                esc(form.grammaticality),
                esc(h.storageToOutputTranslate(form.transcription))))
        parts.append(
            u'\n\t\t%s' % esc(
                h.storageToOutputTranslate(form.morphemeBreak, True)))
        parts.append(u'\n\t\t%s' % esc(form.morphemeGloss))
        parts.append(
            u'\n\t\t\\glt %s' % u'\\\\ \n\t\t'.join(quoted_translations()))
        parts.append(u'\n\t\t\\glend')
    else:
        # No morphological analysis: transcription and translation(s) on
        # separate lines.
        parts.append(u'\t\\item')
        parts.append(
            u'\n\t\t%s%s \\\\' % (
                esc(form.grammaticality),
                esc(h.storageToOutputTranslate(form.transcription))))
        parts.append(
            u'\n\t\t%s' % ' \\\\\n\t\t'.join(quoted_translations()))

    if kwargs.get('secondary_data', False):
        parts.append(u'\n')
        parts.append(
            self.xelatex_secondary_data(form, kwargs.get('reference', False)))

    parts.append(closing)
    return u''.join(parts)
def all_tab_delimited_formatter(self, form):
    """Return every exported field of ``form`` as one tab-delimited row.

    Values are passed through ``self.no_tabs``; optional values that are
    empty become ``u''`` so the row always has the same number of columns.
    Column order: id, grammaticality + transcription, narrow phonetic
    transcription, phonetic transcription, morpheme break, morpheme gloss,
    glosses, comments, speaker comments, context, date elicited, datetime
    entered, datetime modified, syntactic category string, elicitor,
    enterer, verifier, speaker, elicitation method, syntactic category,
    source, keywords.
    """

    def cell(text):
        # Result of no_tabs, with any falsy result replaced by u''.
        return self.no_tabs(text) or u''

    def optional(text):
        # Only hand the value to no_tabs when there is something to hand over.
        return cell(text) if text else u''

    def translated_ol(text):
        return cell(h.storageToOutputTranslateOLOnly(text)) if text else u''

    def formatted(moment, layout):
        return cell(moment.strftime(layout)) if moment else u''

    def full_name(person):
        if not person:
            return u''
        return cell(u'%s %s' % (person.firstName, person.lastName))

    def name_of(entity):
        return cell(entity.name) if entity else u''

    def gloss_cell(gloss):
        # ';' separates glosses in the column, so it is swapped for '.'
        # inside each individual gloss.
        return '%s%s' % (gloss.glossGrammaticality,
                         gloss.gloss.replace(';', '.'))

    columns = [
        str(form.id),
        self.no_tabs(u'%s%s' % (
            form.grammaticality,
            h.storageToOutputTranslate(form.transcription))),
        cell(form.narrowPhoneticTranscription),
        cell(form.phoneticTranscription),
        cell(h.storageToOutputTranslate(form.morphemeBreak, True)),
        cell(form.morphemeGloss),
        self.no_tabs(u'; '.join([gloss_cell(x) for x in form.glosses])),
        translated_ol(form.comments),
        translated_ol(form.speakerComments),
        optional(form.context),
        formatted(form.dateElicited, '%b %d, %Y'),
        formatted(form.datetimeEntered, '%b %d, %Y at %I:%M %p'),
        formatted(form.datetimeModified, '%b %d, %Y at %I:%M %p'),
        optional(form.syntacticCategoryString),
        full_name(form.elicitor),
        full_name(form.enterer),
        full_name(form.verifier),
        full_name(form.speaker),
        name_of(form.elicitationMethod),
        name_of(form.syntacticCategory),
        cell(u'%s (%s)' % (form.source.authorLastName, form.source.year))
        if form.source else u'',
        cell(u'; '.join([x.name.replace(u';', u'.') for x in form.keywords]))
        if form.keywords else u'',
    ]
    # Final safety net: no column may contain a literal tab.
    return u'\t'.join([x.replace('\t', ' ') for x in columns])
def expex_form_formatter(self, form, **kwargs):
    """Return a XeLaTeX ``\\ex ... \\xe`` example for ``form`` (ExPex).

    The transcription goes on the ``\\gla`` line; the narrow phonetic
    transcription, phonetic transcription, morpheme break and morpheme
    gloss each get a ``\\glb`` line when present; the translations,
    followed by the value of ``self.expex_trailing_citation(form)``, go on
    the ``\\glft`` line.  Because non-ascii characters might occur in the
    Form, XeLaTeX (not just plain LaTeX) is required to process the
    document.

    Keyword arguments read: ``secondary_data`` (append the output of
    ``self.xelatex_secondary_data``) and ``reference`` (forwarded to that
    method).
    """
    opening = u'\n\n\\ex\n'
    closing = u'\n\\xe'

    # A missing Form only yields a warning line.
    if not form:
        return u''.join([opening, u'\tWARNING: BAD FORM REFERENCE', closing])

    esc = self.esc_latex
    trailing_citation = self.expex_trailing_citation(form)

    parts = [opening]
    parts.append(
        (u'\t\\begingl'
         u'\n\t\t\\gla %s%s//') % (
            esc(form.grammaticality),
            esc(h.storageToOutputTranslate(form.transcription))))

    # Every optional tier uses the same \glb line template.
    glb_line = u'\n\t\t\\glb %s//'
    if form.narrowPhoneticTranscription:
        parts.append(glb_line % esc(form.narrowPhoneticTranscription))
    if form.phoneticTranscription:
        parts.append(glb_line % esc(form.phoneticTranscription))
    if form.morphemeBreak:
        parts.append(
            glb_line % esc(
                h.storageToOutputTranslate(form.morphemeBreak, True)))
    if form.morphemeGloss:
        parts.append(glb_line % esc(form.morphemeGloss))

    # Each translation is wrapped in `...' with its grammaticality in front.
    translations = [
        u"%s`%s'" % (esc(gloss.glossGrammaticality), esc(gloss.gloss))
        for gloss in form.glosses
    ]
    parts.append(
        u'\n\t\t\\glft %s%s//' % (
            '\\\\\n\t\t'.join(translations), trailing_citation))
    parts.append(u'\n\t\\endgl')

    if kwargs.get('secondary_data', False):
        parts.append(u'\n')
        parts.append(
            self.xelatex_secondary_data(form, kwargs.get('reference', False)))

    parts.append(closing)
    return u''.join(parts)
def txt_igt_formatter(self, form):
    """Return ``form`` as plain-text interlinear gloss text.

    Lines, separated by newlines: grammaticality + transcription, the
    morpheme break (if any), the morpheme gloss (if any), then one line
    per translation, each prefixed by its own grammaticality.
    """
    pieces = [
        u'%s%s' % (form.grammaticality,
                   h.storageToOutputTranslate(form.transcription))
    ]

    if form.morphemeBreak:
        analysis = h.storageToOutputTranslate(form.morphemeBreak, True)
        pieces.append(u'\n%s' % analysis)
    if form.morphemeGloss:
        pieces.append(u'\n%s' % form.morphemeGloss)

    translations = []
    for gloss in form.glosses:
        translations.append(
            u'%s%s' % (gloss.glossGrammaticality, gloss.gloss))
    pieces.append(u'\n%s' % u'\n'.join(translations))

    return u''.join(pieces)
def expex_form_formatter(self, form, **kwargs):
    """Return a XeLaTeX ``\\ex ... \\xe`` example for ``form`` (ExPex).

    The transcription goes on the ``\\gla`` line; the narrow phonetic
    transcription, phonetic transcription, morpheme break and morpheme
    gloss each get a ``\\glb`` line when present; the translations,
    followed by the value of ``self.expex_trailing_citation(form)``, go on
    the ``\\glft`` line.  Because non-ascii characters might occur in the
    Form, XeLaTeX (not just plain LaTeX) is required to process the
    document.

    Keyword arguments read: ``secondary_data`` (append the output of
    ``self.xelatex_secondary_data``) and ``reference`` (forwarded to that
    method).
    """
    opening = u'\n\n\\ex\n'
    closing = u'\n\\xe'

    # A missing Form only yields a warning line.
    if not form:
        return u''.join([opening, u'\tWARNING: BAD FORM REFERENCE', closing])

    esc = self.esc_latex
    trailing_citation = self.expex_trailing_citation(form)

    parts = [opening]
    parts.append(
        (u'\t\\begingl'
         u'\n\t\t\\gla %s%s//') % (
            esc(form.grammaticality),
            esc(h.storageToOutputTranslate(form.transcription))))

    # Every optional tier uses the same \glb line template.
    glb_line = u'\n\t\t\\glb %s//'
    if form.narrowPhoneticTranscription:
        parts.append(glb_line % esc(form.narrowPhoneticTranscription))
    if form.phoneticTranscription:
        parts.append(glb_line % esc(form.phoneticTranscription))
    if form.morphemeBreak:
        parts.append(
            glb_line % esc(
                h.storageToOutputTranslate(form.morphemeBreak, True)))
    if form.morphemeGloss:
        parts.append(glb_line % esc(form.morphemeGloss))

    # Each translation is wrapped in `...' with its grammaticality in front.
    translations = [
        u"%s`%s'" % (esc(gloss.glossGrammaticality), esc(gloss.gloss))
        for gloss in form.glosses
    ]
    parts.append(
        u'\n\t\t\\glft %s%s//' % (
            '\\\\\n\t\t'.join(translations), trailing_citation))
    parts.append(u'\n\t\\endgl')

    if kwargs.get('secondary_data', False):
        parts.append(u'\n')
        parts.append(
            self.xelatex_secondary_data(form, kwargs.get('reference', False)))

    parts.append(closing)
    return u''.join(parts)
def txt_tr_tl_formatter(self, form):
    """Return the transcription of ``form`` followed by its translations.

    The first line is the grammaticality immediately followed by the
    transcription; every translation (prefixed by its own grammaticality)
    follows on a line of its own.
    """
    headline = '%s%s' % (
        form.grammaticality, h.storageToOutputTranslate(form.transcription))

    translations = []
    for gloss in form.glosses:
        translations.append(
            '%s%s' % (gloss.glossGrammaticality, gloss.gloss))

    return u''.join([headline, '\n%s' % '\n'.join(translations)])
def txt_igt_formatter(self, form):
    """Return ``form`` as plain-text interlinear gloss text.

    Lines, separated by newlines: grammaticality + transcription, the
    morpheme break (if any), the morpheme gloss (if any), then one line
    per translation, each prefixed by its own grammaticality.
    """
    pieces = [
        u'%s%s' % (form.grammaticality,
                   h.storageToOutputTranslate(form.transcription))
    ]

    if form.morphemeBreak:
        analysis = h.storageToOutputTranslate(form.morphemeBreak, True)
        pieces.append(u'\n%s' % analysis)
    if form.morphemeGloss:
        pieces.append(u'\n%s' % form.morphemeGloss)

    translations = []
    for gloss in form.glosses:
        translations.append(
            u'%s%s' % (gloss.glossGrammaticality, gloss.gloss))
    pieces.append(u'\n%s' % u'\n'.join(translations))

    return u''.join(pieces)
def txt_tr_tl_formatter(self, form):
    """Return the transcription of ``form`` followed by its translations.

    The first line is the grammaticality immediately followed by the
    transcription; every translation (prefixed by its own grammaticality)
    follows on a line of its own.
    """
    headline = '%s%s' % (
        form.grammaticality, h.storageToOutputTranslate(form.transcription))

    translations = []
    for gloss in form.glosses:
        translations.append(
            '%s%s' % (gloss.glossGrammaticality, gloss.gloss))

    return u''.join([headline, '\n%s' % '\n'.join(translations)])
def covington_form_formatter(self, form, **kwargs):
    """Return a XeLaTeX ``examples`` environment for ``form`` (Covington).

    When the Form has both a morpheme break and a morpheme gloss, the
    output is a three-line ``\\glll`` interlinear example followed by the
    translations; otherwise the transcription and translations are simply
    put on separate lines.  Because non-ascii characters might occur in
    the Form, XeLaTeX (not just plain LaTeX) is required to process the
    document.

    Keyword arguments read: ``secondary_data`` (append the output of
    ``self.xelatex_secondary_data``) and ``reference`` (forwarded to that
    method).

    Note: h.capsToLatexSmallCaps was originally used to convert uppercase
    glosses to LaTeX smallcaps (``\\textsc{}``), but the Aboriginal Serif
    font was not rendering the smallcaps, so it was removed.  It should be
    reinstated if a font is found that renders NAPA symbols AND smallcaps.
    """
    opening = u'\n\n\\begin{examples}\n'
    closing = u'\n\\end{examples}'

    # A missing Form only yields a warning item.
    if not form:
        return u''.join(
            [opening, u'\t\\item WARNING: BAD FORM REFERENCE', closing])

    esc = self.esc_latex

    def quoted_translations():
        # Each translation is wrapped in `...' with its grammaticality
        # in front.
        return [
            u"%s`%s'" % (esc(gloss.glossGrammaticality), esc(gloss.gloss))
            for gloss in form.glosses
        ]

    parts = [opening]
    if form.morphemeBreak and form.morphemeGloss:
        # Morphological analysis present: use Covington IGT.
        parts.append(
            (u'\t\\item'
             u'\n\t\t\\glll %s%s') % (
                esc(form.grammaticality),
                esc(h.storageToOutputTranslate(form.transcription))))
        parts.append(
            u'\n\t\t%s' % esc(
                h.storageToOutputTranslate(form.morphemeBreak, True)))
        parts.append(u'\n\t\t%s' % esc(form.morphemeGloss))
        parts.append(
            u'\n\t\t\\glt %s' % u'\\\\ \n\t\t'.join(quoted_translations()))
        parts.append(u'\n\t\t\\glend')
    else:
        # No morphological analysis: transcription and translation(s) on
        # separate lines.
        parts.append(u'\t\\item')
        parts.append(
            u'\n\t\t%s%s \\\\' % (
                esc(form.grammaticality),
                esc(h.storageToOutputTranslate(form.transcription))))
        parts.append(
            u'\n\t\t%s' % ' \\\\\n\t\t'.join(quoted_translations()))

    if kwargs.get('secondary_data', False):
        parts.append(u'\n')
        parts.append(
            self.xelatex_secondary_data(form, kwargs.get('reference', False)))

    parts.append(closing)
    return u''.join(parts)
def txt_tr_formatter(self, form):
    """Return the transcription of ``form`` prefixed by its grammaticality.

    The transcription is passed through ``h.storageToOutputTranslate``
    before the two values are concatenated.
    """
    marker = form.grammaticality
    transcription = h.storageToOutputTranslate(form.transcription)
    return '%s%s' % (marker, transcription)
def browse(self, id=None):
    """Render the page for browsing Forms as dictionary entries.

    Two tables of browse links (object-language and metalanguage
    orthographies) are always placed on ``c``.  When ``id`` is valid, a
    word list is additionally queried, sorted with ``h.CustomSorter`` and
    stored in ``c.wordList``.

    :param id: string matching ``[0-9]+_(ol|ml)``.  The number is the index
        of the orthography entry whose words are browsed; the suffix picks
        the language (object or meta-) to sort by.  An index of 1000000
        means browse everything.  ``None`` or a malformed value renders the
        page without a word list.
    :returns: the result of rendering ``/derived/dictionary/browse.html``.
    """
    template = '/derived/dictionary/browse.html'

    # OL orthography as an HTML table of links to browse actions.
    # enumerate() supplies each entry's own position, avoiding a linear
    # list.index() search per entry.
    OLOrthography = app_globals.defaultOutputOrthography[1]
    OLOrthographyAsList = OLOrthography.orthographyAsList
    OLOrthographyX = [
        '<a href="%s" %s>%s</a>' % (
            url(controller='dictionary', action='browse',
                id=str(index) + '_ol', anchor='hl'),
            'title="browse by %s character \'%s\'"' % (
                app_globals.objectLanguageName,
                h.storageToOutputTranslate(x[0])),
            h.storageToOutputTranslate(x[0]))
        for index, x in enumerate(OLOrthographyAsList)
    ]
    c.OLOrthographyTable = h.literal(
        h.tablify(OLOrthographyX, 14, 'orthographyAsLinks'))

    # ML orthography as an HTML table of links to browse actions.
    MLOrthography = app_globals.metaLanguageOrthography
    MLOrthographyAsList = MLOrthography.orthographyAsList
    MLOrthographyX = [
        '<a href="%s" %s>%s</a>' % (
            url(controller='dictionary', action='browse',
                id=str(index) + '_ml', anchor='hl'),
            'title="browse by %s character \'%s\'"' % (
                app_globals.metaLanguageName, x[0]),
            x[0])
        for index, x in enumerate(MLOrthographyAsList)
    ]
    c.MLOrthographyTable = h.literal(
        h.tablify(MLOrthographyX, 14, 'orthographyAsLinks'))

    # Without a valid first-letter index there is nothing to query; leaving
    # here keeps the code below from touching names that are only defined
    # for a valid id.
    idPatt = re.compile('^[0-9]+_(ol|ml)$')
    if not (id and idPatt.search(id)):
        return render(template)

    headCharIndex, c.languageToSortBy = id.split('_')
    langToOrth = {
        'ol': (app_globals.storageOrthography[1], OLOrthographyAsList),
        'ml': (MLOrthography, MLOrthographyAsList)
    }
    orthography, orthographyAsList = langToOrth[c.languageToSortBy]
    try:
        c.headChar = orthographyAsList[int(headCharIndex)]
    except IndexError:
        # Out-of-range index (this includes the "browse everything" value).
        c.headChar = None

    # Only Forms whose transcription contains no space are listed.
    wordList_q = meta.Session.query(model.Form)
    wordList_q = wordList_q.filter(
        not_(model.Form.transcription.like(u'% %')))

    # The default case: non-empty headChar means a letter was clicked on.
    if c.headChar:
        # Object-language-to-metalanguage view.
        if c.languageToSortBy == 'ol':
            wordList_q = wordList_q.filter(
                model.Form.transcription.op('regexp')(
                    '^(%s)' % '|'.join(c.headChar)))
            # Existence of supergraphs means Forms whose transcription
            # begins with a supergraph have to be filtered out.
            superGraphs = getSuperGraphs(c.headChar, OLOrthographyAsList)
            if superGraphs:
                wordList_q = wordList_q.filter(
                    not_(model.Form.transcription.op('regexp')(
                        '^(%s)' % '|'.join(superGraphs))))
            c.wordList = wordList_q.all()
            if c.wordList:
                c.wordList = h.CustomSorter(orthography).sort(c.wordList)

        # Metalanguage-to-object-language view.
        elif c.languageToSortBy == 'ml':
            wordList_q = wordList_q.outerjoin(model.Form.glosses)
            wordList_q = wordList_q.filter(
                model.Gloss.gloss.op('regexp')(
                    '^(%s)' % '|'.join(c.headChar)))
            # Same supergraph exclusion, applied to the glosses.
            superGraphs = getSuperGraphs(c.headChar, MLOrthographyAsList)
            if superGraphs:
                wordList_q = wordList_q.filter(
                    not_(model.Gloss.gloss.op('regexp')(
                        '^(%s)' % '|'.join(superGraphs))))
            wordList = wordList_q.all()
            if wordList:
                # NOTE(review): the SQL filters above build their pattern
                # with '|'.join(c.headChar) while this one interpolates
                # c.headChar directly -- confirm both agree for the actual
                # type of c.headChar.
                glossPatt = re.compile('^%s' % c.headChar)
                # One ThinForm per matching gloss, so a Form can be listed
                # once under each of its glosses.
                newWordList = []
                for form in wordList:
                    for gloss in form.glosses:
                        if glossPatt.match(gloss.gloss):
                            newWordList.append(
                                ThinForm(form.id, gloss.gloss,
                                         form.transcription, form.keywords))
                c.wordList = h.CustomSorter(orthography).sort(newWordList)

    # The special case: an index of a million means browse everything.
    elif int(headCharIndex) == 1000000:
        wordList = wordList_q.all()
        if c.languageToSortBy == 'ml':
            newWordList = []
            for form in wordList:
                for gloss in form.glosses:
                    newWordList.append(
                        ThinForm(form.id, gloss.gloss,
                                 form.transcription, form.keywords))
            wordList = newWordList
        c.wordList = h.CustomSorter(orthography).sort(wordList)

    return render(template)
def txt_tr_formatter(self, form):
    """Return the transcription of ``form`` prefixed by its grammaticality.

    The transcription is passed through ``h.storageToOutputTranslate``
    before the two values are concatenated.
    """
    marker = form.grammaticality
    transcription = h.storageToOutputTranslate(form.transcription)
    return '%s%s' % (marker, transcription)
def browse(self, id=None):
    """Render the page for browsing Forms as dictionary entries.

    Two tables of browse links (object-language and metalanguage
    orthographies) are always placed on ``c``.  When ``id`` is valid, a
    word list is additionally queried, sorted with ``h.CustomSorter`` and
    stored in ``c.wordList``.

    :param id: string matching ``[0-9]+_(ol|ml)``.  The number is the index
        of the orthography entry whose words are browsed; the suffix picks
        the language (object or meta-) to sort by.  An index of 1000000
        means browse everything.  ``None`` or a malformed value renders the
        page without a word list.
    :returns: the result of rendering ``/derived/dictionary/browse.html``.
    """
    template = '/derived/dictionary/browse.html'

    # OL orthography as an HTML table of links to browse actions.
    # enumerate() supplies each entry's own position, avoiding a linear
    # list.index() search per entry.
    OLOrthography = app_globals.defaultOutputOrthography[1]
    OLOrthographyAsList = OLOrthography.orthographyAsList
    OLOrthographyX = [
        '<a href="%s" %s>%s</a>' % (
            url(controller='dictionary', action='browse',
                id=str(index) + '_ol', anchor='hl'),
            'title="browse by %s character \'%s\'"' % (
                app_globals.objectLanguageName,
                h.storageToOutputTranslate(x[0])),
            h.storageToOutputTranslate(x[0]))
        for index, x in enumerate(OLOrthographyAsList)
    ]
    c.OLOrthographyTable = h.literal(
        h.tablify(OLOrthographyX, 14, 'orthographyAsLinks'))

    # ML orthography as an HTML table of links to browse actions.
    MLOrthography = app_globals.metaLanguageOrthography
    MLOrthographyAsList = MLOrthography.orthographyAsList
    MLOrthographyX = [
        '<a href="%s" %s>%s</a>' % (
            url(controller='dictionary', action='browse',
                id=str(index) + '_ml', anchor='hl'),
            'title="browse by %s character \'%s\'"' % (
                app_globals.metaLanguageName, x[0]),
            x[0])
        for index, x in enumerate(MLOrthographyAsList)
    ]
    c.MLOrthographyTable = h.literal(
        h.tablify(MLOrthographyX, 14, 'orthographyAsLinks'))

    # Without a valid first-letter index there is nothing to query; leaving
    # here keeps the code below from touching names that are only defined
    # for a valid id.
    idPatt = re.compile('^[0-9]+_(ol|ml)$')
    if not (id and idPatt.search(id)):
        return render(template)

    headCharIndex, c.languageToSortBy = id.split('_')
    langToOrth = {
        'ol': (app_globals.storageOrthography[1], OLOrthographyAsList),
        'ml': (MLOrthography, MLOrthographyAsList)
    }
    orthography, orthographyAsList = langToOrth[c.languageToSortBy]
    try:
        c.headChar = orthographyAsList[int(headCharIndex)]
    except IndexError:
        # Out-of-range index (this includes the "browse everything" value).
        c.headChar = None

    # Only Forms whose transcription contains no space are listed.
    wordList_q = meta.Session.query(model.Form)
    wordList_q = wordList_q.filter(
        not_(model.Form.transcription.like(u'% %')))

    # The default case: non-empty headChar means a letter was clicked on.
    if c.headChar:
        # Object-language-to-metalanguage view.
        if c.languageToSortBy == 'ol':
            wordList_q = wordList_q.filter(
                model.Form.transcription.op('regexp')(
                    '^(%s)' % '|'.join(c.headChar)))
            # Existence of supergraphs means Forms whose transcription
            # begins with a supergraph have to be filtered out.
            superGraphs = getSuperGraphs(c.headChar, OLOrthographyAsList)
            if superGraphs:
                wordList_q = wordList_q.filter(
                    not_(model.Form.transcription.op('regexp')(
                        '^(%s)' % '|'.join(superGraphs))))
            c.wordList = wordList_q.all()
            if c.wordList:
                c.wordList = h.CustomSorter(orthography).sort(c.wordList)

        # Metalanguage-to-object-language view.
        elif c.languageToSortBy == 'ml':
            wordList_q = wordList_q.outerjoin(model.Form.glosses)
            wordList_q = wordList_q.filter(
                model.Gloss.gloss.op('regexp')(
                    '^(%s)' % '|'.join(c.headChar)))
            # Same supergraph exclusion, applied to the glosses.
            superGraphs = getSuperGraphs(c.headChar, MLOrthographyAsList)
            if superGraphs:
                wordList_q = wordList_q.filter(
                    not_(model.Gloss.gloss.op('regexp')(
                        '^(%s)' % '|'.join(superGraphs))))
            wordList = wordList_q.all()
            if wordList:
                # NOTE(review): the SQL filters above build their pattern
                # with '|'.join(c.headChar) while this one interpolates
                # c.headChar directly -- confirm both agree for the actual
                # type of c.headChar.
                glossPatt = re.compile('^%s' % c.headChar)
                # One ThinForm per matching gloss, so a Form can be listed
                # once under each of its glosses.
                newWordList = []
                for form in wordList:
                    for gloss in form.glosses:
                        if glossPatt.match(gloss.gloss):
                            newWordList.append(
                                ThinForm(form.id, gloss.gloss,
                                         form.transcription, form.keywords))
                c.wordList = h.CustomSorter(orthography).sort(newWordList)

    # The special case: an index of a million means browse everything.
    elif int(headCharIndex) == 1000000:
        wordList = wordList_q.all()
        if c.languageToSortBy == 'ml':
            newWordList = []
            for form in wordList:
                for gloss in form.glosses:
                    newWordList.append(
                        ThinForm(form.id, gloss.gloss,
                                 form.transcription, form.keywords))
            wordList = newWordList
        c.wordList = h.CustomSorter(orthography).sort(wordList)

    return render(template)