def parse_restrs(rdng, tag, kanjs):
    """Apply a ';'-separated reading restriction tag to a reading.

    `tag` is split on ';' and each piece has surrounding spaces removed.
    The resulting texts are handed to jdb.txt2restr() together with
    `rdng`, `kanjs` and the attribute name '_restr'.

    Raises:
        ParseError: if jdb.txt2restr() put anything into the list passed
            as its fifth argument (presumably the texts that matched no
            kanji -- confirm against jdb.txt2restr).  Only the first
            such item is named in the message.
    """
    allowed_texts = [piece.strip(' ') for piece in tag.split(';')]
    unmatched = []
    jdb.txt2restr(allowed_texts, rdng, kanjs, '_restr', unmatched)
    if unmatched:
        # Only the first unmatched text is ever reported.
        raise ParseError(
            'Reading restriction "%s" doesn\'t match any kanji' % unmatched[0])
def test002(_):
    """An empty text list must leave every '_restr' list empty.

    Calls jdb.txt2restr() with no restriction texts for the fixture's
    second reading and checks that no reading or kanji gained a '_restr'
    item and that the call returned an empty list.
    """
    entry = _.e
    created = jdb.txt2restr([], entry._rdng[1], entry._kanj, '_restr')
    # Readings first, then kanji: neither side may have gained a record.
    for item in [*entry._rdng, *entry._kanj]:
        _.assertEqual([], item._restr)
    _.assertEqual([], created)
def parse_stags(tag, sens, kanjs, rdngs):
    """Apply a ','-separated sense restriction tag to a sense.

    Each comma-separated word is stripped and classified with
    jdb.jstr_reb() (checked first) or jdb.jstr_keb().  The words accepted
    by the former are passed to jdb.txt2restr() against `rdngs` with
    attribute '_stagr', the latter against `kanjs` with '_stagk'.

    Raises:
        ParseError: if a word is accepted by neither classifier, or if
            either jdb.txt2restr() call fills its `bad` list.
    """
    reading_words = []
    kanji_words = []
    for raw in tag.split(','):
        word = raw.strip()
        if jdb.jstr_reb(word):
            reading_words.append(word)
            continue
        if jdb.jstr_keb(word):
            kanji_words.append(word)
            continue
        raise ParseError(
            'stagx restriction word neither reading or kanji: "%s"' % word)

    # Readings are processed (and their errors reported) before kanji.
    passes = (
        (reading_words, rdngs, '_stagr', 'Stagr text not in readings: "%s"'),
        (kanji_words, kanjs, '_stagk', 'Stagk text not in kanji: "%s"'),
    )
    for words, targets, attr, template in passes:
        unmatched = []
        jdb.txt2restr(words, sens, targets, attr, bad=unmatched)
        if unmatched:
            raise ParseError(template % '","'.join(unmatched))
def test014(_):
    """Check jdb.txt2restr() with one reading text on a sense ('_stagr').

    The text 'あ' is applied to the fixture's first sense against the
    entry's readings.  Expected outcome: the first sense holds one
    '_stagr' item, the second sense none; the first reading holds none,
    the second reading one; the sense and reading share the same item;
    and the call returns [2].
    """
    rtxts = ['あ']
    retval = jdb.txt2restr(rtxts, _.e._sens[0], _.e._rdng, '_stagr')
    for expect, s in zip([1, 0], _.e._sens):
        _.assertEqual(expect, len(s._stagr))
    for expect, r in zip([0, 1], _.e._rdng):
        _.assertEqual(expect, len(r._stagr))
    for s in _.e._sens:
        for x in s._stagr:
            # assertIsInstance replaces the `assert_` alias, which was
            # removed from unittest.TestCase in Python 3.12.
            _.assertIsInstance(x, Stagr)
    # The same item must be linked from both the sense and the reading.
    _.assertEqual(_.e._sens[0]._stagr[0], _.e._rdng[1]._stagr[0])
    _.assertEqual([2], retval)
def test011(_):
    """Check jdb.txt2restr() with one kanji text on a reading ('_restr').

    The text '亜' is applied to the fixture's first reading against the
    entry's kanji.  Expected outcome: the first reading holds two
    '_restr' items, the second reading none; the kanji hold 0, 1 and 1
    items respectively; each item is shared between the reading and the
    corresponding kanji; and the call returns [2, 3].
    """
    rtxts = ['亜']
    retval = jdb.txt2restr(rtxts, _.e._rdng[0], _.e._kanj, '_restr')
    for expect, r in zip([2, 0], _.e._rdng):
        _.assertEqual(expect, len(r._restr))
    for expect, k in zip([0, 1, 1], _.e._kanj):
        _.assertEqual(expect, len(k._restr))
    for r in _.e._rdng:
        for x in r._restr:
            # assertIsInstance replaces the `assert_` alias, which was
            # removed from unittest.TestCase in Python 3.12.
            _.assertIsInstance(x, Restr)
    # Each item must be linked from both the reading and its kanji.
    _.assertEqual(_.e._rdng[0]._restr[0], _.e._kanj[1]._restr[0])
    _.assertEqual(_.e._rdng[0]._restr[1], _.e._kanj[2]._restr[0])
    _.assertEqual([2, 3], retval)
def test013(_):
    """Check jdb.txt2restr() with one kanji text on a sense ('_stagk').

    The text '亜' is applied to the fixture's first sense against the
    entry's kanji.  Expected outcome: the first sense holds two '_stagk'
    items, the second sense none; the kanji hold 0, 1 and 1 items
    respectively; each item is shared between the sense and the
    corresponding kanji; and the call returns [2, 3].
    """
    rtxts = ['亜']
    retval = jdb.txt2restr(rtxts, _.e._sens[0], _.e._kanj, '_stagk')
    for expect, s in zip([2, 0], _.e._sens):
        _.assertEqual(expect, len(s._stagk))
    for expect, k in zip([0, 1, 1], _.e._kanj):
        _.assertEqual(expect, len(k._stagk))
    for s in _.e._sens:
        for x in s._stagk:
            # assertIsInstance replaces the `assert_` alias, which was
            # removed from unittest.TestCase in Python 3.12.
            _.assertIsInstance(x, Stagk)
    # Each item must be linked from both the sense and its kanji.
    _.assertEqual(_.e._sens[0]._stagk[0], _.e._kanj[1]._stagk[0])
    _.assertEqual(_.e._sens[0]._stagk[1], _.e._kanj[2]._stagk[0])
    _.assertEqual([2, 3], retval)
def test012(_):
    """Check jdb.txt2restr() with None as the text list ('_restr').

    None is the equivalent of "nokanji".  It is applied to the fixture's
    first reading against the entry's kanji.  Expected outcome: the first
    reading holds three '_restr' items, the second reading none; every
    kanji holds one item; each item is shared between the reading and
    the corresponding kanji; and the call returns [1, 2, 3].
    """
    rtxts = None    # Equiv to "nokanji".
    retval = jdb.txt2restr(rtxts, _.e._rdng[0], _.e._kanj, '_restr')
    for expect, r in zip([3, 0], _.e._rdng):
        _.assertEqual(expect, len(r._restr))
    for expect, k in zip([1, 1, 1], _.e._kanj):
        _.assertEqual(expect, len(k._restr))
    for r in _.e._rdng:
        for x in r._restr:
            # assertIsInstance replaces the `assert_` alias, which was
            # removed from unittest.TestCase in Python 3.12.
            _.assertIsInstance(x, Restr)
    # Each item must be linked from both the reading and its kanji.
    _.assertEqual(_.e._rdng[0]._restr[0], _.e._kanj[0]._restr[0])
    _.assertEqual(_.e._rdng[0]._restr[1], _.e._kanj[1]._restr[0])
    _.assertEqual(_.e._rdng[0]._restr[2], _.e._kanj[2]._restr[0])
    _.assertEqual([1, 2, 3], retval)
def mk_restrs (listkey, rdngs, kanjs):
    """Turn user-supplied restriction texts into restriction records.

    mk_restrs() is used for all three types of restriction info: restr,
    stagr and stagk.  To simplify things, the comments and variable names
    assume use with reading restrictions (restr).

    What we do is take a list of restr text items received from a user,
    which list the kanji (a subset of all the kanji for the entry) that
    are valid with this reading, and turn it (via jdb.txt2restr()) into a
    list of restr records that identify the kanji that are *invalid* with
    this reading.  The restr records identify kanji by id number rather
    than text.

    Args:
        listkey: Name of the attribute used to get the list of text restr
            items from each item of 'rdngs'.  These are the text strings
            provided by the user.  Should be "_RESTR", "_STAGR" or
            "_STAGK".
        rdngs: List of rdng or sens records, depending on whether we're
            doing restr or stagr/stagk restrictions.  The 'listkey' and
            '_NOKANJI' attributes are deleted from each record on which
            they hold a true value.
        kanjs: List of the entry's kanji or reading records, depending on
            whether we are doing restr/stagk or stagr restrictions.

    Returns:
        A string with one error message per line; the empty string when
        no problem was detected.
    """
    # Noun used in the "not in the entry's ..." message for each listkey.
    not_found_names = {"_RESTR": "kanji",
                       "_STAGR": "readings",
                       "_STAGK": "kanji"}
    errs = []
    ktxts = [x.txt for x in kanjs]
    for n, r in enumerate (rdngs):
        # Get the list of restr text strings and nokanji flag and
        # delete them from the rdng object since they aren't part
        # of the standard api.
        restrtxt = getattr (r, listkey, None)
        if restrtxt: delattr (r, listkey)
        nokanj = getattr (r, '_NOKANJI', None)
        if nokanj: delattr (r, '_NOKANJI')

        # Continue with next reading if nothing to be done
        # with this one.
        if not nokanj and not restrtxt: continue

        # bld_rdngs() guarantees that {_NOKANJI} and {_RESTR}
        # won't both be present on the same rdng, so the case to catch
        # here is a "nokanji" tag on an entry that has no kanji at all
        # (which is what the message below reports).
        if nokanj and not kanjs:
            # Only rdng-kanj restrictions should have a "nokanji" tag, so
            # the message can hardwire "reading" and "kanji" text even
            # though this function is also used for sens-rdng and
            # sens-kanj restrictions.
            errs.append ("Reading %d has 'nokanji' tag but entry has no kanji" % (n+1))
            continue

        # None tells txt2restr() to treat the reading as "nokanji".
        if nokanj: restrtxt = None
        z = jdb.txt2restr (restrtxt, r, kanjs, listkey.lower())

        # Check for kanji erroneously in the 'restrtxt' but not in
        # 'kanjs'.  As an optimization, we only do this check if the
        # number of Restr objects created (len(z)) plus the number of
        # 'restrtxt's are not equal to the number of 'kanjs's.  (This
        # criterion may not be valid in some corner cases.)
        if restrtxt is not None and len (z) + len (restrtxt) != len (kanjs):
            nomatch = [x for x in restrtxt if x not in ktxts]
            if nomatch:
                # Fallback wording keeps an unexpected 'listkey' from
                # raising UnboundLocalError while building the message.
                not_found_in = not_found_names.get (listkey, "kanji or readings")
                errs.append ("restr value(s) '" + "','".join (nomatch)
                             + "' not in the entry's %s" % not_found_in)
    return "\n".join (errs)