def p_record(p):
    '''record : header "{" attributes "}"
              | header "{" attributes ";" "}"
              | header "{" "}"'''
    # NOTE: the docstring above is the grammar rule for this production and is
    # read by the parser generator at runtime -- keep prose out of it.
    #
    # Semantic action for a `record`:
    #   * p[1] (header) is iterated as (name, base) pairs, where each name
    #     unpacks to (has_dot, is_word, nme, ipa).
    #   * p[3] (attributes) is iterated as a sequence of attribute lists.
    # Word entries produce tokens; non-word entries either modify their base
    # or register the attribute list in Params.funcs. The collected tokens are
    # handed to fill_dm() together with two fresh dicts and the result is
    # stored in p[0].
    header = p[1]
    # Only the productions that contain `attributes` have more than four
    # slots; the empty-braces form falls back to one empty attribute list so
    # every header entry is still processed exactly once.
    attributes = p[3] if len(p) > 4 else [[]]

    res = []
    for name, base in header:
        has_dot, is_word, nme, ipa = name
        if base is None and has_dot:
            continue

        if is_word:
            for attr in attributes:
                tok = Token(None, 0, 0)
                tok._attrs = attr_to_dict(attr)

                if ipa is not None:
                    tok.attr(concept['transcription'], ipa)
                # Whatever transcription the token ends up with (from the
                # header or from the attributes) is stored wrapped in a list.
                transcription = tok.attr(concept['transcription'])
                if transcription is not None:
                    tok.attr(concept['transcription'], [transcription])

                # Default the meaning to the stem with its last
                # dot-separated suffix removed.
                if tok.meaning is None:
                    stem = tok.attr(concept['stem'])
                    if stem is not None:
                        tok.meaning = stem.rsplit('.', 1)[0]

                # 'real-number' defaults to 'number' when it is not given.
                number = tok.attr(concept['number'])
                if number is not None \
                        and tok.attr(concept['real-number']) is None:
                    tok.attr(concept['real-number'], number)

                tok.text = [nme]
                if base is not None:
                    res += modify_token(base, tok)
                else:
                    res.append(tok)
        else:
            if ipa is not None:
                # Same exception type as before; now with a diagnostic.
                raise Exception(
                    "Record header %r is not a word entry but carries a "
                    "transcription (%r)." % (nme, ipa))
            for attr in attributes:
                if base is not None:
                    modify_base(has_dot, nme, attr, base)
                elif not has_dot and attr != []:
                    if nme in Params.funcs:
                        Params.funcs[nme] += [attr]
                    else:
                        Params.funcs[nme] = [attr]

    p[0] = fill_dm({}, {}, res)
def init_words(self, text, start=0, end=0):
    """Return every reading of the first word of `text`.

    The first element of `text` is popped (the caller's list is mutated) and
    lower-cased for matching. Readings are tried in this order:

    1. a number accepted by ``self.is_number`` -> a single cardinal numeral;
    2. a digit ordinal ("1st", "2nd", "3rd", "<number>th") -> a single
       ordinal numeral;
    3. punctuation and/or any matching part of ``self.conjunctions``;
    4. entries of ``self.vocabulary`` filed under the word, each of which
       must match the input word-by-word (case-insensitively) over its
       whole ``text``.

    Cases 1 and 2 return immediately; cases 3 and 4 accumulate.

    Returns:
        A list of ``(tokens, remaining_text)`` pairs, where
        ``remaining_text`` is the input left after the reading.

    Raises:
        Exception: if no reading was found for the word.
    """
    result = []
    words = []
    tt = text.pop(0)
    t = tt.lower()

    # NOTE(review): numerals look up `type[...]` while punctuation and
    # conjunctions use `ltype[...]` -- confirm both tables are intended.
    if self.is_number(t):
        w = Token(type["numeral"], [tt], start, end)
        w.meaning = t
        w.num_type = NUM_TYPE_CARDINAL
        w.str_type = STR_TYPE_DIGITS
        return [([w], text)]

    # NOTE(review): the first alternative only checks the last three
    # characters, so the part before "1st"/"2nd"/"3rd" is not verified to be
    # numeric. Parentheses only make the existing precedence explicit.
    if t[-3:] in ["1st", "2nd", "3rd"] or (
            t[-2:] == "th" and self.is_number(t[:-2])):
        w = Token(type["numeral"], [tt], start, end)
        w.meaning = t[:-2]
        w.num_type = NUM_TYPE_ORDINAL
        w.str_type = STR_TYPE_DIGITS
        return [([w], text)]

    if self.is_punctuation(t):
        words.append(Token(ltype["punctuation"], [tt], start, end))

    for x in self.conjunctions:
        if t == x.before:
            pos = position["before"]
        elif t == x.among:
            pos = position["among"]
        elif t == x.after:
            pos = position["after"]
        else:
            continue
        w = Token(ltype["conjunction"], [tt], start, end)
        w.position = pos
        # w.type = type["conjunction"]
        # Attribute name (including its spelling) is kept as-is because
        # other code may read it.
        w.conjuction_structure = deepcopy(x)
        words.append(w)

    if words != []:
        # TODO (original author): either make an error here or let it work
        # when the first word is known, the second isn't and it is an idiom.
        result.append((words, deepcopy(text)))

    for x in self.vocabulary.get(t, []):
        p = [t] + deepcopy(text)
        for q in x.text:
            # An entry longer than the remaining input cannot match; without
            # the emptiness check pop(0) raised IndexError in that case.
            if not p or q.lower() != p.pop(0).lower():
                break
        else:
            result.append(([deepcopy(x)], p))

    if not result:
        raise Exception("Word '%s' not found in the '%s' dictionary." % (t, self.name))
    return result