コード例 #1
0
ファイル: cws.py プロジェクト: arne-cl/fosay
def p_record(p):
    '''record : header "{" attributes "}"
              | header "{" attributes ";" "}"
              | header "{" "}"'''
    # NOTE: the docstring above is the grammar production for this parser
    # action (PLY-style, presumably), so it is code, not prose.
    #
    # Semantic action for a ``record``.  ``p[1]`` is the header: an iterable
    # of ``(name, base)`` pairs, where ``name`` unpacks to
    # ``(has_dot, is_word, nme, ipa)``.  ``p[3]`` is the list of attribute
    # sets when the production has one.  Word headers produce tokens that are
    # collected and handed to ``fill_dm``; non-word headers either modify
    # their base or are registered in ``Params.funcs``.  The result of
    # ``fill_dm`` is stored in ``p[0]``.
    #
    # Raises ``Exception`` when a non-word header carries a transcription.
    header = p[1]
    # The attribute-less form (header "{" "}") has only 4 symbols; it is
    # treated as one empty attribute set so the loops below still run once.
    attributes = p[3] if len(p) > 4 else [[]]

    res = []
    for name, base in header:
        has_dot, is_word, nme, ipa = name
        # A dotted name without a base has nothing to attach to: skip it.
        if base is None and has_dot:
            continue

        if is_word:
            for attr in attributes:
                tok = Token(None, 0, 0)
                tok._attrs = attr_to_dict(attr)
                # A transcription given in the header overrides the attribute.
                if ipa is not None:
                    tok.attr(concept['transcription'], ipa)
                # Whatever transcription is present is stored as a
                # one-element list.
                if tok.attr(concept['transcription']) is not None:
                    tok.attr(concept['transcription'],
                             [tok.attr(concept['transcription'])])
                # Default meaning: the stem up to its last '.'.
                if tok.meaning is None and tok.attr(concept['stem']) is not None:
                    tok.meaning = tok.attr(concept['stem']).rsplit('.', 1)[0]
                # 'real-number' defaults to 'number' when only the latter is set.
                if tok.attr(concept['number']) is not None \
                        and tok.attr(concept['real-number']) is None:
                    tok.attr(concept['real-number'], tok.attr(concept['number']))
                tok.text = [nme]
                if base is not None:
                    res += modify_token(base, tok)
                else:
                    res.append(tok)
        else:
            if ipa is not None:
                # Same exception type as before; the message now names the
                # offending header so the grammar author can find it.
                raise Exception(
                    "record header %r is not a word and cannot carry "
                    "a transcription" % (nme,))
            for attr in attributes:
                if base is not None:
                    modify_base(has_dot, nme, attr, base)
                elif not has_dot and attr != []:
                    # Group the non-empty attribute sets under the header name.
                    Params.funcs.setdefault(nme, []).append(attr)

    dictionary = {}
    meanings = {}
    p[0] = fill_dm(dictionary, meanings, res)
コード例 #2
0
ファイル: lang.py プロジェクト: arne-cl/fosay
    def init_words(self, text, start = 0, end = 0):
        """Return every possible reading of the first word of *text*.

        The first element of *text* is popped (the caller's list is
        modified) and interpreted, in this order, as a cardinal number, an
        ordinal number, punctuation and/or a conjunction part, and as the
        start of each matching vocabulary entry (which may span several
        words).

        Args:
            text: list of word strings; must not be empty.
            start: passed through to every ``Token`` created here.
            end: passed through to every ``Token`` created here.

        Returns:
            A list of ``(tokens, remaining_text)`` pairs, one per reading.
            For numerals the single pair holds *text* itself (not a copy).

        Raises:
            Exception: if the word has no reading at all.
        """
        result = []
        words = []
        tt = text.pop(0)
        t = tt.lower()

        # NOTE(review): the numeral branches index ``type`` while the
        # punctuation/conjunction branches use ``ltype``; confirm ``type`` is
        # a module-level mapping here and not the builtin.
        if self.is_number(t):
            w = Token(type["numeral"], [tt], start, end)
            w.meaning = t
            w.num_type = NUM_TYPE_CARDINAL
            w.str_type = STR_TYPE_DIGITS
            return [([w], text)]
        # Ordinals: the suffix must be recognised AND what precedes it must be
        # a number.  The parentheses matter: without them ``and`` binds
        # tighter than ``or`` and "1st"/"2nd"/"3rd" suffixes skip the check.
        if (t[-3:] in ("1st", "2nd", "3rd") or t[-2:] == "th") \
                and self.is_number(t[:-2]):
            w = Token(type["numeral"], [tt], start, end)
            w.meaning = t[:-2]
            w.num_type = NUM_TYPE_ORDINAL
            w.str_type = STR_TYPE_DIGITS
            return [([w], text)]

        if self.is_punctuation(t):
            w = Token(ltype["punctuation"], [tt], start, end)
            words.append(w)
        for x in self.conjunctions:
            if t == x.before:
                pos = position["before"]
            elif t == x.among:
                pos = position["among"]
            elif t == x.after:
                pos = position["after"]
            else:
                continue
            w = Token(ltype["conjunction"], [tt], start, end)
            w.position = pos
            w.conjuction_structure = deepcopy(x)
            words.append(w)

        # TODO: either make this an error, or let it work when the first word
        # is known, the second is not, and together they form an idiom.
        if words:
            result.append((words, deepcopy(text)))

        for x in self.vocabulary.get(t, []):
            p = [t] + deepcopy(text)
            length = len(x.text)
            # An entry longer than the remaining text cannot match; skip it
            # rather than running off the end of the word list.
            if length > len(p):
                continue
            if all(q.lower() == word.lower() for q, word in zip(x.text, p)):
                # The remainder is whatever follows the words the entry used.
                result.append(([deepcopy(x)], p[length:]))

        if not result:
            raise Exception("Word '%s' not found in the '%s' dictionary." % (t, self.name))
        return result