Esempio n. 1
0
        def bind_more_if_same():
            """Try to bind "word adj ET word [adj]" around index ``i`` into one AndOr.

            Works on ``words``, ``i``, ``M``, ``visited`` and ``ao_loc`` from the
            enclosing scope.  The word at ``i-1`` must have 'adj' as the pos of
            its first item and must satisfy ``same(..., pos_check=False)``
            against the word at ``i-2``.  The word at ``i+2``, when it exists
            and passes the same two checks against the word at ``i+1``, is
            attached to that word as a modifier.  If
            ``same(words[i-2], words[i+1])`` holds, both sides are put into an
            ``AndOr(u'et')`` which replaces ``words[i-2]``; every consumed
            index is added to ``visited`` and ``i-2`` is added to ``ao_loc``.

            NOTE(review): ``same`` is defined elsewhere; it presumably checks
            grammatical agreement between two words -- confirm.
            NOTE(review): there is no bounds check on ``i-2`` / ``i+1`` here;
            presumably the caller guarantees them -- confirm.

            Returns True when the binding was made, False otherwise.
            """
            left_head = words[i-2]
            left_adj = words[i-1]
            if not (left_adj.items and left_adj.items[0].pos == 'adj'):
                return False
            if not same(left_head, left_adj, pos_check=False):
                return False

            right_head = words[i+1]
            # Optional adjective following the right-hand word; stays None
            # when there is no such word or it does not qualify.
            right_adj = None
            if i+2 < M:
                candidate = words[i+2]
                if candidate.items and candidate.items[0].pos == 'adj':
                    if same(right_head, candidate, pos_check=False):
                        right_adj = candidate

            if not same(left_head, right_head):
                return False

            # Parenthesised so the line parses as a print statement (Python 2)
            # and as a print call alike; the output is the same.
            print("// #%d #%d ET #%d (#%d): %s == %s" % (i-2, i-1, i+1, i+2, left_head.surface_utf8(), right_head.surface_utf8()))
            cl = AndOr(u'et')
            left_head.add_modifier(left_adj)
            cl.add([left_head])
            visited.add(i-2)
            visited.add(i-1)
            # The conjunction at i is consumed by this binding as well; the
            # sibling binders mark their whole span, including the conjunction.
            visited.add(i)

            if right_adj is not None:
                right_head.add_modifier(right_adj)
                visited.add(i+2)
            cl.add([right_head])
            visited.add(i+1)

            # The AndOr takes the slot of the leftmost consumed word.
            words[i-2] = cl
            ao_loc.add(i-2)
            return True
Esempio n. 2
0
 def bind_two_if_same():
     """Bind the words on either side of index ``i`` into one AndOr.

     Works on ``words``, ``i``, ``visited`` and ``ao_loc`` from the enclosing
     scope.  When ``same(words[i-1], words[i+1])`` holds, both words are put
     into an ``AndOr(u'et')`` which replaces ``words[i-1]``; indices ``i-1``,
     ``i`` and ``i+1`` are added to ``visited`` and ``i-1`` to ``ao_loc``.

     NOTE(review): ``same`` is defined elsewhere; it presumably checks
     grammatical agreement between two words -- confirm.

     Returns True when the binding was made, False otherwise.
     """
     left = words[i-1]
     right = words[i+1]
     if not same(left, right):
         return False

     # Parenthesised so the line parses as a print statement (Python 2)
     # and as a print call alike; the output is the same.
     print("// #%d ET #%d: %s == %s" % (i-1, i+1, left.surface_utf8(), right.surface_utf8()))

     pair = AndOr(u'et')
     pair.add([left])
     pair.add([right])

     # Mark the left word, the conjunction and the right word as consumed.
     visited.add(i-1)
     visited.add(i)
     visited.add(i+1)

     # The AndOr takes the slot of the left word.
     words[i-1] = pair
     ao_loc.add(i-1)
     return True
Esempio n. 3
0
    def detect(and_or_word):
        """Collapse repeated ``and_or_word`` occurrences in each group into an AndOr.

        Works on ``words``, ``groups``, ``visited`` and ``ao_loc`` from the
        enclosing scope.  For every group in which
        ``word_indices_in_group(and_or_word, group)`` reports at least two
        positions, the words strictly between consecutive positions become
        members of a new ``AndOr(and_or_word)``.  The words after the last
        position are scanned up to the end of the group and added as the
        final member, stopping early when the AndOr's pos is 'noun' and a
        word fails the case checks below.  All indices from the first
        position up to the end of the scan are added to ``visited``, the
        AndOr replaces ``words[first position]`` and that index is added to
        ``ao_loc``.

        NOTE(review): ``first_item._`` is presumably a list of case entries
        whose first element is the case name -- confirm against the item type.
        """
        ao_indices = [word_indices_in_group(and_or_word, group) for group in groups]
        for i, aos in enumerate(ao_indices):
            num_of_ao = len(aos)
            if num_of_ao >= 2:
                cl = AndOr(and_or_word)
                # First add the members whose span is already fixed, i.e.
                # every member except the last one.
                for j in range(num_of_ao-1):
                    this_et_idx = aos[j]
                    next_et_idx = aos[j+1]
                    cl.add(words[this_et_idx+1:next_et_idx])

                # The last member is added while checking where it ends.
                last_et_idx = aos[num_of_ao-1]
                end_idx = groups[i][-1] + 1
                ws = []
                # One past the last word consumed by the scan.  Initialised
                # before the loop so it is defined even when the last
                # conjunction is the final word of the group (empty scan),
                # instead of relying on the leaked loop variable.
                stop_idx = last_et_idx + 1
                for idx in range(last_et_idx+1, end_idx):
                    word = words[idx]
                    first_item = word.items[0]
                    if cl.pos == 'noun':
                        # Only words that have case inflection qualify.
                        if first_item._ is None:
                            break
                        if non_genitive(first_item._):
                            cases = [x[0] for x in first_item._]
                            # Reject the word if none of its cases can agree
                            # with the members collected so far.
                            if not any(case in cases for case in cl.cases):
                                break
                    ws.append(word)
                    stop_idx = idx + 1
                if ws:
                    cl.add(ws)
                else:
                    pass # ERROR: type mismatch

                # Mark the whole span, conjunctions included, as consumed.
                for j in range(aos[0], stop_idx):
                    visited.add(j)

                cl.restrict()

                # The AndOr takes the slot of the first conjunction.
                words[aos[0]] = cl
                ao_loc.add(aos[0])