Beispiel #1
0
 def phrases(ccg):
     """Yield a ``Readible.Phrase`` for each brace-delimited span that is a phrase.

     Spans are produced by ``paren_utils.paren_iter`` using ``{`` and ``}``
     as delimiters with ``bottom_up=True``. Only spans accepted by
     ``Readible.Phrase_RE.match`` are wrapped and yielded; the rest are
     skipped.
     """
     brace_spans = paren_utils.paren_iter(
         ccg, bottom_up=True, lparen='{', rparen='}')
     for span in brace_spans:
         if not Readible.Phrase_RE.match(span):
             continue
         yield Readible.Phrase(span)
Beispiel #2
0
    def clean_parens(tag):
        """Return *tag* with its parentheses normalised.

        The tag is first rewritten with depth markers by
        ``paren_utils.mark_depth`` (judging by the replacements below these
        look like ``<d>`` and ``</d>`` for nesting depth ``d`` -- confirm
        against ``paren_utils``). Every parenthesised span, innermost first,
        is then classified and its markers in the working copy are rewritten:

        * modifier (``_Modifier_RE`` matches and both sides are equal once
          features are removed): both sides keep their parentheses;
        * ``_Left_RE`` hit: the parentheses around group ``a`` are dropped;
        * ``_Right_RE`` hit: the parentheses around group ``a`` are kept.

        Markers left over at the end are converted back by
        ``paren_utils.unmark_depth``.
        """
        marked_tag = paren_utils.mark_depth(tag)
        for t in paren_utils.paren_iter(f'({tag})', bottom_up=True):
            # Depth of this span inside the full tag; +1 because the spans
            # come from the tag wrapped in one extra pair of parentheses.
            depth = paren_utils.depth_at(tag, tag.index(t)) + 1
            t = paren_utils.mark_depth(t)
            # Keep only the span's outermost markers (<1>); every deeper
            # level is turned back into literal parentheses.
            j = 2
            while f'<{j}>' in t:
                t = t.replace(f'<{j}>', '(').replace(f'</{j}>', ')')
                j += 1

            opener = f'<{depth}>'
            closer = f'</{depth}>'

            mod = CCG_Tag._Modifier_RE.match(t)
            if mod and CCG_Tag.remove_features(
                    mod.group('a')) == CCG_Tag.remove_features(mod.group('b')):
                a = mod.group('a')
                b = mod.group('b')
                slash = mod.group('slash')
                marked_tag = marked_tag.replace(
                    f'{opener}{a}{closer}{slash}{opener}{b}{closer}',
                    f'({a}){slash}({b})', 1)
                continue

            # Use the result of the same search() that selected this branch.
            # Testing with search() and then extracting with match() would
            # dereference None whenever the hit is not at position 0.
            left = CCG_Tag._Left_RE.search(t)
            if left:
                a = left.group('a')
                marked_tag = marked_tag.replace(f'{opener}{a}{closer}', a, 1)
                continue

            right = CCG_Tag._Right_RE.search(t)
            if right:
                a = right.group('a')
                marked_tag = marked_tag.replace(f'{opener}{a}{closer}',
                                                f'({a})', 1)

        return paren_utils.unmark_depth(marked_tag)
Beispiel #3
0
    def edge_triples(self):
        """Return the ``(source, relation, target)`` triples of this graph.

        The result is computed once and cached on the instance
        (``edge_triples_memoized`` / ``edges_are_memoized``); later calls
        return the cached list.

        For every ``EDGE_RE`` match in ``str(self)`` the first parenthesised
        span that holds the relation at nesting depth 0 is located. The
        source is the identifier in front of ``/`` in that span's leading
        node, and the target is the ``id`` group of the node following the
        relation. When no target node follows, a message is printed and
        ``'?'`` is recorded as the target.
        """
        if self.edges_are_memoized:
            return self.edge_triples_memoized
        memo = []
        text = str(self)
        # The spans and their offsets do not depend on the edge being
        # resolved, so compute them once instead of once per edge.
        spans = [(t, text.index(t)) for t in paren_utils.paren_iter(text)]

        for edge in self.EDGE_RE.finditer(text):
            rel_position = edge.start()
            rel = edge.group()
            # Raw string: "\s" is not a valid escape in a normal literal.
            # The relation is escaped so it is always matched literally.
            target_re = re.compile(
                rf'{re.escape(rel)}\s*[(]?\s*{self.NODE_RE.pattern}')
            for t, offset in spans:
                pos = rel_position - offset
                # A negative offset means the relation lies before this span.
                # str.startswith would count a negative start from the end
                # and could report a false hit, so skip such spans.
                if pos < 0:
                    continue
                if not t.startswith(rel, pos):
                    continue
                if paren_utils.depth_at(t, pos) != 0:
                    continue
                root = self.NODE_RE.match(t)
                source = root.group().split('/')[0].strip()
                x = target_re.match(t, pos=pos)
                if x:
                    memo.append((source, rel, x.group('id')))
                else:
                    print('Missing target node! ', rel,
                          re.sub(r'\s+', ' ', t))
                    memo.append((source, rel, '?'))
                break
        self.edge_triples_memoized = memo
        self.edges_are_memoized = True
        return memo
Beispiel #4
0
    def latex(text):
        """Render the AMR given in *text* as a TikZ picture.

        One node is emitted per parenthesised span of the AMR string,
        positioned with ``AMR_Latex.get_x`` / ``get_y`` and coloured with
        ``AMR_Latex.get_color``. One arrow is then emitted per edge. Edge ids
        are split on ``_``, with field 0 as the source node id and field 2 as
        the target node id.

        Returns the complete ``tikzpicture`` environment as a string.
        """
        amr = AMR(text)
        text = str(amr)
        # Wrap every "xN / concept" occurrence in an extra pair of
        # parentheses. NOTE(review): presumably so that each node forms its
        # own span for paren_iter below -- confirm.
        for x in re.findall(r'x[0-9]+ ?/ ?[^()\s]+', text):
            text = text.replace(x, '(' + x + ')')
        edges = list(zip(amr.edges(), amr.edge_ids()))
        elems = []
        max_depth = paren_utils.max_depth(text)
        # Loop-invariant: used to count how many spans open at each depth.
        marked_text = paren_utils.mark_depth(text)
        depth = 0

        i = 0
        node_depth = {}
        for t in paren_utils.paren_iter(text):
            node = amr.NODE_RE.match(t).group()
            node_id = node.split('/')[0].strip()
            # Keep only the concept when the node is written "xN/concept".
            if re.match('x[0-9]+/', node):
                node = node.split('/')[1]
            # Turn the first pair of straight quotes into LaTeX quotes.
            node = node.replace('"', '``', 1).replace('"', "''", 1)
            prev_depth = depth
            depth = paren_utils.depth_at(text, text.index(t))
            # Going one level deeper restarts the per-level counter.
            if depth > prev_depth:
                i = 0
            node_depth[node_id] = depth
            num_nodes = marked_text.count(f'<{depth}>')
            x = AMR_Latex.get_x(i, num_nodes)
            y = AMR_Latex.get_y(depth, max_depth)
            color = AMR_Latex.get_color(i)
            elems.append(
                f'\t\\node[{color}]({node_id}) at ({x},{y}) {{{node}}};')
            i += 1

        for edge, edge_id in edges:
            fields = edge_id.split('_')
            source = fields[0]
            target = fields[2]
            # Choose the anchors from the relative depth of the two nodes.
            if node_depth[source] > node_depth[target]:
                dir1, dir2 = 'north', 'south'
            elif node_depth[source] == node_depth[target]:
                dir1, dir2 = 'north', 'north'
            else:
                dir1, dir2 = 'south', 'north'
            elems.append(
                f'\t\\draw[->, thick] ({source}.{dir1}) -- ({target}.{dir2}) node[midway, above, sloped] {{{edge}}};'
            )

        # One TikZ style per colour name.
        styles = ''.join(
            f'{name}/.style={{rectangle, draw={name}!60, fill={name}!5, very thick, minimum size=7mm}},\n'
            for name in ('red', 'blue', 'green', 'purple', 'orange'))

        document = '\n\\begin{tikzpicture}[\n'
        document += styles
        document += ']\n'
        document += '\n'.join(elems)
        document += '\n\\end{tikzpicture}\n'

        return document
Beispiel #5
0
 def named_entities(self):
     """Yield an ``AMR`` for each span that carries a ``:name`` relation.

     Every parenthesised span of ``str(self)`` is depth-marked with
     ``paren_utils.mark_depth`` and matched from its start against a
     pattern made of the node pattern (``NODE_RE``), any text, ``:name``
     and a depth-1 marked group. For each match a new ``AMR`` is built from
     the root node and the content of that group.
     """
     # Raw f-string: "\s" is not a valid escape in a normal string literal.
     ne_re = re.compile(
         rf'(?P<root>{self.NODE_RE.pattern}).*:name\s+<1>(?P<name>.*?)</1>',
         re.DOTALL)
     for t in paren_utils.paren_iter(str(self)):
         x = ne_re.match(paren_utils.mark_depth(t))
         if not x:
             continue
         root = x.group('root')
         name = x.group('name')
         yield AMR(f'({root} :name ({name}) )')
Beispiel #6
0
 def phrases(ccg):
     """Yield a ``CCGBank.Phrase`` for each span of *ccg* that is a phrase.

     The string is rewritten before it is scanned:

     1. literal ``{`` / ``}`` become ``-LBR-`` / ``-RBR-``;
     2. every ``(`` / ``)`` becomes ``{`` / ``}``;
     3. for each ``<L tag ...>`` or ``<T tag ...>`` node, the braces inside
        the space-delimited tag are turned back into parentheses.

     The result is walked with ``paren_utils.paren_iter`` (default
     delimiters, ``bottom_up=True``), and spans accepted by
     ``CCGBank.Phrase_RE.match`` are yielded as ``CCGBank.Phrase``.
     """
     ccg = ccg.replace('{', '-LBR-').replace('}', '-RBR-')
     ccg = ccg.replace('(', '{').replace(')', '}')
     # Raw string: "\s" is not a valid escape in a normal string literal.
     # finditer keeps scanning the string it was given, so rebinding ``ccg``
     # inside the loop does not disturb the iteration.
     for t in re.finditer(r'<[LT] (?P<tag>[^\s>]+) .*?>', ccg):
         tag = t.group('tag')
         restored = tag.replace('{', '(').replace('}', ')')
         ccg = ccg.replace(f' {tag} ', ' ' + restored + ' ')
     for s in paren_utils.paren_iter(ccg, bottom_up=True):
         if CCGBank.Phrase_RE.match(s):
             yield CCGBank.Phrase(s)
Beispiel #7
0
 def phrases_and_indices(ccg):
     """Yield ``(phrase, 'first-last')`` pairs for the phrases of *ccg*.

     A copy of *ccg* is made in which the first occurrence of each word's
     text is replaced, in word order, by the marker ``{*i*}``, where ``i``
     is the word's position. The brace-delimited spans of that copy are
     walked bottom-up. For each span accepted by ``Readible.Phrase_RE`` the
     next phrase from ``Readible.phrases(ccg)`` is paired with the first
     and last word positions found in the span.
     """
     phrase_stream = Readible.phrases(ccg)
     marker_re = re.compile('[*](?P<n>[0-9]+)[*]')

     indexed = ccg
     for position, token in enumerate(Readible.words(ccg)):
         indexed = indexed.replace(token.text, f'{{*{position}*}}', 1)

     spans = paren_utils.paren_iter(
         indexed, bottom_up=True, lparen='{', rparen='}')
     for span in spans:
         if not Readible.Phrase_RE.match(span):
             continue
         positions = [hit.group('n') for hit in marker_re.finditer(span)]
         yield (next(phrase_stream), f'{positions[0]}-{positions[-1]}')
Beispiel #8
0
 def sub_amrs(self):
     """Yield an ``AMR`` for each parenthesised span of ``self.text``.

     Each span produced by ``paren_utils.paren_iter`` is wrapped in one
     pair of parentheses before being passed to ``AMR``.
     """
     for inner in paren_utils.paren_iter(self.text):
         wrapped = f'({inner})'
         yield AMR(wrapped)