Example #1
    def induce_entity_grammar(self, start_grammar):
        """Induce an entity-swapping grammar.

        Get the entities from the original dataset.
        Get the places to put holes from start_grammar.
        """
        new_grammar = Grammar()

        # Entity rules
        for x, y in self.dataset:
            alignments = self.domain.get_entity_alignments(x, y)
            for cat, x_span, y_span in alignments:
                x_str = x[x_span[0]:x_span[1]]
                y_str = y[y_span[0]:y_span[1]]
                new_grammar.add_rule(cat, x_str, y_str)

        # Root/template rules
        for cat, x_str, y_str in start_grammar.rule_list:
            # Anchor on a single mention in x; allow a one-to-many x-to-y mapping
            alignments = self.domain.get_entity_alignments(x_str, y_str)
            x_swaps = list(set(
                (x_span, '%s_%d' % (inner_cat, x_span[0]))
                for inner_cat, x_span, y_span in alignments))
            x_new = self.splice(x_str, x_swaps)
            y_swaps = [(y_span, '%s_%d' % (inner_cat, x_span[0]))
                       for inner_cat, x_span, y_span in alignments]
            y_new = self.splice(y_str, y_swaps)
            new_grammar.add_rule(cat, x_new, y_new)

        return new_grammar
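For context, here is a minimal sketch of the splice() helper the method relies on. This is an assumption about its behavior, not the original implementation: it replaces each (start, end) span with its placeholder, applying the swaps right to left so earlier offsets stay valid.

def splice(s, swaps):
    # swaps: ((start, end), placeholder) pairs, the shape built above
    # (hypothetical signature inferred from the call sites).
    for (start, end), placeholder in sorted(swaps, key=lambda sw: -sw[0][0]):
        s = s[:start] + placeholder + s[end:]
    return s

# e.g. splice('flight from boston', [((12, 18), '$city_12')])
# returns 'flight from $city_12'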
Example #2
    def induce_nesting_grammar(self, start_grammar):
        """Induce an entity-swapping grammar.

    Get everything from the start_grammar.
    """
        new_grammar = Grammar()
        for cat, x_str, y_str in start_grammar.rule_list:
            alignments, productions = self.domain.get_nesting_alignments(
                x_str, y_str)

            # Replacements
            for cat_p, x_p, y_p in productions:
                new_grammar.add_rule(cat_p, x_p, y_p)

            # Templates
            x_swaps = list(set(
                (x_span, '%s_%d' % (inner_cat, x_span[0]))
                for inner_cat, x_span, y_span in alignments))
            x_new = self.splice(x_str, x_swaps)
            y_swaps = [(y_span, '%s_%d' % (inner_cat, x_span[0]))
                       for inner_cat, x_span, y_span in alignments]
            y_new = self.splice(y_str, y_swaps)
            new_grammar.add_rule(cat, x_new, y_new)
        new_grammar.print_self()
        return new_grammar
Example #3
def new_c():
    ret = Grammar()
    # Every left-hand side in the global `rules` dict is a non-terminal.
    ret.add_nonterminals(set(rules))
    # Right-hand-side tokens are terminals unless they name a
    # non-terminal (by convention, a string starting with "$").
    for right in rules.values():
        for tokens in right:
            ret.add_terminals({tok for tok in tokens
                               if tok and (callable(tok)
                                           or (isinstance(tok, str)
                                               and tok[0] != "$"))})
    for k, v in rules.items():
        ret.add_rule(k, v)
    return ret
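This relies on the convention, also visible in Examples #7 and #12, that token strings beginning with "$" name non-terminals; everything else, including callables, is registered as a terminal. A hypothetical rules dict in the expected shape:

# Hypothetical input, for illustration only:
rules = {"$greeting": [("hello", "$name"), ("hi", "$name")],
         "$name": [("world",)]}
# new_c() registers {"$greeting", "$name"} as non-terminals and
# {"hello", "hi", "world"} as terminals.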
Example #4
def separate_prefixes(g: grammar.Grammar, layer: grammar.NonTerminal,
                      prefix: grammar.Derivation, root: PrefixNode,
                      common_depth: int, nterm_sequence: Iterator):
    """
    Separate written into tree derivations by common prefixes.

    Uses recursion, maximal depth of it can be as big as
    depth of tree plus 1.

    :param g: Grammar, to which derivations will be recorded.
    :param layer: non-terminal symbol to which the derivation belong.
    :param prefix: common prefix.
    :param root: prefix tree.
    :param common_depth: depth of common prefix.
    :param nterm_sequence: sequence of new non-terminals.
    :return: none.
    """
    # A root of None means this node is a leaf.
    if root is None:
        g.add_rule(layer, prefix)
        return

    # common_depth == -1 marks the initial call.
    if common_depth == -1:
        common_depth = 1
    else:
        if len(root) == 1:
            common_depth += 1
        else:
            common_depth = 0

    if common_depth >= 1:
        new_layer = layer
    else:
        # If there is a fork, we have to write a
        # production of the form
        #     Layer -> prefix NewLayer
        # where the NewLayer non-terminal
        # keeps the symbols of the fork.
        new_layer = next(nterm_sequence)
        g.add_rule(layer, prefix + (new_layer, ))

    for symb, next_node in root.items():
        # Handle the EmptyWord case.
        if isinstance(symb, tuple):
            t_symb = symb
        else:
            t_symb = (symb, )
        # Assemble the prefix.
        if common_depth >= 1:
            new_prefix = prefix + t_symb
        else:
            new_prefix = t_symb

        separate_prefixes(g, new_layer, new_prefix, next_node, common_depth,
                          nterm_sequence)
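A hedged illustration of the PrefixNode shape the function assumes: a dict-like node mapping each symbol to its child, with None marking a leaf. For the derivations (a, b) and (a, c), which share the prefix (a,):

# tree = {'a': {'b': None, 'c': None}}
# separate_prefixes(g, S, (), tree, -1, seq) keeps layer S while the
# prefix is still common, then hits the fork, draws N1 = next(seq),
# and records:
#   S  -> a N1
#   N1 -> b
#   N1 -> c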
Example #5
    def make_grammar(self):
        grammar = Grammar()
        r1 = Rule(
            Symbol("NP", {"AGR": "?a"}), [
                Symbol("ART", {"AGR": "?a"}), Symbol("N", {"AGR": "?a"})])
        r1.set_variable_code("?a", -1)
        # -1 should be the default for any undefined variable
        # that is referenced while constructing.
        grammar.add_rule(r1)
        return grammar
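A hedged reading of the feature structure: the shared "?a" variable unifies the AGR (agreement) feature across ART and N, and -1 stands for a variable that has not been bound yet.

# Illustration with hypothetical feature values, not from the library:
# ART[AGR=sg] + N[AGR=sg] -> NP[AGR=sg]   ("a dog": ?a unifies to sg)
# ART[AGR=sg] + N[AGR=pl] -> no rule      ("a dogs": sg and pl clash)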
Example #6
    def rulelist(self, start, end, item, tail):
        # Helper function
        def process_elements(grammar, rulename, elements):
            new_elements = []
            for element in elements:
                element_type = type(element)
                if element_type is tuple:
                    min, max = element[0], element[1]
                    rest = process_elements(grammar, rulename, element[2:])
                    new_elements.extend(min * rest)
                    if max is None:
                        new_elements.append((None,) + rest)
                    else:
                        max = max - min
                        if max > 0:
                            new_elements.append((max,) + rest)
                elif element_type is Alternation:
                    aux = grammar.get_internal_rulename(rulename)
                    for alt in element:
                        grammar.add_rule(aux, *alt)
                    new_elements.append(aux)
                else:
                    new_elements.append(element)
            if type(elements) is tuple:
                return tuple(new_elements)
            return new_elements

        # Go
        grammar = Grammar()
        rulename, alternation = item
        for elements in alternation:
            elements = process_elements(grammar, rulename, elements)
            grammar.add_rule(rulename, *elements)
        while tail:
            item, tail = tail
            if item is None:
                continue
            rulename, alternation = item
            for elements in alternation:
                elements = process_elements(grammar, rulename, elements)
                grammar.add_rule(rulename, *elements)
        return grammar
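A hedged walk-through of the repetition branch, assuming ABNF-style (min, max, *elements) tuples where max is None for an unbounded repeat:

# The tuple (1, 3, 'a') -- i.e. ABNF 1*3"a" -- expands as
#   new_elements.extend(1 * ('a',))   # one mandatory 'a'
#   new_elements.append((2, 'a'))     # plus up to two optional repeats
# while (0, None, 'a') -- ABNF *"a" -- contributes only the unbounded
# tail (None, 'a').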
Example #7
    def induce_concat_grammar(self, start_grammar, concat_num):
        new_grammar = Grammar()

        for cat, x_str, y_str in start_grammar.rule_list:
            if cat == start_grammar.ROOT:
                new_grammar.add_rule('$sentence', x_str, y_str)
            else:
                new_grammar.add_rule(cat, x_str, y_str)
        root_str = (' %s ' % Vocabulary.END_OF_SENTENCE).join(
            '$sentence_%d' % i for i in range(concat_num))
        new_grammar.add_rule(new_grammar.ROOT, root_str, root_str)
        return new_grammar
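For concat_num = 2 the new root concatenates two independently expanded copies of the old root. Hedged illustration, with '</s>' standing in for whatever Vocabulary.END_OF_SENTENCE actually is:

# root_str == '$sentence_0 </s> $sentence_1'
# i.e. ROOT -> $sentence_0 </s> $sentence_1 on both the x and y sides.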
Example #8
    def induce_concat_grammar(self, start_grammar, concat_num):
        new_grammar = Grammar()

        for cat, x_str, y_str in start_grammar.rule_list:
            if cat == start_grammar.ROOT:
                new_grammar.add_rule('$sentence', x_str, y_str)
            else:
                new_grammar.add_rule(cat, x_str, y_str)
        # Join the sentence copies with a '[SEP]' token rather than an
        # end-of-sentence marker (cf. Example #7).
        root_str = (' %s ' % '[SEP]').join(
            '$sentence_%d' % i for i in range(concat_num))
        new_grammar.add_rule(new_grammar.ROOT, root_str, root_str)
        return new_grammar
Example #9
    print(response_str)
    print()
    exit()

word_list = req_dict['word_list']
m_list = [Monomial(d) for d in word_list]
sys.stderr.write(str(m_list) + '\n')

grammar = Grammar()
rule_list = req_dict['grammar']
for rule in rule_list:
    rule_str = ' '.join(rule[:2]) + ' ' + ' , '.join(rule[2:])
    try:
        grammar.add_rule(rule_from_string(rule_str))
        response_str = 'OK'
    except Exception as ex:
        response_str = str(ex)

cyk = CYK_Parser(m_list, grammar)
cyk.parse()

dict_list, parse_table = cyk.table_to_plain_data()

print('Content-type:text/html')
print()  # blank line, end of headers
print(json.dumps({'dict_list': dict_list, 'parse_table': parse_table}))

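For reference, the join above turns a JSON rule such as ["S", "->", "subj:NP", "verb:VP"] (a hypothetical payload; the real shape depends on the client) into the same string format that rule_from_string consumes in Example #11:

# ' '.join(['S', '->']) + ' ' + ' , '.join(['subj:NP', 'verb:VP'])
# == 'S -> subj:NP , verb:VP'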
Example #10
def induce(parsed_sents):
    def get_rule_from_node(node):
        if not node.is_leaf():
            child_tags = []
            for child in node.children:
                if child.is_leaf():
                    leafs[node.tag].add(child.text)
                else:
                    child_tags.append(child.tag)
            if len(child_tags):
                key = slugify(child_tags)
                nodes[node.tag].add(key)

    g = Grammar()

    for parsed_sent in parsed_sents:
        # Build a tree from the bracketed parse, one character at a time.
        root = Node()
        current = root

        for char in str(parsed_sent):
            if char == '(':
                child = Node()
                child.parent = current
                current.children.append(child)
                current = child
            elif char == ')':
                if isinstance(current, Leaf):
                    current = current.parent
                current = current.parent
            elif re.match(r'\s', char):
                current.tag_parsed = True
            else:
                if isinstance(current, Leaf):
                    current.text += char
                elif current.tag_parsed:
                    leaf = Leaf()
                    leaf.parent = current
                    leaf.text += char
                    current.children.append(leaf)
                    current = leaf
                else:
                    current.tag += char

        # Collect one rule per inner node: non-terminal expansions in
        # `nodes`, terminal productions in `leafs`.
        leafs = defaultdict(set)
        nodes = defaultdict(set)
        root.children[0].descend(get_rule_from_node)

        for (src, tar_set) in nodes.items():
            for slugged_tars in tar_set:
                tars = deslugify(slugged_tars)
                g.add_rule(src, tars, False)

        for (src, tars) in leafs.items():
            g.add_rule(src, tars, True)

    g.dedup()
    return g
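A hedged illustration of the induction, assuming Penn-style bracketed parses:

# For the parse  (S (NP (N dogs)) (VP (V bark)))  the walk collects
#   nodes: S -> NP VP,  NP -> N,  VP -> V   (added with terminal=False)
#   leafs: N -> dogs,   V -> bark           (added with terminal=True)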
Example #11
from grammar import Grammar
from rule_builder import rule_from_string

grammar = Grammar()
grammar.add_rule(rule_from_string('S -> subj:NP , verb:VP'))

print(grammar)
Example #12
    },
    "$NP": {
        ("$A", "$NP"): Fraction(3, 10),
        ("$N",): Fraction(7, 10),
    },
    "$VP": {
        ("$V", "$NP"): Fraction(3, 4),
        ("$V",): Fraction(1, 4),
    },
    "$N": {
        ('ideas',): .5,
        ('linguists',): .5,
    },
    "$V": {
        ("hate",): .5,
        ("generate",): .5,
    },
    "$A": {
        ("great",): .5,
        ("green",): .5,
    }
}

english = Grammar()
english.add_terminals({'generate', 'hate', 'great', 'green', 'ideas', 'linguists'})
english.add_nonterminals({"$S", '$NP', '$VP', '$N', '$V', '$A'})
for k, v in rules.items():
    english.add_rule(k, v)

print(english.expand("$S"))
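Assuming expand() samples a derivation top-down according to the rule weights, and that the truncated "$S" block above holds the conventional $S -> $NP $VP start production, one plausible sample would be:

# english.expand("$S")  ->  'green ideas hate linguists'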