Example #1
    def __init__(
        self,
        resource_loader=None,
        config=None,
        allow_relaxed=True,
        domain=None,
        intent=None,
    ):
        """Initializes the parser

        Args:
            resource_loader (ResourceLoader): An object which can load
                resources for the parser.
            config (dict, optional): The configuration for the parser. If none
                is provided the app config will be loaded.
        """
        if not resource_loader and not config:
            raise ValueError(
                "Parser requires either a configuration or a resource loader")
        app_path = resource_loader.app_path if resource_loader else None
        try:
            entity_types = path.get_entity_types(app_path) + ["unk"]
        except TypeError:
            entity_types = {"unk"}
        self._resource_loader = resource_loader
        self.config = get_parser_config(app_path, config, domain, intent) or {}
        configured_entities = set()
        for entity_type, entity_config in self.config.items():
            configured_entities.add(entity_type)
            configured_entities.update(entity_config.keys())

        self._configured_entities = configured_entities
        rules = generate_grammar(self.config, entity_types)
        self._grammar = FeatureGrammar.fromstring(rules)
        self._parser = FeatureChartParser(self._grammar)
        if allow_relaxed:
            relaxed_rules = generate_grammar(self.config,
                                             entity_types,
                                             relaxed=True)
            self._relaxed_grammar = FeatureGrammar.fromstring(relaxed_rules)
            self._relaxed_parser = FeatureChartParser(self._relaxed_grammar)
        else:
            self._relaxed_grammar = None
            self._relaxed_parser = None
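The relaxed grammar built above is only a fallback. A minimal sketch of the fallback pattern in isolation (the full version appears in the _parse method shown in Example #7):

def parse_with_fallback(strict_parser, relaxed_parser, tokens):
    # Try the strict grammar first; consult the relaxed grammar only
    # when the strict one yields no parse at all.
    parses = list(strict_parser.parse(tokens))
    if not parses and relaxed_parser is not None:
        parses = list(relaxed_parser.parse(tokens))
    return parses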
Example #2
File: analyse.py  Project: anorth/wwc-nltk
def main(argv):
    """
  Loads grammar files from command-line args then parses lines from standard input.
  """
    parser = argparse.ArgumentParser()
    parser.add_argument(dest='grammars',
                        nargs='+',
                        help='Grammar file path(s)')
    parser.add_argument('--draw',
                        dest='draw',
                        action='store_true',
                        help='Draw trees')
    parser.add_argument('-v',
                        '--verbose',
                        dest='verbose',
                        action='store_true',
                        help='Be verbose')
    args = parser.parse_args(argv)

    grammar = load_grammars(args.grammars)
    parser = FeatureChartParser(grammar,
                                trace=args.verbose,
                                trace_chart_width=80)

    # Read sentences from stdin, skipping comment and blank lines.
    for line in sys.stdin:
        if line[0] == '#':
            continue
        tokens = line.lower().strip().split()
        if not tokens:
            continue

        trees = list(parser.parse(tokens))
        print('*** {} ***'.format(tokens))
        if trees:
            for tree in trees:
                print(tree.pformat(margin=80))
                if args.draw:
                    tree.draw()
                # print(TreePrettyPrinter(tree).text())
        else:
            print('Could not parse {}'.format(tokens))
        print('\n')
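The load_grammars helper is not shown in this excerpt. A minimal sketch, assuming each argument is a plain-text feature grammar file whose productions can simply be concatenated:

from nltk.grammar import FeatureGrammar

def load_grammars(paths):
    # Concatenate the rule files and build one combined feature grammar.
    rules = []
    for p in paths:
        with open(p) as f:
            rules.append(f.read())
    return FeatureGrammar.fromstring('\n'.join(rules))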
Example #3
def main():
    ugrammar = FeatureGrammar.fromstring(rules)
    uparser = FeatureChartParser(ugrammar)
    for index, sent in enumerate(text, start=1):
        print_tree(sent, uparser, index)
    print("Input testing sentence or the number of the above one: (q to quit)")
    line = sys.stdin.readline().strip()
    while line != "q":
        try:
            index = int(line)
            print_tree(text[index], uparser, index)
        except IndexError:
            print("Index out of range. Please check.")
        except ValueError:
            print_tree(line, uparser, -1)
        print("Input testing sentence or the number of the above one: (q to quit)")
        line = sys.stdin.readline().strip()
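print_tree, rules, and text are defined elsewhere in the project. A plausible stand-in for print_tree, for readers who want to run the loop:

def print_tree(sent, parser, index):
    # Hypothetical helper: parse one sentence and print every tree found.
    tokens = sent.split()
    trees = list(parser.parse(tokens))
    print("#{}: {}".format(index, sent))
    if not trees:
        print("Could not parse {}".format(tokens))
    for tree in trees:
        print(tree)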
Example #4
    ADV   -> 'always' | 'never' | 'intensely'
    
    ############## Preposition ################
    P -> 'in' | 'before' | 'after' | 'when' | 'on' | 'beyond' | 'from' | 'to' | 'at'
    
    ######## Auxiliary #################
    AUX[FORM=base, NUM=plur]     -> 'do'
    AUX[FORM=vbz, NUM=sing]      -> 'does'
    AUX[FORM=pret]               -> 'did'
    AUX[FORM=pastpart, NUM=sing] -> 'has'
    AUX[FORM=pastpart, NUM=plur] -> 'have'
    
   
""")

uparser = FeatureChartParser(ugrammar)

text = """\
Bart laughs
Homer laughed
Bart and Lisa drink milk
Bart wears blue shoes
Lisa serves Bart a healthy green salad
Homer serves Lisa
Bart always drinks milk
Lisa thinks Homer thinks Bart drinks milk
Homer never drinks milk in the kitchen before midnight
when Homer drinks milk Bart laughs
when does Lisa drink the milk on the table
when do Lisa and Bart wear shoes
Bart thinks Lisa drinks milk on the table
"""
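The AUX entries above pair a FORM value with a NUM value so that subject-auxiliary agreement falls out of unification. A self-contained sketch of the same mechanism (not the truncated grammar above):

from nltk import FeatureChartParser
from nltk.grammar import FeatureGrammar

demo = FeatureGrammar.fromstring("""
S -> AUX[NUM=?n] NP[NUM=?n] V
NP[NUM=sing] -> 'Lisa'
NP[NUM=plur] -> 'they'
AUX[FORM=vbz, NUM=sing] -> 'does'
AUX[FORM=base, NUM=plur] -> 'do'
V -> 'laugh'
""")
parser = FeatureChartParser(demo)
print(len(list(parser.parse('does Lisa laugh'.split()))))  # 1 parse
print(len(list(parser.parse('do Lisa laugh'.split()))))    # 0 parses: NUM clash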
Example #5
import nltk
from nltk import FeatureChartParser

fcfg = nltk.data.load('P2.fcfg')
parser = FeatureChartParser(fcfg)


def parse_text(text):
    examples = text.splitlines()
    for sent in examples:
        print(sent)
        parses = parser.parse(sent.split())
        for tree in parses:
            print(tree)


def parse_file(name):
    with open(name, 'r') as f:
        text = f.read()
    parse_text(text)


print("================ Positive examples ================")
parse_file('P2.pos')
print("================ Negative examples ================")
parse_file('P2.neg')
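parse_text prints each sentence but gives no indication when a parse fails, which makes the negative examples hard to read. A small variant that reports the outcome explicitly:

def parse_text_verbose(text):
    # Like parse_text, but states explicitly when a sentence is rejected.
    for sent in text.splitlines():
        trees = list(parser.parse(sent.split()))
        print('[{}] {}'.format('OK' if trees else 'NO PARSE', sent))
        for tree in trees:
            print(tree)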
Example #6
def unification_grammar():
    ugrammar = FeatureGrammar.fromstring("""\
    ################### RULES #################
    S -> NP[NUM=?n] VP[NUM=?n]
    S -> PREP_P S
    S -> Wh_P AUX[NUM=?n] NP[NUM=?n] VP
    
    NP[NUM=?n] -> ProperNoun[NUM=?n] 
    NP[NUM=?n] -> N[NUM=?n] | ADJ_P NP[NUM=?n] | DET[NUM=?n] NP[NUM=?n] | N[NUM=?n] PREP_P | ADJ_P
    NP[NUM=?n] -> ProperNoun[NUM=?n] GER_P | GER_P
    NP[NUM=pl] -> NP[NUM=?n] CC NP[NUM=?n]
     
    VP[SUBCAT=?rest, NUM=?n] -> V[NUM=?n, SUBCAT=?rest] | VP[NUM=?n, TENSE=?t, SUBCAT=[HEAD=?arg, TAIL=?rest]] ARG[CAT=?arg]
    VP[SUBCAT=?rest, NUM=?n] -> ADV_P V[NUM=?n, SUBCAT=?rest] | V[NUM=?n, SUBCAT=?rest] ADV_P
    VP[SUBCAT=?rest, NUM=?n] -> MOD_P VP[TENSE=?t, SUBCAT=[HEAD=?arg, TAIL=?rest]] ARG[CAT=?arg]
    VP[SUBCAT=?rest, NUM=?n] -> VTB[NUM=?n, SUBCAT=[HEAD=?arg, TAIL=?rest]] ARG[CAT=?arg]
    VP[SUBCAT=?rest, NUM=?n] -> VTB VP[SUBCAT=?rest]
    
    GER_P -> GER NP
    
    ADJ_P -> ADJ | ADJ ADJ_P
    ADV_P -> ADV | ADV ADV_P

    PREP_P -> PREP NP | PREP S
    MOD_P -> MOD AUX[NUM=pl] |  MOD ADV AUX[NUM=pl]
    Wh_P -> Wh | Wh ARG[CAT=?arg] 
    
    ARG[CAT=np] -> NP
    ARG[CAT=pp] -> PREP_P
    ARG[CAT=s] -> S
    
    ################# Lexicons #################
    ################## NOUN ###################
    ###########################################
    ProperNoun[NUM=sg] -> 'Homer' | 'Bart' | 'Lisa'
    N[NUM=sg] -> 'milk' | 'salad' | 'midnight' | 'kitchen' | 'table' 
    N[NUM=pl] -> 'shoes' | 'tables'
    
    ################# VERB ####################
    ###########################################
    
    ############### PRESENT ###################
    #########----- Intransitive -----##########
    V[TENSE=pres, NUM=sg, SUBCAT=nil]-> 'laughs' | 'smiles' | 'walks' | 'serves' | 'drinks'
    V[TENSE=pres, NUM=pl, SUBCAT=nil] -> 'laugh' | 'smile' | 'walk' | 'serve' |'drink'
    
    #########----- Transitive ------###########
    V[TENSE=pres, NUM=sg, SUBCAT=[HEAD=s,TAIL=nil]] -> 'thinks' | 'believes'
    V[TENSE=pres, NUM=pl, SUBCAT=[HEAD=s,TAIL=nil]] -> 'think' | 'believe'
    
    V[TENSE=pres, NUM=sg, SUBCAT=[HEAD=np,TAIL=nil]] ->'serves' | 'drinks' | 'wears' | 'likes' 
    V[TENSE=pres, NUM=pl, SUBCAT=[HEAD=np,TAIL=nil]] ->'serve' | 'drink' | 'wear' | 'like'
    
    V[TENSE=pres, NUM=sg, SUBCAT=[HEAD=pp,TAIL=nil]] ->'walks' | 'teaches' 
    V[TENSE=pres, NUM=pl, SUBCAT=[HEAD=pp,TAIL=nil]] ->'walk' | 'teach' 
    
    ######### primary & secondary ########
    V[TENSE=pres, NUM=sg, SUBCAT=[HEAD=np, TAIL=[HEAD=np,TAIL=nil]]] -> 'serves'
    V[TENSE=pres, NUM=pl, SUBCAT=[HEAD=np, TAIL=[HEAD=np,TAIL=nil]]] -> 'serve'
    V[TENSE=pres, NUM=pl, SUBCAT=[HEAD=s, TAIL=[HEAD=np,TAIL=nil]]] -> 'think' | 'believe'
    
    ################# Past ####################
    #########----- Intransitive -----##########
    V[TENSE=past, SUBCAT=nil] -> 'laughed' | 'smiled' | 'walked'
    
    #########----- Transitive ------###########
    V[TENSE=past, SUBCAT=[HEAD=np,TAIL=nil]] -> 'drank' | 'wore' | 'served'
    V[TENSE=pastpart, SUBCAT=[HEAD=np,TAIL=nil]] ->'drunk' | 'worn' | 'served' | 'seen'
    
     ############### PRESENT CONT. #############
    V[TENSE=prescon, FORM=prespart , SUBCAT=[HEAD=np,TAIL=nil]] -> 'drinking' | 'wearing' 
    V[TENSE=prescon, FORM=prespart , SUBCAT=[HEAD=pp,TAIL=nil]] -> 'drinking'
    
    ################ Determiner ###############
    DET[NUM=sg] -> 'a' | 'the' | 'that'
    DET[NUM=pl] -> 'the' | 'these' | 'those'
    
    ################ Conjunction ##############
    CC -> 'and'
    
    ################## Modal ##################
    MOD -> 'may'
    
    ################# Gerund #################
    GER -> 'drinking'
    
    ############ Adverb & Adjective ############
    ADJ -> 'blue' | 'healthy' | 'green' | 'same'
    ADV -> 'always' | 'never' | 'not' | 'yesterday'
    
    ############## Preposition ##################
    PREP -> 'in' | 'before' | 'when' | 'on'  
    
    AUX[NUM=sg] -> 'does' | 'has'
    AUX[NUM=pl] -> 'do' | 'have'
    VTB[NUM=sg] -> 'is'
    VTB[NUM=pl] -> 'are'
    
    Wh -> 'when' | 'what' | 'where' | 'whom'
    """)
    uparser = FeatureChartParser(ugrammar)
    sents = text_extended.splitlines()
    for sent in sents:
        parses = uparser.parse(sent.split())
        print(sent)
        for tree in parses:
            print(tree)
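The SUBCAT feature in this grammar encodes each verb's argument frame as a cons-style list ([HEAD=..., TAIL=...]) that the VP rules consume one ARG at a time. A stripped-down, self-contained sketch of the same technique:

from nltk import FeatureChartParser
from nltk.grammar import FeatureGrammar

g = FeatureGrammar.fromstring("""
S -> NP VP[SUBCAT=nil]
VP[SUBCAT=?rest] -> VP[SUBCAT=[HEAD=?arg, TAIL=?rest]] ARG[CAT=?arg]
VP[SUBCAT=?frame] -> V[SUBCAT=?frame]
ARG[CAT=np] -> NP
NP -> 'Lisa' | 'milk'
V[SUBCAT=[HEAD=np, TAIL=nil]] -> 'drinks'
""")
parser = FeatureChartParser(g)
for tree in parser.parse('Lisa drinks milk'.split()):
    print(tree)  # the verb's SUBCAT list is emptied as 'milk' is attached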
Example #7
class Parser:
    """
    A language parser which is used to extract relations between entities in a
    given query and group related entities together.

    The parser uses a context-free grammar based on a configuration to generate
    candidate entity groupings. Heuristics are then used to rank and select a
    grouping.

    This rule-based parser will be helpful in many situations, but if you have
    a sufficiently sophisticated entity hierarchy, you may benefit from using a
    statistical approach.

    Attributes:
        config (dict): The parser config.
    """
    def __init__(
        self,
        resource_loader=None,
        config=None,
        allow_relaxed=True,
        domain=None,
        intent=None,
    ):
        """Initializes the parser

        Args:
            resource_loader (ResourceLoader): An object which can load
                resources for the parser.
            config (dict, optional): The configuration for the parser. If none
                is provided the app config will be loaded.
        """
        if not resource_loader and not config:
            raise ValueError(
                "Parser requires either a configuration or a resource loader")
        app_path = resource_loader.app_path if resource_loader else None
        try:
            entity_types = path.get_entity_types(app_path) + ["unk"]
        except TypeError:
            entity_types = {"unk"}
        self._resource_loader = resource_loader
        self.config = get_parser_config(app_path, config, domain, intent) or {}
        configured_entities = set()
        for entity_type, entity_config in self.config.items():
            configured_entities.add(entity_type)
            configured_entities.update(entity_config.keys())

        self._configured_entities = configured_entities
        rules = generate_grammar(self.config, entity_types)
        self._grammar = FeatureGrammar.fromstring(rules)
        self._parser = FeatureChartParser(self._grammar)
        if allow_relaxed:
            relaxed_rules = generate_grammar(self.config,
                                             entity_types,
                                             relaxed=True)
            self._relaxed_grammar = FeatureGrammar.fromstring(relaxed_rules)
            self._relaxed_parser = FeatureChartParser(self._relaxed_grammar)
        else:
            self._relaxed_grammar = None
            self._relaxed_parser = None

    def parse_entities(
        self,
        query,
        entities,
        all_candidates=False,
        handle_timeout=True,
        timeout=MAX_PARSE_TIME,
    ):
        """Determines groupings of entities for the given query.

        Args:
            query (Query): The query being parsed.
            entities (list[QueryEntity]): The entities to find groupings for.
            all_candidates (bool, optional): Whether to return all the entity candidates.
            handle_timeout (bool, optional): False if an exception should be raised in the
                event that parsing times out. Defaults to True.
            timeout (float, optional): The amount of time to wait for the parsing to complete.
                By default this is set to MAX_PARSE_TIME. If None is passed, parsing will
                never time out.

        Returns:
            (tuple[QueryEntity]): An updated version of the entities collection passed in with \
                their parent and children attributes set appropriately.
        """
        if not self._configured_entities:
            return entities

        if not handle_timeout:
            return self._parse(query,
                               entities,
                               all_candidates=all_candidates,
                               timeout=timeout)

        try:
            return self._parse(query,
                               entities,
                               all_candidates=all_candidates,
                               timeout=timeout)
        except ParserTimeout:
            logger.warning("Parser timed out parsing query %r", query.text)
            return entities

    def _parse(self, query, entities, all_candidates, timeout):
        entity_type_count = defaultdict(int)
        entity_dict = {}
        tokens = []  # tokens to be parsed

        # generate sentential form (assumes entities are sorted)
        for entity in entities:
            entity_type = entity.entity.type
            role_type = entity.entity.role
            if role_type:
                # Append role type to entity type with - separator
                entity_with_role_type = entity_type + "--" + role_type
                if entity_with_role_type in self._configured_entities:
                    entity_type = entity_with_role_type
            if entity_type not in self._configured_entities:
                entity_type = "unk"
            entity_id = "{}{}".format(entity_type,
                                      entity_type_count[entity_type])
            entity_type_count[entity_type] += 1
            entity_dict[entity_id] = entity
            tokens.append(entity_id)

        logger.debug("Parsing sentential form: %r", " ".join(tokens))
        start_time = time.time()
        parses = []
        for parse in self._parser.parse(tokens):
            parses.append(parse)
            if timeout is not None and (time.time() - start_time) > timeout:
                raise ParserTimeout("Parsing took too long")

        if not parses and self._relaxed_parser:
            for parse in self._relaxed_parser.parse(tokens):
                parses.append(parse)
                if timeout is not None and (time.time() -
                                            start_time) > timeout:
                    raise ParserTimeout("Parsing took too long")

        if not parses:
            if all_candidates:
                return []
            return entities

        ranked_parses = self._rank_parses(query, entity_dict, parses, timeout,
                                          start_time)
        if all_candidates:
            return ranked_parses

        # if we still have more than one, choose the first
        entities = self._get_flat_entities(ranked_parses[0], entities,
                                           entity_dict)
        return tuple(sorted(entities, key=lambda e: e.span.start))

    def _rank_parses(self,
                     query,
                     entity_dict,
                     parses,
                     timeout,
                     start_time=None):
        start_time = start_time or time.time()
        resolved = OrderedDict()

        for parse in parses:
            if timeout is not None and time.time() - start_time > timeout:
                raise ParserTimeout("Parsing took too long")
            resolved[self._resolve_parse(parse)] = None
        filtered = (p for p in resolved.keys())

        # Prefer parses with fewer groups
        parses = list(sorted(filtered, key=len))
        filtered = (p for p in parses if len(p) <= len(parses[0]))

        # Prefer parses with minimal distance from dependents to heads
        parses = list(
            sorted(filtered,
                   key=lambda p: self._parse_distance(p, query, entity_dict)))
        min_parse_dist = self._parse_distance(parses[0], query, entity_dict)
        filtered = (
            p for p in parses
            if self._parse_distance(p, query, entity_dict) <= min_parse_dist)

        # TODO: apply precedence

        return list(filtered)

    def _parse_distance(self, parse, query, entity_dict):
        total_link_distance = 0
        stack = list(parse)
        while stack:
            node = stack.pop()
            head = entity_dict[node.id]
            for dep in node.dependents or set():
                if dep.dependents:
                    stack.append(dep)
                    continue
                child = entity_dict[dep.id]
                if child.token_span.start > head.token_span.start:
                    intra_entity_span = Span(head.token_span.end,
                                             child.token_span.start)
                else:
                    intra_entity_span = Span(child.token_span.end,
                                             head.token_span.start)
                link_distance = 0
                for token in intra_entity_span.slice(query.text.split(" ")):
                    if token in self.config[node.type][
                            dep.type]["linking_words"]:
                        link_distance -= 0.5
                    else:
                        link_distance += 1
                total_link_distance += link_distance

        return total_link_distance

    @staticmethod
    def _get_flat_entities(parse, entities, entity_dict):
        stack = [g.to_query_entity(entity_dict) for g in parse]
        new_dict = {}
        while stack:
            entity = stack.pop()
            new_dict[(entity.entity.type, entity.span.start)] = entity

            for child in entity.children or ():
                stack.append(child)

        return [
            new_dict.get((e.entity.type, e.span.start), e) for e in entities
        ]

    @classmethod
    def _resolve_parse(cls, node):
        groups = set()
        for child in node:
            child_symbol = child.label()[TYPE_FEATURE]
            if child_symbol in START_SYMBOLS:
                groups.update(cls._resolve_parse(child))
            else:
                group = cls._resolve_group(child).freeze()
                groups.add(group)
        return frozenset(groups)

    @classmethod
    def _resolve_group(cls, node):
        symbol = node.label()[TYPE_FEATURE]
        if not symbol[0].isupper():
            # this node is a generic entity of type {symbol}, its child is the terminal
            return _EntityNode(symbol, node[0], None)

        # if first char is capitalized, this is a group!
        group_type = symbol.lower()
        dependents = set()
        for child in node:
            child_symbol = child.label()[TYPE_FEATURE]
            if child_symbol == symbol:
                # this is the ancestor of this group
                group = cls._resolve_group(child)
            elif child_symbol == group_type:
                # this is the root ancestor of this group
                group = cls._resolve_group(child)
                group = _EntityNode(group.type, group.id, set())
            else:
                dependents.add(cls._resolve_group(child).freeze())

        group.dependents.update(dependents)
        return group
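The heart of _parse is its sentential form: each recognized entity is replaced by a synthetic terminal such as dish0 or unk1 before the token sequence is handed to the chart parser. A hypothetical stand-alone version of that token generation (the entity type names are invented for illustration):

from collections import defaultdict

def sentential_form(entity_types):
    # Each entity becomes a numbered terminal the grammar can anchor on.
    counts = defaultdict(int)
    tokens = []
    for etype in entity_types:
        tokens.append('{}{}'.format(etype, counts[etype]))
        counts[etype] += 1
    return tokens

print(sentential_form(['dish', 'option', 'dish']))
# ['dish0', 'option0', 'dish1']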
Example #8
V[FORM=vbz] -> 'laughs' | 'wears' | 'serves' | 'drinks' | 'thinks' | 'does' | 'do'
V[FORM=pret] -> 'laughed'
V[FORM=base] -> 'drink' | 'wear' | 'laugh'
DET[NUM=sg] -> 'a' | 'the'
Adjective[CAT=des] -> 'healthy'
Adjective[CAT=col] -> 'blue' | 'green'
Adverb[T=des] -> 'always' | 'never' | 'before'
Preposition[T=gen] -> 'in' | 'on'
NN[NUM=sg] -> 'shoes' | 'salad' | 'milk' | 'kitchen' | 'midnight' | 'table'
NN[NUM=pl] -> 'milk' | 'shoes'
ConjuctiveJoin[NUM=pl] -> 'and'
ConjuctiveConditional[T=con] -> 'when'
ConjuctiveConditional[T=quest] -> 'when'
""")

grammar_feature_parser = FeatureChartParser(grammar_feature)
#'''


def _parse_and_print_unification(sents):
    for i, sent in enumerate(sents, start=1):
        parses = grammar_feature_parser.parse(sent.split())
        print("\r\n")
        print("Sentence", i, ":", sent)
        for tree in parses:
            print(tree)


_parse_and_print_unification(sents_split)
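sents_split is defined elsewhere in the source file. A plausible stand-in for trying the function, reusing sentences from Example #4:

sents_split = [
    'Bart laughs',
    'Homer laughed',
    'Bart and Lisa drink milk',
]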