Ejemplo n.º 1
0
def main():
    usage = "%prog [<options>] <model-name>"
    description = "Queries an ngram model interactively"
    optparser = OptionParser(usage=usage, description=description)
    # Read in command line options and args
    options, arguments = parse_args_with_config(optparser)
    
    if len(arguments) < 1:
        print "Specify a model name"
        sys.exit(1)
    model_name = arguments[0]
        
    # Load the ngram model
    ngmodel = NgramTaggerModel.load_model(model_name)
    model = ngmodel.model
    
    input_getter = input_iterator(">> ")
    # Load the shell history if possible
    try:
        readline.read_history_file(settings.NGRAM_QUERY_HISTORY_FILE)
    except IOError:
        # No history file found. No problem
        pass
    print "N-gram model query"
    print "Loaded", model_name
    print
    print "Transition:      t <state> <state-1> ... <state-n>"
    print "Emission:        e <chord> <state>"
    print "State domain:    states"
    print "Emission domain: ems"
    
    def _check_state(s):
        if s not in model.label_dom+[None]:
            raise QueryError, "invalid state label: %s" % s
    
    for query in input_getter:
        query = query.rstrip("\n").strip()
        if query:
            try:
                if query.startswith("states"):
                    print ", ".join(model.label_dom)
                elif query.startswith("ems"):
                    print ", ".join(model.emission_dom)
                elif query.startswith("t"):
                    # Transition prob query
                    states = query.split()[1:]
                    if len(states) != model.order:
                        print "Ngram must have length %d" % model.order
                        continue
                    states = [s if s != "None" else None for s in states]
                    # Verify all these states
                    for state in states:
                        _check_state(state)
                    # Get the transition probability
                    prob = model.transition_probability_debug(*states)
                    print "P(Qi = %s | %s) = %f" % (states[0], 
                                    ", ".join(["Q(i-%d) = %s" % (i+1,s) 
                                        for (i,s) in enumerate(states[1:])]),
                                    prob)
                elif query.startswith("e"):
                    # Emission prob query
                    em_state = query.split()[1:]
                    if len(em_state) != 2:
                        print "Emission query must consist of a chord and a state"
                        continue
                    em, state = em_state
                    # Check the state label's valid
                    _check_state(state)
                    # Get the emission probability
                    prob = model.emission_probability(em, state)
                    # Print out the probability
                    print "P(Oi = %s | Qi = %s) = %f" % (em, state, prob)
                else:
                    print "Invalid query: %s" % query
            except QueryError, err:
                print "Check your query: %s" % err
            except Exception, err:
                print "Error processing query: %s" % err
Ejemplo n.º 2
0
def main():
    usage = "%prog [<options>] <model-name>"
    description = "Queries an ngram model interactively"
    optparser = OptionParser(usage=usage, description=description)
    # Read in command line options and args
    options, arguments = parse_args_with_config(optparser)

    if len(arguments) < 1:
        print "Specify a model name"
        sys.exit(1)
    model_name = arguments[0]

    # Load the ngram model
    ngmodel = NgramTaggerModel.load_model(model_name)
    model = ngmodel.model

    input_getter = input_iterator(">> ")
    # Load the shell history if possible
    try:
        readline.read_history_file(settings.NGRAM_QUERY_HISTORY_FILE)
    except IOError:
        # No history file found. No problem
        pass
    print "N-gram model query"
    print "Loaded", model_name
    print
    print "Transition:      t <state> <state-1> ... <state-n>"
    print "Emission:        e <chord> <state>"
    print "State domain:    states"
    print "Emission domain: ems"

    def _check_state(s):
        if s not in model.label_dom + [None]:
            raise QueryError, "invalid state label: %s" % s

    for query in input_getter:
        query = query.rstrip("\n").strip()
        if query:
            try:
                if query.startswith("states"):
                    print ", ".join(model.label_dom)
                elif query.startswith("ems"):
                    print ", ".join(model.emission_dom)
                elif query.startswith("t"):
                    # Transition prob query
                    states = query.split()[1:]
                    if len(states) != model.order:
                        print "Ngram must have length %d" % model.order
                        continue
                    states = [s if s != "None" else None for s in states]
                    # Verify all these states
                    for state in states:
                        _check_state(state)
                    # Get the transition probability
                    prob = model.transition_probability_debug(*states)
                    print "P(Qi = %s | %s) = %f" % (states[0], ", ".join([
                        "Q(i-%d) = %s" % (i + 1, s)
                        for (i, s) in enumerate(states[1:])
                    ]), prob)
                elif query.startswith("e"):
                    # Emission prob query
                    em_state = query.split()[1:]
                    if len(em_state) != 2:
                        print "Emission query must consist of a chord and a state"
                        continue
                    em, state = em_state
                    # Check the state label's valid
                    _check_state(state)
                    # Get the emission probability
                    prob = model.emission_probability(em, state)
                    # Print out the probability
                    print "P(Oi = %s | Qi = %s) = %f" % (em, state, prob)
                else:
                    print "Invalid query: %s" % query
            except QueryError, err:
                print "Check your query: %s" % err
            except Exception, err:
                print "Error processing query: %s" % err
Ejemplo n.º 3
0
            # Get the bulk input to supply names
            name_getter = iter(input_data.get_identifiers())
            num_inputs = len(input_data)
        else:
            # Otherwise, there's just one input
            input_list = [input_data]
            num_inputs = 1
            # Try getting a name for this
            if input_data.name is None:
                name = "unnamed"
            else:
                name = input_data.name
            name_getter = iter([name])

    if stdinput:
        input_getter = input_iterator(">> ")
        print "No input string given: accepting input from stdin. Hit Ctrl+d to exit"
        # Load the shell history if possible
        try:
            readline.read_history_file(settings.TAG_PROMPT_HISTORY_FILE)
        except IOError:
            # No history file found. No problem
            pass
    else:
        input_getter = iter(input_list)

    ############# Parameter output ################
    # Display the important parameter settings
    print >> sys.stderr, "=== The Jazz Parser tagger ==="
    print >> sys.stderr, "Supertagger:         %s" % options.supertagger
    print >> sys.stderr, "Supertagger options: %s" % toptstr
Ejemplo n.º 4
0
         # Get the bulk input to supply names
         name_getter = iter(input_data.get_identifiers())
         num_inputs = len(input_data)
     else:
         # Otherwise, there's just one input
         input_list = [input_data]
         num_inputs = 1
         # Try getting a name for this
         if input_data.name is None:
             name = "unnamed"
         else:
             name = input_data.name
         name_getter = iter([name])
 
 if stdinput:
     input_getter = input_iterator(">> ")
     print "No input string given: accepting input from stdin. Hit Ctrl+d to exit"
     # Load the shell history if possible
     try:
         readline.read_history_file(settings.TAG_PROMPT_HISTORY_FILE)
     except IOError:
         # No history file found. No problem
         pass
 else:
     input_getter = iter(input_list)
 
 ############# Parameter output ################
 # Display the important parameter settings
 print >>sys.stderr, "=== The Jazz Parser tagger ==="
 print >>sys.stderr, "Supertagger:         %s" % options.supertagger
 print >>sys.stderr, "Supertagger options: %s" % toptstr
Ejemplo n.º 5
0
         for selector in options.input_index.split(","):
             if "-" in selector:
                 # This is a range
                 start,end = selector.split("-")
                 start,end = int(start),int(end)
                 indices.extend(range(start, end+1))
             else:
                 indices.append(int(selector))
     except ValueError:
         logger.error("Could not parse index values: %s" % options.input_index)
         return 1
     if len(indices):
         select_indices = indices
 
 if stdinput:
     input_getter = input_iterator("Enter chord sequence:\n# ")
     print "No input string given: accepting input from stdin. Hit Ctrl+d to exit"
     # Load the shell history if possible
     if readline_loaded:
         try:
             readline.read_history_file(settings.INPUT_PROMPT_HISTORY_FILE)
         except (IOError):
             # No history file found or readline not available. No problem
             pass
 else:
     input_getter = iter(input_list)
 
 if partitions > 1 and partition_numbers is None:
     # We can only partition certain types of input
     logger.error("Got partitions=%d, but can only partition bulk input "\
                 "data" % (partitions))