Esempio n. 1
0
def do_parse(grammar, tagger_cls, parser_cls, input, topts, popts, backoff, 
        npopts, options, identifier, multiprocessing=False, 
        logfile=None, partition=None):
    """
    Tag and parse a single input and return a dictionary describing the 
    outcome. It's a separate function so that we can hand it over to worker 
    processes to do multiprocessing.
    
    @param grammar: grammar given to the tagger and parser constructors. Its 
        C{formalism} attribute is passed to the interactive shell
    @param tagger_cls: callable used to build the tagger for this input
    @param parser_cls: callable used to build the parser around the tagger
    @param input: the input to parse. A string has trailing newlines 
        stripped and is converted with C{ChordInput.from_string}; anything 
        else is used as it is
    @param topts: tagger options; a copy is given to the tagger
    @param popts: parser options; a copy is given to the parser
    @param backoff: passed to the parser as its C{backoff} argument
    @param npopts: backoff options; a copy is given to the parser
    @param options: options object. The attributes read here are 
        C{short_progress}, C{long_progress}, C{derivations} and 
        C{error_shell}
    @param identifier: label for this input, echoed in the response
    @type multiprocessing: bool
    @param multiprocessing: if True, errors are only ever reported through 
        the response (no interactive shell, no exit) and the tagger and 
        parser objects are left out of the response
    @type logfile: str
    @param logfile: filename to send logging output to. If None, will log 
        to stderr
    @param partition: not used by this function
    
    @rtype: dict or None
    @return: None if the input was an empty string, or if the interactive 
        shell was entered after a failed or interrupted parse. Otherwise a 
        dictionary with the keys C{tagger}, C{parser}, C{input}, C{error}, 
        C{messages}, C{time}, C{identifier}, C{results} and C{timed_out}. 
        C{results} is None when an error was recorded in C{error}.
    @raise SystemExit: on a keyboard interrupt outside the parse itself 
        when not multiprocessing
    
    """
    # If the input's a string, preprocess it
    if isinstance(input, str):
        input = input.rstrip("\n")
        if not input:
            # Nothing to parse on an empty line
            return None
        input = ChordInput.from_string(input)
    
    print("Processing input: %s (%s)" % (input, identifier))
    
    if logfile is None:
        # Sending logging output to stderr
        logger = create_plain_stderr_logger()
    else:
        logger = create_logger(filename=logfile)
        print("Logging parser progress to %s" % logfile)
    
    # Prepare an initial response
    # The remaining values get filled in once we know the outcome
    response = {
        'tagger' : None,
        'parser' : None,
        'input' : input,
        'error' : None,
        'messages' : [],
        'time' : None,
        'identifier' : identifier,
        'results' : None,
        'timed_out' : False,
    }
    tagger = None
    parser = None
    messages = []
    
    # Level of progress summary requested from the parser
    if options.short_progress:
        # Only output the short form of the progress reports
        progress = 2
    elif options.long_progress:
        progress = 1
    else:
        progress = 0
    
    # Start a timer now to time the parse
    timer = ExecutionTimer(clock=True)
    
    # Catch any errors and continue to the next input, instead of giving up
    try:
        logger.info("Tagging sequence (%d timesteps)" % len(input))
        
        # Prepare a suitable tagger component
        tagger = tagger_cls(grammar, input, options=topts.copy(), logger=logger)
        if not multiprocessing:
            response['tagger'] = tagger
        
        # Create a parser using this tagger
        parser = parser_cls(grammar, tagger, options=popts.copy(), 
                                backoff=backoff, 
                                backoff_options=npopts.copy(),
                                logger=logger)
        if not multiprocessing:
            response['parser'] = parser
        
        try:
            # Parse to produce a list of results
            results = parser.parse(derivations=options.derivations, 
                                   summaries=progress)
        except (KeyboardInterrupt, Exception) as err:
            if multiprocessing:
                # Don't go interactive if we're in a subprocess
                # Instead, just return with an error
                response.update({
                    'error' : exception_tuple(str_tb=True),
                })
                return response
            
            interrupted = isinstance(err, KeyboardInterrupt)
            if interrupted:
                print("Dropping out on keyboard interrupt")
                print("Entering shell: use 'chart' command to see current state of parse")
            elif options.error_shell:
                sys.stderr.write("Error parsing %s\n" % str(input))
                sys.stderr.write("The error was:\n")
                traceback.print_exc(file=sys.stderr)
            
            if not (options.error_shell or interrupted):
                # No shell wanted: let the outer handler record the error
                raise
            
            # If we keyboard interrupted, always go into the shell, so 
            #  the user can see how far we got
            # Instead of exiting, enter the interactive shell
            print("")
            from jazzparser.shell import interactive_shell
            env = {}
            env.update(globals())
            env.update(locals())
            interactive_shell(parser.chart.parses, options, tagger, parser,
                        grammar.formalism, env, input_data=input)
            return None
    except (KeyboardInterrupt, Exception) as err:
        if multiprocessing:
            response.update({
                'error' : exception_tuple(str_tb=True),
            })
            return response
        
        if isinstance(err, KeyboardInterrupt):
            print("Exiting on keyboard interrupt")
            sys.exit(1)
        
        response.update({
            'error' : exception_tuple(str_tb=True),
            'messages' : messages,
            'time' : timer.get_time(),
        })
        return response
    else:
        # The parse completed: hand the results back. Previously the 
        #  function fell off the end here and returned None, which is also 
        #  what an empty input returns, so the results were lost
        # NOTE(review): 'timed_out' is left at its default of False; set it 
        #  here if the parser exposes that information
        response.update({
            'results' : results,
            'messages' : messages,
            'time' : timer.get_time(),
        })
        return response
Esempio n. 2
0
 def _result_callback(response):
     if response is None:
         # Empty input, or the subprocess doesn't want us to do anything
         return
     else:
         # Mark this input as completed
         global completed_parses
         completed_parses[response['identifier']] = True
         
         if response['results'] is None:
             # There was some error: check what it was
             error = response['error']
             print >> sys.stderr, "Error parsing %s" % str(response['input'])
             print >> sys.stderr, "The error was:"
             print >>sys.stderr, error[2]
             global parse_exit_status
             parse_exit_status = 1
         else:
             # Keep this together with all the other processes' responses
             all_results.append(response)
             print "Parsed: %s" % response['input']
             
             # Run any cleanup routines that the formalism defines
             grammar.formalism.clean_results(response['results'])
             
             # Remove complex results if atomic-only option has been set
             if options.atoms_only:
                 response['results'] = remove_complex_categories(response['results'], grammar.formalism)
             
             if not options.no_results:
                 print "Results:"
                 list_results(response['results'])
             
             if output_dir is not None:
                 # Try getting a gold standard analysis if one has been 
                 #  associated with the input
                 gold = response['input'].get_gold_analysis()
                 
                 # Get the results with their probabilities
                 top_results = [(getattr(res, 'probability', None), res) \
                                     for res in response['results']]
                 if options.topn is not None:
                     # Limit the results that get stored
                     top_results = list(reversed(sorted(
                                             top_results)))[:options.topn]
                 # Output the results to a file
                 presults = ParseResults(
                                 top_results, 
                                 signs=True,
                                 gold_parse=gold,
                                 timed_out=response['timed_out'],
                                 cpu_time=response['time'])
                 filename = get_output_filename(response['identifier'])
                 presults.save(filename)
                 print "Parse results output to %s" % filename
             
             if time_parse:
                 print "Parse took %f seconds" % response['time']
                 
             if options.lh_analysis:
                 print >>sys.stderr, "\nLonguet-Higgins tonal space analysis for each result:"
                 # Output the tonal space path for each result
                 for i,result in enumerate(response['results']):
                     path = grammar.formalism.sign_to_coordinates(result)
                     coords,times = zip(*path)
                     print "%d> %s" % (i, ", ".join(
                         ["%s@%s" % (crd,time) for (crd,time) in 
                                 zip(coordinates_to_roman_names(coords),times)]))
                     
             if options.lh_coord:
                 print >>sys.stderr, "\nLonguet-Higgins tonal space coordinates for each result:"
                 # Output the tonal space path for each result
                 for i,result in enumerate(response['results']):
                     path = grammar.formalism.sign_to_coordinates(result)
                     print "%d> %s" % (i, ", ".join(["(%d,%d)@%s" % (x,y,t) for ((x,y),t) in path]))
             
             # Print out any messages the parse routine sent to us
             for message in response['messages']:
                 print message
                 
             # Print as summary of what we've completed
             num_completed = len(filter(lambda x:x[1], completed_parses.items()))
             if not stdinput:
                 if not options.no_progress:
                     print format_table([
                             [str(ident), 
                              "Complete" if completed_parses[ident] else ""]
                                 for ident in sorted(completed_parses.keys())])
                 if num_inputs is None:
                     print "\nCompleted %d parses" % num_completed
                 else:
                     print "\nCompleted %d/%d parses" % (num_completed, num_inputs)
                 
             # Enter interactive mode now if requested in options
             # Don't do this is we're in a process pool
             if not multiprocessing and options.interactive:
                 print 
                 from jazzparser.shell import interactive_shell
                 env = {}
                 env.update(globals())
                 env.update(locals())
                 interactive_shell(response['results'],
                                   options,
                                   response['tagger'], 
                                   response['parser'],
                                   grammar.formalism,
                                   env,
                                   input_data=response['input'])
             print
             # Flush the output to make sure everything gets out before we start the next one
             sys.stderr.flush()
             sys.stdout.flush()