def test_detect_input_type(self):
    """
    Check that detect_input_type() reports a datatype name that agrees 
    with input_type_name() for the type of the object it returns, and 
    that the C{allowed} restriction is enforced.
    
    """
    # Load some input: DbInput
    dbi = DbInput.from_file(DB_SEQUENCES_FILE, {'index': 0})
    # Run it through the preprocessor
    datatype, obj = detect_input_type(dbi)
    # Get the datatype from the type name lists
    datatype2 = input_type_name(type(obj))
    self.assertEqual(datatype, datatype2)

    # Do the same with ChordInput
    ci = ChordInput.from_file(CHORDS_FILE, options={'roman': True})
    datatype, obj = detect_input_type(ci)
    datatype2 = input_type_name(type(obj))
    self.assertEqual(datatype, datatype2)

    # Try some bulk input
    bulk = DbBulkInput.from_file(DB_SEQUENCES_FILE)
    datatype, obj = detect_input_type(bulk, allow_bulk=True)
    datatype2 = input_type_name(type(obj))
    self.assertEqual(datatype, datatype2)

    # Try restricting the allowed type
    datatype, obj = detect_input_type(ci, allowed=['chords'])
    # And this one should get rejected.
    # assertRaises forwards *args/**kwargs to the callable itself, so the 
    #  arguments must be given directly: packing them into a tuple and a 
    #  dict would call detect_input_type((ci,), {...}) and never test the 
    #  restriction on ci at all
    self.assertRaises(InputTypeError, detect_input_type, ci, allowed=['db'])
def test_detect_input_type(self):
    """
    Check that detect_input_type() reports a datatype name that agrees 
    with input_type_name() for the type of the object it returns, and 
    that the C{allowed} restriction is enforced.
    
    """
    # Load some input: DbInput
    dbi = DbInput.from_file(DB_SEQUENCES_FILE, {"index": 0})
    # Run it through the preprocessor
    datatype, obj = detect_input_type(dbi)
    # Get the datatype from the type name lists
    datatype2 = input_type_name(type(obj))
    self.assertEqual(datatype, datatype2)

    # Do the same with ChordInput
    ci = ChordInput.from_file(CHORDS_FILE, options={"roman": True})
    datatype, obj = detect_input_type(ci)
    datatype2 = input_type_name(type(obj))
    self.assertEqual(datatype, datatype2)

    # Try some bulk input
    bulk = DbBulkInput.from_file(DB_SEQUENCES_FILE)
    datatype, obj = detect_input_type(bulk, allow_bulk=True)
    datatype2 = input_type_name(type(obj))
    self.assertEqual(datatype, datatype2)

    # Try restricting the allowed type
    datatype, obj = detect_input_type(ci, allowed=["chords"])
    # And this one should get rejected.
    # assertRaises forwards *args/**kwargs to the callable itself, so the 
    #  arguments must be given directly: packing them into a tuple and a 
    #  dict would call detect_input_type((ci,), {...}) and never test the 
    #  restriction on ci at all
    self.assertRaises(InputTypeError, detect_input_type, ci, allowed=["db"])
def test_from_string(self):
    """
    Smoke test: build a ChordInput from the CHORDS string with the 
    C{roman} flag switched on. Passes as long as no exception is raised.
    
    """
    chord_input = ChordInput.from_string(CHORDS, roman=True)
def test_from_file(self):
    """
    Smoke test: load a ChordInput from CHORDS_FILE with the C{roman} 
    option switched on. Passes as long as loading raises no exception.
    
    """
    load_options = {"roman": True}
    chord_input = ChordInput.from_file(CHORDS_FILE, options=load_options)
# Get a filename for a logger for this input if parse_logger_dir: parse_logger = os.path.join(parse_logger_dir, "%s.log" % \ slugify(input_identifier)) print >>sys.stderr, "Logging parser progress to %s" % parse_logger logger = create_logger(filename=parse_logger) else: logger = create_plain_stderr_logger() # Catch any errors and continue to the next input, instead of giving up try: if isinstance(input, str): input = input.rstrip("\n") if len(input) == 0: return input = ChordInput.from_string(input) logger.info("Tagging sequence (%d timesteps)" % len(input)) # Prepare a suitable tagger component tagger = tagger_cls(grammar, input, options=topts.copy(), logger=logger) except KeyboardInterrupt: print "Exiting on keyboard interrupt" break except: print "Error tagging %s" % input_identifier traceback.print_exc() print else: # Tagged successfully # Get tags from the tagger as the parser would
# Get a filename for a logger for this input if parse_logger_dir: parse_logger = os.path.join(parse_logger_dir, "%s.log" % \ slugify(input_identifier)) print >> sys.stderr, "Logging parser progress to %s" % parse_logger logger = create_logger(filename=parse_logger) else: logger = create_plain_stderr_logger() # Catch any errors and continue to the next input, instead of giving up try: if isinstance(input, str): input = input.rstrip("\n") if len(input) == 0: return input = ChordInput.from_string(input) logger.info("Tagging sequence (%d timesteps)" % len(input)) # Prepare a suitable tagger component tagger = tagger_cls(grammar, input, options=topts.copy(), logger=logger) except KeyboardInterrupt: print "Exiting on keyboard interrupt" break except: print "Error tagging %s" % input_identifier traceback.print_exc() print
def test_from_file(self):
    """
    Smoke test: load a ChordInput from CHORDS_FILE with the C{roman} 
    option switched on. Passes as long as loading raises no exception.
    
    """
    load_options = {'roman': True}
    chord_input = ChordInput.from_file(CHORDS_FILE, options=load_options)
def do_parse(grammar, tagger_cls, parser_cls, input, topts, popts, backoff, npopts, options, identifier, multiprocessing=False, logfile=None, partition=None):
    """
    Function called for each input to do tagging and parsing and return the 
    results. It's a separate function so that we can hand it over to worker 
    processes to do multiprocessing.
    
    A tagger is built from C{tagger_cls}, a parser from C{parser_cls}, and 
    the parser's C{parse()} method is run. Errors are caught rather than 
    propagated: they are reported in the C{'error'} field of the returned 
    response dictionary, or (outside multiprocessing) may open the 
    interactive shell.
    
    @param grammar: grammar passed to the tagger and parser constructors
    @param tagger_cls: callable used to construct the tagger
    @param parser_cls: callable used to construct the parser
    @param input: the input to parse. A C{str} has trailing newlines 
        stripped and is converted using C{ChordInput.from_string}; anything 
        else is passed to the tagger as it is
    @param topts: tagger options; a copy is given to the tagger
    @param popts: parser options; a copy is given to the parser
    @param backoff: passed to the parser as its C{backoff} argument
    @param npopts: backoff options; a copy is given to the parser
    @param options: object providing the C{short_progress}, 
        C{long_progress}, C{derivations} and C{error_shell} attributes
    @param identifier: label for this input, printed and stored in the 
        response
    @type multiprocessing: bool
    @param multiprocessing: if true, never go interactive or exit the 
        process on an error, and leave the tagger and parser out of the 
        response
    @type logfile: str
    @param logfile: filename to send logging output to. If None, will log 
        to stderr
    @param partition: not used in the portion of the function shown here
    @return: the response dictionary when an error is reported. C{None} if 
        the input is an empty string or after the interactive shell has 
        been run. NOTE(review): no return on the successful path is visible 
        in this excerpt -- confirm against the full function.
    
    """
    # If the input's a string, preprocess it
    if isinstance(input, str):
        input = input.rstrip("\n")
        if len(input) == 0:
            # Nothing to parse: give up without building a response
            return
        input = ChordInput.from_string(input)

    print "Processing input: %s (%s)" % (input, identifier)

    if logfile is None:
        # Sending logging output to stderr
        logger = create_plain_stderr_logger()
    else:
        logger = create_logger(filename=logfile)
        print "Logging parser progress to %s" % logfile

    # Prepare an initial response
    # We'll fill in some values of this later
    response = {
        'tagger' : None,
        'parser' : None,
        'input' : input,
        'error' : None,
        'messages' : [],
        'time' : None,
        'identifier' : identifier,
        'results' : None,
        'timed_out' : False,
    }
    tagger = None
    parser = None
    messages = []

    # Work out the value handed to the parser as its summaries argument
    if options.short_progress:
        # Only output the short form of the progress reports
        progress = 2
    elif options.long_progress:
        progress = 1
    else:
        progress = 0

    # Start a timer now to time the parse
    timer = ExecutionTimer(clock=True)

    # Catch any errors and continue to the next input, instead of giving up
    try:
        # Tag and parse the input
        logger.info("Tagging sequence (%d timesteps)" % len(input))

        # Prepare a suitable tagger component
        tagger = tagger_cls(grammar, input, options=topts.copy(), logger=logger)
        # The tagger only goes in the response when running in-process 
        #  (presumably so it needn't be sent back from a worker -- TODO confirm)
        if not multiprocessing:
            response['tagger'] = tagger

        # Create a parser using this tagger
        parser = parser_cls(grammar, tagger, options=popts.copy(), backoff=backoff, backoff_options=npopts.copy(), logger=logger)
        if not multiprocessing:
            response['parser'] = parser
        try:
            # Parse to produce a list of results
            results = parser.parse(derivations=options.derivations, summaries=progress)
        except (KeyboardInterrupt, Exception), err:
            if multiprocessing:
                # Don't go interactive if we're in a subprocess
                # Instead, just return with an error
                response.update({
                    'error' : exception_tuple(str_tb=True),
                })
                return response
            else:
                # Drop into the shell
                if type(err) == KeyboardInterrupt:
                    print "Dropping out on keyboard interrupt"
                    print "Entering shell: use 'chart' command to see current state of parse"
                elif options.error_shell:
                    print >> sys.stderr, "Error parsing %s" % str(input)
                    print >> sys.stderr, "The error was:"
                    traceback.print_exc(file=sys.stderr)
                # If we keyboard interrupted, always go into the shell, so 
                #  the user can see how far we got
                if options.error_shell or type(err) == KeyboardInterrupt:
                    # Instead of exiting, enter the interactive shell
                    print
                    from jazzparser.shell import interactive_shell
                    # The shell environment is built from the globals and 
                    #  this function's locals, so the local variable names 
                    #  used here are visible inside the shell
                    env = {}
                    env.update(globals())
                    env.update(locals())
                    interactive_shell(parser.chart.parses,options,tagger,parser, grammar.formalism,env,input_data=input)
                    # Nothing is returned once the shell has been used
                    return
                else:
                    # Hand the error on to the outer handler below
                    raise
    except (KeyboardInterrupt, Exception), err:
        if multiprocessing:
            # In a subprocess: report the error and return
            response.update({
                'error' : exception_tuple(str_tb=True),
            })
            return response
        else:
            if type(err) == KeyboardInterrupt:
                print "Exiting on keyboard interrupt"
                sys.exit(1)
            else:
                # Report the error along with the messages and the time 
                #  taken so far
                response.update({
                    'error' : exception_tuple(str_tb=True),
                    'messages' : messages,
                    'time' : timer.get_time(),
                })
                return response