def main(): input_files = glob.glob(PARSES_FILES) for file_results in input_files: # We read in the whole file (it's pickled, so we have to), but don't # keep the pres object after the loop iteration, because it can # be very big try: pres = ParseResults.from_file(file_results) except ParseResults.LoadError, err: if options.errors: # Print all load errors print >>sys.stderr, "Error loading file: %s" % (err) errors.append(file_results) continue # tracking progress print file_results # get gold semantics gold_result = pres.get_gold_semantics() # calcuate maximum index of parses if len(pres.semantics) > 0: max_index = max_parse(pres.semantics, gold_result) print "Total: ", len(pres.semantics) print "Max: " , max_index zi = pres.semantics[max_index] parse_result = ParseResults([zi], gold_result) parse_result.save(RESULT_FILES + os.path.basename(file_results))
def result_lengths(filename, grammar=None):
    """
    Loads a parse results file and measures two tonal space paths: that of
    the gold standard and that of the first stored result.

    @param filename: parse results file to load
    @param grammar: grammar whose formalism provides the length measure;
        obtained from get_grammar() when omitted
    @rtype: pair
    @return: (gold length, top result length). The gold length is 0 when no
        gold semantics is stored and the top length is 0 when there are no
        results.

    """
    if grammar is None:
        grammar = get_grammar()

    parsed = ParseResults.from_file(filename)

    # Length of the gold standard, if there is one
    gold = parsed.get_gold_semantics()
    gold_length = 0 if gold is None else \
        grammar.formalism.Evaluation.tonal_space_length(gold)

    # Stored results; the first entry is treated as the top result
    ranked = parsed.semantics
    if len(ranked) == 0:
        # Nothing to measure
        return gold_length,0

    return gold_length, grammar.formalism.Evaluation.tonal_space_length(ranked[0][1])
def main(): usage = "%prog [options] <seq-file>" description = "Parses a sequence from a sequence index file using the "\ "annotations stored in the same file." parser = OptionParser(usage=usage, description=description) parser.add_option("-r", "--results", dest="results", action="store_true", help="output the results list") options, arguments = parser.parse_args() if len(arguments) < 1: print "Specify a parse results file" sys.exit(1) pres = ParseResults.from_file(arguments[0]) if hasattr(pres, "signs") and pres.signs: print "Results stored as signs" else: print "Results stored as logical forms only" if pres.gold_parse is None: print "No gold parse stored" else: print "Gold parse available" if pres.gold_sequence is None: print "No gold sequence stored" else: print "Gold sequence available" if options.results: print for i,(prob,res) in enumerate(pres.parses): print "Result %d, probability %s" % (i,prob) print res
def result_lengths(filename, grammar=None):
    """
    Reads a parse results file and returns the tonal space path length of
    its gold standard and of its first stored result.

    @param filename: parse results file to load
    @param grammar: grammar providing C{formalism.Evaluation}; defaults to
        the result of get_grammar()
    @rtype: pair
    @return: (gold length, top result length); a length is 0 when the
        corresponding item is missing from the file

    """
    if grammar is None:
        grammar = get_grammar()

    # Read the stored data
    loaded = ParseResults.from_file(filename)

    gold_semantics = loaded.get_gold_semantics()
    if gold_semantics is not None:
        # Measure the gold standard path
        gold_length = grammar.formalism.Evaluation.tonal_space_length(
            gold_semantics)
    else:
        gold_length = 0

    # The first stored result is treated as the top result
    stored = loaded.semantics
    top_length = 0
    if len(stored) != 0:
        top_length = grammar.formalism.Evaluation.tonal_space_length(
            stored[0][1])
    return gold_length, top_length
def main(): usage = "%prog [options] <results-files>" description = "Evaluates parse results stored in files by comparing "\ "them to the gold standard results stored with them, using any "\ "a variety of metrics." parser = OptionParser(usage=usage, description=description) parser.add_option("--errors", dest="errors", action="store_true", help="display errors reading in the files.") parser.add_option("--unscored", dest="unscored", action="store_true", help="output a list of files containing no results (i.e. no successful full parses) and exit") parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics") parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options") parser.add_option("--mc", "--metric-computation", dest="print_computation", action="store_true", help="show the metric's computation trace for each input") options, arguments = parser.parse_args() if len(arguments) == 0: print >>sys.stderr, "Specify at least one file to read the results from" sys.exit(1) deprec_metric = command_line_metric(formalism, "deprec") deps_metric = command_line_metric(formalism, "deps") # Try loading all the input files preses = [] input_pairs = [] errors = [] covered = 0 input_filenames = [] for filename in arguments: try: pres = ParseResults.from_file(filename) except ParseResults.LoadError, err: if options.errors: # Print all load errors print >>sys.stderr, "Error loading file: %s" % (err) errors.append(filename) continue preses.append(pres) # Try to get a gold standard result gold_result = pres.get_gold_semantics() if gold_result is None: # Can't evaluate this: ignore it if options.unscored: print "No gold result for", filename continue # Get the top result's semantics if len(pres.semantics) == 0: # No results for this input_pairs.append((None, gold_result)) 
input_filenames.append(filename) continue top_result = pres.semantics[0][1] # Got a result and gold result for this covered += 1 input_pairs.append((top_result, gold_result)) input_filenames.append(filename)
def main(): usage = "%prog [options] <results-files>" description = "Evaluates parse results stored in files by comparing "\ "them to the gold standard results stored with them, using any "\ "a variety of metrics." parser = OptionParser(usage=usage, description=description) parser.add_option("--tabbed", dest="tabbed", action="store_true", help="output a tabbed table of values") options, arguments = parser.parse_args() if len(arguments) == 0: print >> sys.stderr, "Specify at least one file to read the results from" sys.exit(1) deprec_metric = command_line_metric(formalism, "deprec", options="output=f") deps_metric = command_line_metric(formalism, "deps", options="output=f") # Try loading all the input files input_pairs = [] errors = [] covered = 0 input_filenames = [] for filename in arguments: try: pres = ParseResults.from_file(filename) except ParseResults.LoadError, err: errors.append(filename) continue # Try to get a gold standard result gold_result = pres.get_gold_semantics() if gold_result is None: # Can't evaluate this: ignore it if options.unscored: print "No gold result for", filename continue # Get the top result's semantics if len(pres.semantics) == 0: # No results for this input_pairs.append((None, gold_result)) input_filenames.append(filename) continue top_result = pres.semantics[0][1] # Got a result and gold result for this covered += 1 input_pairs.append((top_result, gold_result)) input_filenames.append(filename)
def run(self, args, state):
    """
    Loads the parse results file named by C{args[0]} and replaces
    C{state.results} with the results it contains (probabilities dropped).

    @raise ShellError: if the file's results are not stored as signs

    """
    from .shell import ShellError
    from jazzparser.data.parsing import ParseResults

    # Read the stored results
    pres = ParseResults.from_file(args[0])

    stored_as_signs = hasattr(pres, "signs") and pres.signs
    if not stored_as_signs:
        raise ShellError("loaded parse results, but they're stored as "
                         "logical forms, not signs, so we can't load them into the "
                         "state")

    # Swap the loaded results into the state
    state.results = [sign for (_prob,sign) in pres.parses]
def run(self, args, state):
    """
    Reads a parse results file (path in C{args[0]}) and stores its results,
    without their probabilities, in C{state.results}.

    @raise ShellError: if the file holds logical forms rather than signs

    """
    from .shell import ShellError
    from jazzparser.data.parsing import ParseResults

    loaded = ParseResults.from_file(args[0])

    # Only sign results can be put into the state
    if hasattr(loaded, "signs") and loaded.signs:
        # Replace whatever results the state held before
        state.results = [entry for (probability, entry) in loaded.parses]
    else:
        message = "loaded parse results, but they're stored as "\
            "logical forms, not signs, so we can't load them into the "\
            "state"
        raise ShellError(message)
def main(): usage = "%prog [options] <res-file1> [<res-file2> ...]" description = "Reads in a parse results file, drops all but the top n "\ "results and writes it out to another directory" parser = OptionParser(usage=usage, description=description) parser.add_option("-n", dest="n", action="store", type="int", help="number of results to keep. Default: 1", default=1) parser.add_option("-o", "--output-dir", dest="output_dir", action="store", help="directory to put the output files in. Default: same as inputs, with altered filenames") parser.add_option("-d", "--strip-derivations", dest="strip_derivations", action="store_true", help="remove derivation traces from the results") options, arguments = parser.parse_args() if len(arguments) < 1: print "Specify at least one parse results file" sys.exit(1) n = options.n if options.output_dir is not None: output_dir = os.path.abspath(options.output_dir) filename_suffix = "" else: output_dir = None filename_suffix = "-top-%d" % n print "Outputing to: %s\n" % output_dir for filename in arguments: # Run the garbage collector each time round to get rid of the old # objects. For some reason it doesn't get run often enough otherwise gc.collect() filebase = os.path.basename(filename) # Decide where the output's going for this file if output_dir is None: file_outdir = os.path.dirname(os.path.abspath(filename)) else: file_outdir = output_dir file_outname = os.path.join(file_outdir, "%s%s" % (filebase, filename_suffix)) print "Reading in: %s" % filebase # Read in the parse results file pres = ParseResults.from_file(filename) pres.parses = pres.parses[:n] if options.strip_derivations and pres.signs: # Remove derivation traces, if they were stored in the first place for prob,res in pres.parses: res.derivation_trace = None pres.save(file_outname) # Allow this to be garbage collected now pres = None
def main(): usage = "%prog [options] <results-files>" description = "Evaluates parse results stored in files by comparing "\ "them to the gold standard results stored with them, using any "\ "a variety of metrics." parser = OptionParser(usage=usage, description=description) parser.add_option("--tabbed", dest="tabbed", action="store_true", help="output a tabbed table of values") options, arguments = parser.parse_args() if len(arguments) == 0: print >>sys.stderr, "Specify at least one file to read the results from" sys.exit(1) deprec_metric = command_line_metric(formalism, "deprec", options="output=f") deps_metric = command_line_metric(formalism, "deps", options="output=f") # Try loading all the input files input_pairs = [] errors = [] covered = 0 input_filenames = [] for filename in arguments: try: pres = ParseResults.from_file(filename) except ParseResults.LoadError, err: errors.append(filename) continue # Try to get a gold standard result gold_result = pres.get_gold_semantics() if gold_result is None: # Can't evaluate this: ignore it if options.unscored: print "No gold result for", filename continue # Get the top result's semantics if len(pres.semantics) == 0: # No results for this input_pairs.append((None, gold_result)) input_filenames.append(filename) continue top_result = pres.semantics[0][1] # Got a result and gold result for this covered += 1 input_pairs.append((top_result, gold_result)) input_filenames.append(filename)
def reranking(input_files): # Initialization: v = 0 learning_rate = 0.2 v = get_features_vector() input_files = sorted(input_files) # Algorithm: # For t = 1..T, i = 1..n # zi = F(xi) # if (zi != yi) v = v + f(xi, yi) - f(xi, zi) T = 10 for t in range(T): print "========== Loop: %d ==========" % t for parses_result in input_files: # We read in the whole file (it's pickled, so we have to), but don't # keep the pres object after the loop iteration, because it can # be very big try: pres = ParseResults.from_file(parses_result) except ParseResults.LoadError, err: if options.errors: # Print all load errors print >>sys.stderr, "Error loading file: %s" % (err) errors.append(parses_result) continue # get gold semantics and gold dependency graph gold_result = pres.get_gold_semantics() gold_depend_graph = get_depend_graph(gold_result) # calcuate maximum index of parses if len(pres.semantics) > 0: max_index = max_parse(pres.semantics, v) zi = pres.semantics[max_index] # get maximum dependency graph zi_depend_graph = get_depend_graph(zi[1]) if tonal_space_distance(zi[1].lf, gold_result.lf) != 0: gold_features = get_features(gold_depend_graph) zi_features = get_features(zi_depend_graph) for k, val in zi_features.iteritems(): v[k] = v[k] + gold_features[k] - zi_features[k] print sum(v.values())
def main(): usage = "%prog [options] <results-files>" description = "Prints a dependency tree for a parse result" parser = OptionParser(usage=usage, description=description) parser.add_option("-t", "--times", dest="times", action="store_true", help="show timings of nodes") parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency") parser.add_option("--la", "--latex-align", dest="latex_align", action="store_true", help="show node alignments in Latex output") parser.add_option( "--align-time", dest="align_time", action="store_true", help= "show the graph of common dependencies when the two graphs are aligned by node times" ) parser.add_option( "--align-max", dest="align_max", action="store_true", help= "show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery" ) options, arguments = parser.parse_args() if len(arguments) == 0: print >> sys.stderr, "Specify a file to read the results from" sys.exit(1) filename = arguments[0] try: pres = ParseResults.from_file(filename) except ParseResults.LoadError, err: print >> sys.stderr, "Error loading file: %s" % (err) sys.exit(1)
def get_top_result(filename):
    """
    Loads the top parse result and the gold standard result from a
    ParseResults file and returns their logical forms.

    @note: effectively now moved to ParseResults.get_top_result(). This is
        just a wrapper for backward compatibility.

    @rtype: pair
    @return: (top parser result's logical form, gold standard logical
        form), in that order. Both are None when there is no top result;
        the second is None when there is a top result but no gold standard.

    """
    # Load the data in from the file
    res = ParseResults.from_file(filename)
    top, gold = res.get_top_result()
    if top is None:
        return None, None
    # A missing gold standard used to raise AttributeError on gold.lf
    gold_lf = gold.lf if gold is not None else None
    return top.lf, gold_lf
def get_top_result(filename):
    """
    Loads the top parse result and the gold standard result from a
    ParseResults file and returns their logical forms.

    @note: effectively now moved to ParseResults.get_top_result(). This is
        just a wrapper for backward compatibility.

    @rtype: pair
    @return: (top parser result's logical form, gold standard logical
        form), in that order. Both are None when there is no top result;
        the second is None when there is a top result but no gold standard.

    """
    # Load the data in from the file
    res = ParseResults.from_file(filename)
    top,gold = res.get_top_result()
    if top is None:
        return None,None
    if gold is None:
        # Previously this case raised AttributeError on gold.lf
        return top.lf, None
    return top.lf, gold.lf
def main(): usage = "%prog [options] <results-files>" description = "Prints a dependency tree for a parse result" parser = OptionParser(usage=usage, description=description) parser.add_option("-t", "--times", dest="times", action="store_true", help="show timings of nodes") parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency") parser.add_option("--la", "--latex-align", dest="latex_align", action="store_true", help="show node alignments in Latex output") parser.add_option("--align-time", dest="align_time", action="store_true", help="show the graph of common dependencies when the two graphs are aligned by node times") parser.add_option("--align-max", dest="align_max", action="store_true", help="show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery") options, arguments = parser.parse_args() if len(arguments) == 0: print >>sys.stderr, "Specify a file to read the results from" sys.exit(1) filename = arguments[0] try: pres = ParseResults.from_file(filename) except ParseResults.LoadError, err: print >>sys.stderr, "Error loading file: %s" % (err) sys.exit(1)
def main(): usage = "%prog [options] <results-files>" description = "Prints a dependency tree for a parse result" parser = OptionParser(usage=usage, description=description) parser.add_option("-t", "--times", dest="times", action="store_true", help="show timings of nodes") parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency") parser.add_option("--la", "--latex-align", dest="latex_align", action="store_true", help="show node alignments in Latex output") parser.add_option("--align-time", dest="align_time", action="store_true", help="show the graph of common dependencies when the two graphs are aligned by node times") parser.add_option("--align-max", dest="align_max", action="store_true", help="show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery") options, arguments = parser.parse_args() if len(arguments) == 0: print >>sys.stderr, "Specify a file to read the results from" sys.exit(1) filename = arguments[0] # Swith PCCG/St+PCCG PARSER = "PCCG" FEATURE_PARAMS = "../xuanhong/params_2_pcfg.txt" if filename.find("stpcfg") != -1: PARSER = "St+PCCG" FEATURE_PARAMS = "../xuanhong/params_2_stpcfg.txt" # Input sequence list_songs = read_list_songs("../xuanhong/list_songs.txt") song_name = os.path.basename(filename) seqs = SequenceIndex.from_file(settings.SEQUENCE_DATA) seq = seqs.sequences[list_songs[song_name]] input_sequence = DbInput.from_sequence(seq) try: pres = ParseResults.from_file(filename) except ParseResults.LoadError, err: print >>sys.stderr, "Error loading file: %s" % (err) sys.exit(1)
def main(): usage = "%prog [options] <res-file1> [<res-file2> ...]" description = "Reads in a parse results file, drops all but the top n "\ "results and writes it out to another directory" parser = OptionParser(usage=usage, description=description) parser.add_option("-n", dest="n", action="store", type="int", help="number of results to keep. Default: 1", default=1) parser.add_option( "-o", "--output-dir", dest="output_dir", action="store", help= "directory to put the output files in. Default: same as inputs, with altered filenames" ) parser.add_option("-d", "--strip-derivations", dest="strip_derivations", action="store_true", help="remove derivation traces from the results") options, arguments = parser.parse_args() if len(arguments) < 1: print "Specify at least one parse results file" sys.exit(1) n = options.n if options.output_dir is not None: output_dir = os.path.abspath(options.output_dir) filename_suffix = "" else: output_dir = None filename_suffix = "-top-%d" % n print "Outputing to: %s\n" % output_dir for filename in arguments: # Run the garbage collector each time round to get rid of the old # objects. For some reason it doesn't get run often enough otherwise gc.collect() filebase = os.path.basename(filename) # Decide where the output's going for this file if output_dir is None: file_outdir = os.path.dirname(os.path.abspath(filename)) else: file_outdir = output_dir file_outname = os.path.join(file_outdir, "%s%s" % (filebase, filename_suffix)) print "Reading in: %s" % filebase # Read in the parse results file pres = ParseResults.from_file(filename) pres.parses = pres.parses[:n] if options.strip_derivations and pres.signs: # Remove derivation traces, if they were stored in the first place for prob, res in pres.parses: res.derivation_trace = None pres.save(file_outname) # Allow this to be garbage collected now pres = None
def main(): usage = "%prog [options] <results-file> [<result-number>=0]" parser = OptionParser(usage=usage) parser.add_option( "-q", "--quiet", dest="quiet", action="store_true", help="only output the requested information, no meta-info.") parser.add_option("-p", "--print", dest="printout", action="store_true", help="output the result to stdout.") parser.add_option("--path", dest="path", action="store_true", help="display the fully-specified tonal space path.") parser.add_option( "--play", dest="play", action="store_true", help= "use the harmonical to play the root sequence of the result's semantics." ) parser.add_option( "--audio", dest="audio", action="store", help= "use the harmonical to render the root sequence, as with --play, and store the result to a wave file." ) options, arguments = parser.parse_args() # Just get the default formalism formalism = get_default_formalism() def _print(string=""): if not options.quiet: print >> sys.stderr, string if len(arguments) == 0: print >> sys.stderr, "Specify a file to read the results from" sys.exit(1) results = ParseResults.from_file(arguments[0]) if len(arguments) > 1: res_num = int(arguments[1]) else: res_num = 0 prob, result = results.sorted_results[res_num] if options.printout: _print("Result:") # Just display the resulting category print result _print() if options.path: _print("Tonal space path:") # Compute the tonal path (coordinates) from the result path = formalism.semantics_to_coordinates(result.semantics) points, timings = zip(*path) print ", ".join(coordinates_to_roman_names(points)) _print() if options.play or options.audio is not None: _print("Building pitch structure from result...") # Convert the semantics into a list of TS points path = formalism.semantics_to_coordinates(result.semantics) # Decide on chord types # For now, since we don't know the original chords, use dom7 # for dom chords, maj for subdoms, and M7 for tonics fun_chords = { 'T': 'M7', 'D': '7', 'S': '', } functions = 
formalism.semantics_to_functions(result.semantics) chord_types = [(fun_chords[f], t) for (f, t) in functions] tones = path_to_tones(path, chord_types=chord_types, double_root=True) _print("Rendering audio samples...") samples = tones.render() if options.audio is not None: filename = os.path.abspath(options.audio) _print("Writing wave data to %s" % filename) save_wave_data(samples, filename) if options.play: _print("Playing...") play_audio(samples, wait_for_end=True) _print()
def main():
    """
    Command-line entry point (the visible part): parses the options, builds
    the metric(s) to evaluate with and loads every results file named on
    the command line, collecting (top result, gold result) pairs, the
    number of timed-out parses and, where stored, CPU times.

    Files that fail to load are recorded in C{errors}; files with no gold
    result are skipped; files with a gold result but no parses contribute a
    C{(None, gold)} pair. C{formalism} and C{command_line_metric} come from
    the enclosing module.

    """
    usage = "%prog [options] <results-files>"
    description = "Evaluates parse results stored in files by comparing "\
        "them to the gold standard results stored with them, using any "\
        "a variety of metrics."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--errors", dest="errors", action="store_true", help="display errors reading in the files.")
    parser.add_option("--unscored", dest="unscored", action="store_true", help="output a list of files containing no results (i.e. no successful full parses) and exit")
    parser.add_option("--timeout", dest="timeout", action="store_true", help="output a list of parses that timed out")
    parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    parser.add_option("--mc", "--metric-computation", dest="print_computation", action="store_true", help="show the metric's computation trace for each input")
    parser.add_option("-f", "--f-score", dest="f_score", action="store_true", help="outputs recall, precision and f-score for an f-score-based metric. Just uses the same metric 3 times with output=recall, etc. Will only work with appropriate metrics")
    parser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="just output the numbers, nothing else")
    parser.add_option("-t", "--time", dest="time", action="store_true", help="output average parse time. This is output by default, but hidden in quiet mode unless this switch is used")
    options, arguments = parser.parse_args()

    if options.f_score:
        # Special case: one metric per output kind.
        # NOTE(review): the help text speaks of three metrics, but only
        # precision and recall are built here -- presumably the f-score is
        # derived from these two further down; confirm
        metrics = []
        # mopts is None when no --mopt was given
        opts = options.mopts or []
        for opt in [ "output=precision", "output=recall" ]:
            metrics.append(command_line_metric(formalism, options.metric, opts+[opt]))
        if not options.quiet:
            print "Evaluating precision, recall and f-score on %s" % metrics[0].name
    else:
        # Get a metric according to the options
        metrics = [command_line_metric(formalism, options.metric, options.mopts)]
        if not options.quiet:
            print "Evaluating using metric: %s" % metrics[0].name

    if len(arguments) == 0:
        print >>sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)

    # Try loading all the input files
    input_pairs = []
    errors = []
    covered = 0
    input_filenames = []
    times = []
    timed_out = 0
    for filename in arguments:
        # We read in the whole file (it's pickled, so we have to), but don't
        # keep the pres object after the loop iteration, because it can
        # be very big
        try:
            pres = ParseResults.from_file(filename)
        except ParseResults.LoadError, err:
            if options.errors:
                # Print all load errors
                print >>sys.stderr, "Error loading file: %s" % (err)
            errors.append(filename)
            continue

        # Timed-out parses are always counted, but only listed on request
        if options.timeout and pres.timed_out:
            print "Timed out: %s" % filename
        if pres.timed_out:
            timed_out += 1

        # Try to get a gold standard result
        gold_result = pres.get_gold_semantics()
        if gold_result is None:
            # Can't evaluate this: ignore it
            if not options.quiet:
                print "No gold result for", filename
            continue

        # Get the top result's semantics
        if len(pres.semantics) == 0:
            # No results for this: pair the gold result with None
            input_pairs.append((None, gold_result))
            input_filenames.append(filename)
            if options.unscored:
                print "No results: %s" % filename
            continue
        top_result = pres.semantics[0][1]

        # Got a result and gold result for this
        covered += 1
        input_pairs.append((top_result, gold_result))
        input_filenames.append(filename)
        # Check this for compat with old stored results
        if hasattr(pres, 'cpu_time'):
            times.append(pres.cpu_time)
def main(): usage = "%prog [options] <results-file> [<result-number>=0]" parser = OptionParser(usage=usage) parser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="only output the requested information, no meta-info.") parser.add_option("-p", "--print", dest="printout", action="store_true", help="output the result to stdout.") parser.add_option("--path", dest="path", action="store_true", help="display the fully-specified tonal space path.") parser.add_option("--play", dest="play", action="store_true", help="use the harmonical to play the root sequence of the result's semantics.") parser.add_option("--audio", dest="audio", action="store", help="use the harmonical to render the root sequence, as with --play, and store the result to a wave file.") options, arguments = parser.parse_args() # Just get the default formalism formalism = get_default_formalism() def _print(string=""): if not options.quiet: print >>sys.stderr, string if len(arguments) == 0: print >>sys.stderr, "Specify a file to read the results from" sys.exit(1) results = ParseResults.from_file(arguments[0]) if len(arguments) > 1: res_num = int(arguments[1]) else: res_num = 0 prob,result = results.sorted_results[res_num] if options.printout: _print("Result:") # Just display the resulting category print result _print() if options.path: _print("Tonal space path:") # Compute the tonal path (coordinates) from the result path = formalism.semantics_to_coordinates(result.semantics) points,timings = zip(*path) print ", ".join(coordinates_to_roman_names(points)) _print() if options.play or options.audio is not None: _print("Building pitch structure from result...") # Convert the semantics into a list of TS points path = formalism.semantics_to_coordinates(result.semantics) # Decide on chord types # For now, since we don't know the original chords, use dom7 # for dom chords, maj for subdoms, and M7 for tonics fun_chords = { 'T' : 'M7', 'D' : '7', 'S' : '', } functions = 
formalism.semantics_to_functions(result.semantics) chord_types = [(fun_chords[f],t) for (f,t) in functions] tones = path_to_tones(path, chord_types=chord_types, double_root=True) _print("Rendering audio samples...") samples = tones.render() if options.audio is not None: filename = os.path.abspath(options.audio) _print("Writing wave data to %s" % filename) save_wave_data(samples, filename) if options.play: _print("Playing...") play_audio(samples, wait_for_end=True) _print()
corpus = TonalSpaceAnalysisSet.load(corpus_name) # The rest of the args are result files to analyze res_files = arguments[1:] # Work out how many results to print out if options.print_results == -1: print_up_to = None else: print_up_to = options.print_results ranks = [] num_ranked = 0 for filename in res_files: # Load the parse results pres = ParseResults.from_file(filename) if options.gold_only and pres.gold_sequence is None: # Skip this sequence altogether if requested continue print "######################" print "Read %s" % filename # Try to get a correct answer from the PR file if pres.gold_sequence is None: print "No correct answer specified in input file" correct_song = None else: # Process the name of the sequence in the same way that # TonalSpaceAnalysisSet does # Ideally, they should make a common function call, but let's be # bad for once
def main(): features = {} input_files = glob.glob(PARSES_FILES) for file_results in input_files: # We read in the whole file (it's pickled, so we have to), but don't # keep the pres object after the loop iteration, because it can # be very big try: pres = ParseResults.from_file(file_results) except ParseResults.LoadError, err: if options.errors: # Print all load errors print >>sys.stderr, "Error loading file: %s" % (err) errors.append(file_results) continue print file_results if len(pres.semantics) == 0: continue top_result = pres.semantics[0][1] gold_result = pres.get_gold_semantics() # 'coord', 'xycoord', 'alpha' or 'roman' grammar = get_grammar() grammar.formalism.cl_output_options("tsformat=coord") coords = zip(*grammar.formalism.semantics_to_coordinates(gold_result))[0] funs = zip(*grammar.formalism.semantics_to_functions(gold_result))[0] gold_seq = zip(coords, funs) tags = [] for g in gold_seq: t = "%s,%s" % (coordinate_to_roman_name(g[0]), g[1]) tags.append(t) gold_graph,gold_time_map = semantics_to_dependency_graph(gold_result) depend_graph = eval("%s" % gold_graph.get_graph_pos(tags)) gold_graph = eval("%s" % gold_graph.get_graph_index()) # Words for g in gold_graph: word1 = g[0].split(",") uni_word = "UNIGRAM:"+str(word1[0]) if uni_word not in features: features[uni_word] = 0 else: features[uni_word] += 1 for dep in depend_graph: word1 = dep[0].split(",") uni_word = "UNIGRAM:"+str(word1[0]) if uni_word not in features: features[uni_word] = 0 else: features[uni_word] += 1 # Tags for dep in depend_graph: word1 = dep[0].split(",") uni_tag = "UNIGRAM:"+str(word1[1]) if uni_tag not in features: features[uni_tag] = 0 else: features[uni_tag] += 1 # Bigram Words for g in gold_graph: word1 = g[0].split(",") if g[1] == "ROOT": bigram_word = "BIGRAM:"+str(word1[0])+":ROOT" else: word2 = g[1].split(",") bigram_word = "BIGRAM:"+str(word1[0])+":"+str(word2[0]) if bigram_word not in features: features[bigram_word] = 0 else: features[bigram_word] += 1 for dep in 
depend_graph: word1 = dep[0].split(",") if dep[1] == "ROOT": bigram_word = "BIGRAM:"+str(word1[0])+":ROOT" else: word2 = dep[1].split(",") bigram_word = "BIGRAM:"+str(word1[0])+":"+str(word2[0]) if bigram_word not in features: features[bigram_word] = 0 else: features[bigram_word] += 1 # Bigram Tags for dep in depend_graph: word1 = dep[0].split(",") if dep[1] == "ROOT": bigram_tag = "BIGRAM:"+str(word1[1])+":ROOT" else: word2 = dep[1].split(",") bigram_tag = "BIGRAM:"+str(word1[1])+":"+str(word2[1]) if bigram_tag not in features: features[bigram_tag] = 0 else: features[bigram_tag] += 1 # Bigram Words/Tags for dep in depend_graph: word1 = dep[0].split(",") if dep[1] == "ROOT": bigram_words_tags = "BIGRAM:"+str(word1[0])+":"+str(word1[1])+":ROOT" else: word2 = dep[1].split(",") bigram_words_tags = "BIGRAM:"+str(word1[0])+":"+str(word1[1])+":"+str(word2[0])+":"+str(word2[1]) if bigram_words_tags not in features: features[bigram_words_tags] = 0 else: features[bigram_words_tags] += 1 # Trigram words for i in range(len(gold_graph)): if gold_graph[i][1] == "ROOT": # Get trigram if gold_graph[i-1][1] != "ROOT" and gold_graph[i-2][1] != "ROOT": head_root_word = gold_graph[i][0].split(",")[0] head_i1_word = gold_graph[i-1][0].split(",")[0] head_i2_word = gold_graph[i-2][0].split(",")[0] trigram_word = "TRIGRAM:" + head_root_word + ":" + head_i1_word + ":" + head_i2_word if trigram_word not in features: features[trigram_word] = 0 else: features[trigram_word] += 1 for i in range(len(depend_graph)): if depend_graph[i][1] == "ROOT": # Get trigram if depend_graph[i-1][1] != "ROOT" and depend_graph[i-2][1] != "ROOT": head_root_word = depend_graph[i][0].split(",")[0] head_i1_word = depend_graph[i-1][0].split(",")[0] head_i2_word = depend_graph[i-2][0].split(",")[0] trigram_word = "TRIGRAM:" + head_root_word + ":" + head_i1_word + ":" + head_i2_word if trigram_word not in features: features[trigram_word] = 0 else: features[trigram_word] += 1 # Trigram tags for i in 
range(len(depend_graph)): if depend_graph[i][1] == "ROOT": # Get trigram if depend_graph[i-1][1] != "ROOT" and depend_graph[i-2][1] != "ROOT": head_root_tag = depend_graph[i][0].split(",")[1] head_i1_tag = depend_graph[i-1][0].split(",")[1] head_i2_tag = depend_graph[i-2][0].split(",")[1] trigram_tag = "TRIGRAM:" + head_root_tag + ":" + head_i1_tag + ":" + head_i2_tag if trigram_tag not in features: features[trigram_tag] = 0 else: features[trigram_tag] += 1 # Trigram words/tags for i in range(len(depend_graph)): if depend_graph[i][1] == "ROOT": # Get trigram if depend_graph[i-1][1] != "ROOT" and depend_graph[i-2][1] != "ROOT": head_root = depend_graph[i][0].split(",") head_root_word_tag = head_root[0] + ":" + head_root[1] # words/tags head_i1 = depend_graph[i-1][0].split(",") head_i2 = depend_graph[i-2][0].split(",") head_i1_word_tag = head_i1[0] + ":" + head_i1[1] head_i2_word_tag = head_i2[0] + ":" + head_i2[1] trigram_word_tag = "TRIGRAM:" + head_root_word_tag + ":" + head_i1_word_tag + ":" + head_i2_word_tag if trigram_word_tag not in features: features[trigram_word_tag] = 0 else: features[trigram_word_tag] += 1
def _result_callback(response):
    """
    Handles the response produced for one input once its parse has 
    finished: marks the input as completed, reports the outcome and, 
    if an output directory is set, stores the results in a file.
    
    response is either None, in which case nothing is done, or a 
    mapping from which the following keys are read: 'identifier', 
    'results', 'error', 'input', 'timed_out', 'time', 'messages', 
    'tagger' and 'parser'. A 'results' value of None signals that the 
    parse failed.
    
    Nothing is returned. The outcome is recorded in completed_parses, 
    parse_exit_status (set to 1 on failure) and all_results (successful 
    responses are appended), all of which live outside this function.
    
    """
    # NOTE(review): this block was laid out from a listing whose 
    #  indentation had been lost. The nesting of the statements after 
    #  the error/success branches (messages, summary, interactive shell, 
    #  flush) is an assumption -- confirm against the original layout.
    if response is None:
        # Empty input, or the subprocess doesn't want us to do anything
        return
    else:
        # Mark this input as completed
        # This happens before checking for errors, so failed parses count 
        #  as completed in the summary too
        global completed_parses
        completed_parses[response['identifier']] = True
        
        if response['results'] is None:
            # There was some error: check what it was
            error = response['error']
            print >> sys.stderr, "Error parsing %s" % str(response['input'])
            print >> sys.stderr, "The error was:"
            # Only the third element of the error value is reported 
            #  (presumably a formatted traceback -- confirm with the code 
            #  that builds the response)
            print >>sys.stderr, error[2]
            # Remember that at least one parse failed
            global parse_exit_status
            parse_exit_status = 1
        else:
            # Keep this together with all the other processes' responses
            all_results.append(response)
            print "Parsed: %s" % response['input']
            
            # Run any cleanup routines that the formalism defines
            grammar.formalism.clean_results(response['results'])
            
            # Remove complex results if atomic-only option has been set
            # The filtered list replaces the one in the response, so 
            #  everything below only sees the remaining results
            if options.atoms_only:
                response['results'] = remove_complex_categories(response['results'], grammar.formalism)
            
            if not options.no_results:
                print "Results:"
                list_results(response['results'])
            
            if output_dir is not None:
                # Try getting a gold standard analysis if one has been 
                #  associated with the input
                gold = response['input'].get_gold_analysis()
                
                # Get the results with their probabilities
                # Results with no probability attribute are paired with None
                top_results = [(getattr(res, 'probability', None), res) \
                                    for res in response['results']]
                if options.topn is not None:
                    # Limit the results that get stored
                    # The (probability, result) tuples are sorted in 
                    #  descending order; equal probabilities fall back to 
                    #  comparing the result objects themselves
                    top_results = list(reversed(sorted(
                                            top_results)))[:options.topn]
                # Output the results to a file
                presults = ParseResults(
                                top_results, 
                                signs=True,
                                gold_parse=gold,
                                timed_out=response['timed_out'],
                                cpu_time=response['time'])
                filename = get_output_filename(response['identifier'])
                presults.save(filename)
                print "Parse results output to %s" % filename
            
            if time_parse:
                print "Parse took %f seconds" % response['time']
            
            if options.lh_analysis:
                # The heading goes to stderr, the paths themselves to stdout
                print >>sys.stderr, "\nLonguet-Higgins tonal space analysis for each result:"
                # Output the tonal space path for each result
                for i,result in enumerate(response['results']):
                    path = grammar.formalism.sign_to_coordinates(result)
                    # Split the path into parallel sequences of coordinates 
                    #  and times. This unpacking fails on an empty path
                    coords,times = zip(*path)
                    print "%d> %s" % (i, ", ".join(
                        ["%s@%s" % (crd,time) for (crd,time) in 
                                zip(coordinates_to_roman_names(coords),times)]))
            
            if options.lh_coord:
                # The heading goes to stderr, the paths themselves to stdout
                print >>sys.stderr, "\nLonguet-Higgins tonal space coordinates for each result:"
                # Output the tonal space path for each result
                for i,result in enumerate(response['results']):
                    path = grammar.formalism.sign_to_coordinates(result)
                    # Each path element is unpacked as ((x,y), time)
                    print "%d> %s" % (i, ", ".join(["(%d,%d)@%s" % (x,y,t) for ((x,y),t) in path]))
        
        # Print out any messages the parse routine sent to us
        for message in response['messages']:
            print message
        
        # Print a summary of what we've completed
        # Count the inputs whose completed flag is set
        num_completed = len(filter(lambda x:x[1], completed_parses.items()))
        if not stdinput:
            if not options.no_progress:
                print format_table([
                        [str(ident), 
                         "Complete" if completed_parses[ident] else ""]
                            for ident in sorted(completed_parses.keys())])
            # The total number of inputs is not always known
            if num_inputs is None:
                print "\nCompleted %d parses" % num_completed
            else:
                print "\nCompleted %d/%d parses" % (num_completed, num_inputs)
        
        # Enter interactive mode now if requested in options
        # Don't do this if we're in a process pool
        if not multiprocessing and options.interactive:
            print 
            from jazzparser.shell import interactive_shell
            # Make the module's globals and this function's local names 
            #  available in the shell: locals take precedence where the 
            #  names clash. Note that renaming a local variable here 
            #  changes what the shell user sees
            env = {}
            env.update(globals())
            env.update(locals())
            interactive_shell(response['results'], options, response['tagger'], response['parser'], grammar.formalism, env, input_data=response['input'])
            print
        
        # Flush the output to make sure everything gets out before we start the next one
        sys.stderr.flush()
        sys.stdout.flush()
def _load_res(filename):
    """
    Reads the parse results stored in the given file and returns them.
    
    If the file can't be loaded, the problem is reported on stderr 
    (unless the quiet option is set) and None is returned instead of 
    the error being propagated.
    
    """
    try:
        loaded = ParseResults.from_file(filename)
    except ParseResults.LoadError as load_err:
        # An unreadable file is not fatal: just tell the user about it
        if not options.quiet:
            print >>sys.stderr, "Error loading file %s: %s" % (filename, load_err)
        return None
    return loaded
continue else: print >>sys.stderr, "Restricting to input %d" % input_index if options.only_load: # Just output that we'd process this input, but don't do anything print "Input %d: %s" % (input_index,input_identifier) continue if options.skip_done: # Skip any inputs for which a readable output file already exists outfile = get_output_filename(input_identifier) if os.path.exists(outfile): # Try loading the output file try: old_res = ParseResults.from_file(outfile) except ParseResults.LoadError, err: pass else: # File loaded ok: don't process this input # Mark it as complete completed_parses[input_identifier] = True continue # Mark this as incomplete completed_parses[input_identifier] = False # Get a filename for a logger for this input if parse_logger_dir: parse_logger = os.path.join(parse_logger_dir, "%s.log" % \ slugify(input_identifier))