def main():
    input_files = glob.glob(PARSES_FILES)
    for file_results in input_files:
        # We read in the whole file (it's pickled, so we have to), but don't 
        #  keep the pres object after the loop iteration, because it can 
        #  be very big
        try:
            pres = ParseResults.from_file(file_results)
        except ParseResults.LoadError, err:
            if options.errors:
                # Print all load errors
                print >>sys.stderr, "Error loading file: %s" % (err)
            errors.append(file_results)
            continue

        # tracking progress
        print file_results 

        # get gold semantics
        gold_result = pres.get_gold_semantics()

        # calcuate maximum index of parses
        if len(pres.semantics) > 0:
            max_index = max_parse(pres.semantics, gold_result)

            print "Total: ", len(pres.semantics)
            print "Max: " , max_index
            zi = pres.semantics[max_index]

            parse_result = ParseResults([zi], gold_result)
            parse_result.save(RESULT_FILES + os.path.basename(file_results))
예제 #2
0
def result_lengths(filename, grammar=None):
    """
    Loads a parse results file and measures two tonal space paths: that of 
    the gold standard and that of the first result in the list.
    
    @param filename: path of the parse results file
    @param grammar: grammar whose formalism is used to measure the paths. If 
        not given, the one returned by get_grammar() is used
    @rtype: pair
    @return: (gold standard length, first result's length). A length is 0 
        if there is no gold standard or there are no results, respectively
    
    """
    if grammar is None:
        grammar = get_grammar()

    def _path_length(semantics):
        # Looked up at call time, so the formalism is only touched if 
        #  there's something to measure
        return grammar.formalism.Evaluation.tonal_space_length(semantics)

    loaded = ParseResults.from_file(filename)

    gold = loaded.get_gold_semantics()
    gold_length = 0 if gold is None else _path_length(gold)

    # Each entry of the list is a pair, with the semantics in second place
    ranked = loaded.semantics
    if len(ranked) == 0:
        # Nothing to measure on the results side
        return gold_length, 0
    return gold_length, _path_length(ranked[0][1])
예제 #3
0
def main():
    usage = "%prog [options] <seq-file>"
    description = "Parses a sequence from a sequence index file using the "\
        "annotations stored in the same file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-r", "--results", dest="results", action="store_true", help="output the results list")
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "Specify a parse results file"
        sys.exit(1)
    
    pres = ParseResults.from_file(arguments[0])
    
    if hasattr(pres, "signs") and pres.signs:
        print "Results stored as signs"
    else:
        print "Results stored as logical forms only"
        
    if pres.gold_parse is None:
        print "No gold parse stored"
    else:
        print "Gold parse available"
    
    if pres.gold_sequence is None:
        print "No gold sequence stored"
    else:
        print "Gold sequence available"
    
    if options.results:
        print
        for i,(prob,res) in enumerate(pres.parses):
            print "Result %d, probability %s" % (i,prob)
            print res
예제 #4
0
def result_lengths(filename, grammar=None):
    """
    Reads a parse results file and returns a pair of tonal space path 
    lengths: one for the gold standard and one for the first result in the 
    file's list.

    @param filename: path of the parse results file
    @param grammar: grammar whose formalism does the measuring. The result 
        of get_grammar() is used if none is given
    @rtype: pair
    @return: (gold standard length, first result's length). The first is 0 
        if there's no gold standard, the second is 0 if there are no results

    """
    if grammar is None:
        grammar = get_grammar()

    def _measure(semantics):
        # The formalism is only consulted once there's something to measure
        return grammar.formalism.Evaluation.tonal_space_length(semantics)

    loaded = ParseResults.from_file(filename)

    gold = loaded.get_gold_semantics()
    if gold is not None:
        gold_length = _measure(gold)
    else:
        gold_length = 0

    # The semantics is the second item of each entry in the list
    ranked = loaded.semantics
    if len(ranked) == 0:
        # No results, so there's no second path to measure
        return gold_length, 0

    top_length = _measure(ranked[0][1])
    return gold_length, top_length
예제 #5
0
def main():
    usage = "%prog [options] <results-files>"
    description = "Evaluates parse results stored in files by comparing "\
        "them to the gold standard results stored with them, using any "\
        "a variety of metrics."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--errors", dest="errors", action="store_true", help="display errors reading in the files.")
    parser.add_option("--unscored", dest="unscored", action="store_true", help="output a list of files containing no results (i.e. no successful full parses) and exit")
    parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    parser.add_option("--mc", "--metric-computation", dest="print_computation", action="store_true", help="show the metric's computation trace for each input")
    options, arguments = parser.parse_args()
        
    if len(arguments) == 0:
        print >>sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)
    
    deprec_metric = command_line_metric(formalism, "deprec")
    deps_metric = command_line_metric(formalism, "deps")
    
    # Try loading all the input files
    preses = []
    input_pairs = []
    errors = []
    covered = 0
    input_filenames = []
    for filename in arguments:
        try:
            pres = ParseResults.from_file(filename)
        except ParseResults.LoadError, err:
            if options.errors:
                # Print all load errors
                print >>sys.stderr, "Error loading file: %s" % (err)
            errors.append(filename)
            continue
        preses.append(pres)
        
        # Try to get a gold standard result
        gold_result = pres.get_gold_semantics()
        if gold_result is None:
            # Can't evaluate this: ignore it
            if options.unscored:
                print "No gold result for", filename
            continue
        
        # Get the top result's semantics
        if len(pres.semantics) == 0:
            # No results for this
            input_pairs.append((None, gold_result))
            input_filenames.append(filename)
            continue
        top_result = pres.semantics[0][1]
        
        # Got a result and gold result for this
        covered += 1
        input_pairs.append((top_result, gold_result))
        input_filenames.append(filename)
예제 #6
0
def main():
    usage = "%prog [options] <results-files>"
    description = "Evaluates parse results stored in files by comparing "\
        "them to the gold standard results stored with them, using any "\
        "a variety of metrics."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--tabbed",
                      dest="tabbed",
                      action="store_true",
                      help="output a tabbed table of values")
    options, arguments = parser.parse_args()

    if len(arguments) == 0:
        print >> sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)

    deprec_metric = command_line_metric(formalism,
                                        "deprec",
                                        options="output=f")
    deps_metric = command_line_metric(formalism, "deps", options="output=f")

    # Try loading all the input files
    input_pairs = []
    errors = []
    covered = 0
    input_filenames = []
    for filename in arguments:
        try:
            pres = ParseResults.from_file(filename)
        except ParseResults.LoadError, err:
            errors.append(filename)
            continue

        # Try to get a gold standard result
        gold_result = pres.get_gold_semantics()
        if gold_result is None:
            # Can't evaluate this: ignore it
            if options.unscored:
                print "No gold result for", filename
            continue

        # Get the top result's semantics
        if len(pres.semantics) == 0:
            # No results for this
            input_pairs.append((None, gold_result))
            input_filenames.append(filename)
            continue
        top_result = pres.semantics[0][1]

        # Got a result and gold result for this
        covered += 1
        input_pairs.append((top_result, gold_result))
        input_filenames.append(filename)
예제 #7
0
 def run(self, args, state):
     """
     Loads a parse results file and replaces the results in the given state 
     with the ones from the file.
     
     The file's path is taken from the first argument. A ShellError is 
     raised if the file's results are not stored as signs.
     
     """
     from .shell import ShellError
     from jazzparser.data.parsing import ParseResults
     
     loaded = ParseResults.from_file(args[0])
     
     # The attribute is checked for before it's read
     stored_as_signs = hasattr(loaded, "signs") and loaded.signs
     if not stored_as_signs:
         raise ShellError("loaded parse results, but they're stored as "
             "logical forms, not signs, so we can't load them into the "
             "state")
     # Only the results go into the state, not their probabilities
     state.results = [sign for (__, sign) in loaded.parses]
예제 #8
0
    def run(self, args, state):
        """
        Reads the parse results file named by the first argument and puts 
        its results into the state, in place of whatever was there.

        Raises a ShellError if the results in the file are not stored as 
        signs.

        """
        from .shell import ShellError
        from jazzparser.data.parsing import ParseResults

        loaded = ParseResults.from_file(args[0])

        # Check that the attribute is there before reading it
        has_signs = hasattr(loaded, "signs") and loaded.signs
        if not has_signs:
            raise ShellError("loaded parse results, but they're stored as "
                "logical forms, not signs, so we can't load them into the "
                "state")
        # Drop the probabilities: the state just holds the results
        state.results = [sign for (__, sign) in loaded.parses]
예제 #9
0
def main():
    usage = "%prog [options] <res-file1> [<res-file2> ...]"
    description = "Reads in a parse results file, drops all but the top n "\
        "results and writes it out to another directory"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-n", dest="n", action="store", type="int", help="number of results to keep. Default: 1", default=1)
    parser.add_option("-o", "--output-dir", dest="output_dir", action="store", help="directory to put the output files in. Default: same as inputs, with altered filenames")
    parser.add_option("-d", "--strip-derivations", dest="strip_derivations", action="store_true", help="remove derivation traces from the results")
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "Specify at least one parse results file"
        sys.exit(1)
    
    n = options.n
    
    if options.output_dir is not None:
        output_dir = os.path.abspath(options.output_dir)
        filename_suffix = ""
    else:
        output_dir = None
        filename_suffix = "-top-%d" % n
    print "Outputing to: %s\n" % output_dir
    
    for filename in arguments:
        # Run the garbage collector each time round to get rid of the old 
        #  objects. For some reason it doesn't get run often enough otherwise
        gc.collect()
        
        filebase = os.path.basename(filename)
        # Decide where the output's going for this file
        if output_dir is None:
            file_outdir = os.path.dirname(os.path.abspath(filename))
        else:
            file_outdir = output_dir
        file_outname = os.path.join(file_outdir, 
                                    "%s%s" % (filebase, filename_suffix))
        
        print "Reading in: %s" % filebase
        # Read in the parse results file
        pres = ParseResults.from_file(filename)
        pres.parses = pres.parses[:n]
        if options.strip_derivations and pres.signs:
            # Remove derivation traces, if they were stored in the first place
            for prob,res in pres.parses:
                res.derivation_trace = None
        pres.save(file_outname)
        # Allow this to be garbage collected now
        pres = None
예제 #10
0
def main():
    usage = "%prog [options] <results-files>"
    description = "Evaluates parse results stored in files by comparing "\
        "them to the gold standard results stored with them, using any "\
        "a variety of metrics."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--tabbed", dest="tabbed", action="store_true", help="output a tabbed table of values")
    options, arguments = parser.parse_args()
        
    if len(arguments) == 0:
        print >>sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)
    
    deprec_metric = command_line_metric(formalism, "deprec", options="output=f")
    deps_metric = command_line_metric(formalism, "deps", options="output=f")
    
    # Try loading all the input files
    input_pairs = []
    errors = []
    covered = 0
    input_filenames = []
    for filename in arguments:
        try:
            pres = ParseResults.from_file(filename)
        except ParseResults.LoadError, err:
            errors.append(filename)
            continue
        
        # Try to get a gold standard result
        gold_result = pres.get_gold_semantics()
        if gold_result is None:
            # Can't evaluate this: ignore it
            if options.unscored:
                print "No gold result for", filename
            continue
        
        # Get the top result's semantics
        if len(pres.semantics) == 0:
            # No results for this
            input_pairs.append((None, gold_result))
            input_filenames.append(filename)
            continue
        top_result = pres.semantics[0][1]
        
        # Got a result and gold result for this
        covered += 1
        input_pairs.append((top_result, gold_result))
        input_filenames.append(filename)
def reranking(input_files):
	
	# Initialization: v = 0
	learning_rate = 0.2
	v = get_features_vector()
	input_files = sorted(input_files)
	# Algorithm:
	# For t = 1..T, i = 1..n
	#	zi = F(xi)
	#	if (zi != yi) v = v + f(xi, yi) - f(xi, zi)
	T = 10
	for t in range(T):
		print "========== Loop: %d ==========" % t
		for parses_result in input_files:
			# We read in the whole file (it's pickled, so we have to), but don't 
			#  keep the pres object after the loop iteration, because it can 
			#  be very big
			try:
				pres = ParseResults.from_file(parses_result)
			except ParseResults.LoadError, err:
				if options.errors:
					# Print all load errors
					print >>sys.stderr, "Error loading file: %s" % (err)
				errors.append(parses_result)
				continue

			# get gold semantics and gold dependency graph
			gold_result = pres.get_gold_semantics()
			gold_depend_graph = get_depend_graph(gold_result)			

			# calcuate maximum index of parses
			if len(pres.semantics) > 0:
				max_index = max_parse(pres.semantics, v)
				zi = pres.semantics[max_index]

				# get maximum dependency graph			
				zi_depend_graph = get_depend_graph(zi[1])
				
				if tonal_space_distance(zi[1].lf, gold_result.lf) != 0:
					gold_features = get_features(gold_depend_graph)
					zi_features = get_features(zi_depend_graph)
					for k, val in zi_features.iteritems():
						v[k] = v[k] + gold_features[k] - zi_features[k]

				print sum(v.values())
예제 #12
0
def main():
    usage = "%prog [options] <results-files>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-t",
                      "--times",
                      dest="times",
                      action="store_true",
                      help="show timings of nodes")
    parser.add_option("-l",
                      "--latex",
                      dest="latex",
                      action="store_true",
                      help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--la",
                      "--latex-align",
                      dest="latex_align",
                      action="store_true",
                      help="show node alignments in Latex output")
    parser.add_option(
        "--align-time",
        dest="align_time",
        action="store_true",
        help=
        "show the graph of common dependencies when the two graphs are aligned by node times"
    )
    parser.add_option(
        "--align-max",
        dest="align_max",
        action="store_true",
        help=
        "show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery"
    )
    options, arguments = parser.parse_args()

    if len(arguments) == 0:
        print >> sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    filename = arguments[0]

    try:
        pres = ParseResults.from_file(filename)
    except ParseResults.LoadError, err:
        print >> sys.stderr, "Error loading file: %s" % (err)
        sys.exit(1)
예제 #13
0
def get_top_result(filename):
    """
    Loads the top parse result and the gold standard result from a 
    ParseResults file and returns the logical form of each.
    
    @note: effectively now moved to ParseResults.get_top_result(). 
        This is just a wrapper for backward compatibility.
    
    @param filename: path of the parse results file
    @rtype: pair
    @return: logical form of the top parser result and logical form of the 
        gold standard result, in that order. Both are None if there is no 
        top result. The second is None if there is no gold standard result
    
    """
    # Load the data in from the file
    res = ParseResults.from_file(filename)
    top, gold = res.get_top_result()
    if top is None:
        return None, None
    if gold is None:
        # Don't fall over reading the lf of a missing gold standard
        return top.lf, None
    return top.lf, gold.lf
예제 #14
0
def get_top_result(filename):
    """
    Loads the top parse result and the gold standard result from a 
    ParseResults file and returns the logical form of each.
    
    @note: effectively now moved to ParseResults.get_top_result(). 
        This is just a wrapper for backward compatibility.
    
    @param filename: path of the parse results file
    @rtype: pair
    @return: logical form of the top parser result and logical form of the 
        gold standard result, in that order. Both are None if there is no 
        top result. The second is None if there is no gold standard result
    
    """
    # Load the data in from the file
    res = ParseResults.from_file(filename)
    top,gold = res.get_top_result()
    if top is None:
        return None,None
    if gold is None:
        # Don't fall over reading the lf of a missing gold standard
        return top.lf, None
    return top.lf, gold.lf
예제 #15
0
def main():
    usage = "%prog [options] <results-files>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-t", "--times", dest="times", action="store_true", help="show timings of nodes")
    parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--la", "--latex-align", dest="latex_align", action="store_true", help="show node alignments in Latex output")
    parser.add_option("--align-time", dest="align_time", action="store_true", help="show the graph of common dependencies when the two graphs are aligned by node times")
    parser.add_option("--align-max", dest="align_max", action="store_true", help="show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery")
    options, arguments = parser.parse_args()
        
    if len(arguments) == 0:
        print >>sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    filename = arguments[0]

    try:
        pres = ParseResults.from_file(filename)
    except ParseResults.LoadError, err:
        print >>sys.stderr, "Error loading file: %s" % (err)
        sys.exit(1)
예제 #16
0
def main():
    """
    Command-line entry point of a script described in its own help text as 
    printing a dependency tree for a parse result.
    
    Sets up the command-line options, chooses the parser name and feature 
    parameter file according to the results file's name, builds the input 
    for the song the results file is named after and loads the results. The 
    exit status is 1 if no file is named or the results fail to load.
    
    """
    parser = OptionParser(
        usage="%prog [options] <results-files>",
        description="Prints a dependency tree for a parse result")
    # All the options are on/off switches: (flags, destination, help text)
    switches = [
        (("-t", "--times"), "times",
            "show timings of nodes"),
        (("-l", "--latex"), "latex",
            "output Latex for the graphs using tikz-dependency"),
        (("--la", "--latex-align"), "latex_align",
            "show node alignments in Latex output"),
        (("--align-time",), "align_time",
            "show the graph of common dependencies when the two graphs are aligned by node times"),
        (("--align-max",), "align_max",
            "show the graph of common dependencies when the two graphs are aligned to maximize the dependency recovery"),
    ]
    for flags, dest, help_text in switches:
        settings = dict(dest=dest, action="store_true", help=help_text)
        parser.add_option(*flags, **settings)
    options, arguments = parser.parse_args()
    
    if not arguments:
        print >>sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    filename = arguments[0]

    # Switch between PCCG and St+PCCG, going by the file's name
    if "stpcfg" in filename:
        PARSER = "St+PCCG"
        FEATURE_PARAMS = "../xuanhong/params_2_stpcfg.txt"
    else:
        PARSER = "PCCG"
        FEATURE_PARAMS = "../xuanhong/params_2_pcfg.txt"

    # Input sequence: the song list is indexed by the results file's base 
    #  name and what it gives is used to pick out a sequence
    list_songs = read_list_songs("../xuanhong/list_songs.txt")
    song_name = os.path.basename(filename)
    seqs = SequenceIndex.from_file(settings.SEQUENCE_DATA) if False else None
예제 #17
0
def main():
    usage = "%prog [options] <res-file1> [<res-file2> ...]"
    description = "Reads in a parse results file, drops all but the top n "\
        "results and writes it out to another directory"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-n",
                      dest="n",
                      action="store",
                      type="int",
                      help="number of results to keep. Default: 1",
                      default=1)
    parser.add_option(
        "-o",
        "--output-dir",
        dest="output_dir",
        action="store",
        help=
        "directory to put the output files in. Default: same as inputs, with altered filenames"
    )
    parser.add_option("-d",
                      "--strip-derivations",
                      dest="strip_derivations",
                      action="store_true",
                      help="remove derivation traces from the results")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "Specify at least one parse results file"
        sys.exit(1)

    n = options.n

    if options.output_dir is not None:
        output_dir = os.path.abspath(options.output_dir)
        filename_suffix = ""
    else:
        output_dir = None
        filename_suffix = "-top-%d" % n
    print "Outputing to: %s\n" % output_dir

    for filename in arguments:
        # Run the garbage collector each time round to get rid of the old
        #  objects. For some reason it doesn't get run often enough otherwise
        gc.collect()

        filebase = os.path.basename(filename)
        # Decide where the output's going for this file
        if output_dir is None:
            file_outdir = os.path.dirname(os.path.abspath(filename))
        else:
            file_outdir = output_dir
        file_outname = os.path.join(file_outdir,
                                    "%s%s" % (filebase, filename_suffix))

        print "Reading in: %s" % filebase
        # Read in the parse results file
        pres = ParseResults.from_file(filename)
        pres.parses = pres.parses[:n]
        if options.strip_derivations and pres.signs:
            # Remove derivation traces, if they were stored in the first place
            for prob, res in pres.parses:
                res.derivation_trace = None
        pres.save(file_outname)
        # Allow this to be garbage collected now
        pres = None
예제 #18
0
def main():
    usage = "%prog [options] <results-file> [<result-number>=0]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-q",
        "--quiet",
        dest="quiet",
        action="store_true",
        help="only output the requested information, no meta-info.")
    parser.add_option("-p",
                      "--print",
                      dest="printout",
                      action="store_true",
                      help="output the result to stdout.")
    parser.add_option("--path",
                      dest="path",
                      action="store_true",
                      help="display the fully-specified tonal space path.")
    parser.add_option(
        "--play",
        dest="play",
        action="store_true",
        help=
        "use the harmonical to play the root sequence of the result's semantics."
    )
    parser.add_option(
        "--audio",
        dest="audio",
        action="store",
        help=
        "use the harmonical to render the root sequence, as with --play, and store the result to a wave file."
    )
    options, arguments = parser.parse_args()

    # Just get the default formalism
    formalism = get_default_formalism()

    def _print(string=""):
        if not options.quiet:
            print >> sys.stderr, string

    if len(arguments) == 0:
        print >> sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    results = ParseResults.from_file(arguments[0])

    if len(arguments) > 1:
        res_num = int(arguments[1])
    else:
        res_num = 0
    prob, result = results.sorted_results[res_num]

    if options.printout:
        _print("Result:")
        # Just display the resulting category
        print result
        _print()

    if options.path:
        _print("Tonal space path:")
        # Compute the tonal path (coordinates) from the result
        path = formalism.semantics_to_coordinates(result.semantics)
        points, timings = zip(*path)
        print ", ".join(coordinates_to_roman_names(points))
        _print()

    if options.play or options.audio is not None:
        _print("Building pitch structure from result...")
        # Convert the semantics into a list of TS points
        path = formalism.semantics_to_coordinates(result.semantics)
        # Decide on chord types
        # For now, since we don't know the original chords, use dom7
        #  for dom chords, maj for subdoms, and M7 for tonics
        fun_chords = {
            'T': 'M7',
            'D': '7',
            'S': '',
        }
        functions = formalism.semantics_to_functions(result.semantics)
        chord_types = [(fun_chords[f], t) for (f, t) in functions]

        tones = path_to_tones(path, chord_types=chord_types, double_root=True)

        _print("Rendering audio samples...")
        samples = tones.render()
        if options.audio is not None:
            filename = os.path.abspath(options.audio)
            _print("Writing wave data to %s" % filename)
            save_wave_data(samples, filename)
        if options.play:
            _print("Playing...")
            play_audio(samples, wait_for_end=True)
        _print()
예제 #19
0
def main():
    usage = "%prog [options] <results-files>"
    description = "Evaluates parse results stored in files by comparing "\
        "them to the gold standard results stored with them, using any "\
        "a variety of metrics."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--errors", dest="errors", action="store_true", help="display errors reading in the files.")
    parser.add_option("--unscored", dest="unscored", action="store_true", help="output a list of files containing no results (i.e. no successful full parses) and exit")
    parser.add_option("--timeout", dest="timeout", action="store_true", help="output a list of parses that timed out")
    parser.add_option("-m", "--metric", dest="metric", action="store", help="semantics distance metric to use. Use '-m help' for a list of available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts", action="append", help="options to pass to the semantics metric. Use with '--mopt help' with -m to see available options")
    parser.add_option("--mc", "--metric-computation", dest="print_computation", action="store_true", help="show the metric's computation trace for each input")
    parser.add_option("-f", "--f-score", dest="f_score", action="store_true", help="outputs recall, precision and f-score for an f-score-based metric. Just uses the same metric 3 times with output=recall, etc. Will only work with appropriate metrics")
    parser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="just output the numbers, nothing else")
    parser.add_option("-t", "--time", dest="time", action="store_true", help="output average parse time. This is output by default, but hidden in quiet mode unless this switch is used")
    options, arguments = parser.parse_args()
        
    if options.f_score:
        # Special case: get 3 metrics
        metrics = []
        opts = options.mopts or []
        for opt in [ "output=precision", "output=recall" ]:
            metrics.append(command_line_metric(formalism, options.metric, 
                                                                opts+[opt]))
        if not options.quiet:
            print "Evaluating precision, recall and f-score on %s" % metrics[0].name
    else:
        # Get a metric according to the options
        metrics = [command_line_metric(formalism, options.metric, options.mopts)]
        if not options.quiet:
            print "Evaluating using metric: %s" % metrics[0].name
    
    if len(arguments) == 0:
        print >>sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)
    
    # Try loading all the input files
    input_pairs = []
    errors = []
    covered = 0
    input_filenames = []
    times = []
    timed_out = 0
    for filename in arguments:
        # We read in the whole file (it's pickled, so we have to), but don't 
        #  keep the pres object after the loop iteration, because it can 
        #  be very big
        try:
            pres = ParseResults.from_file(filename)
        except ParseResults.LoadError, err:
            if options.errors:
                # Print all load errors
                print >>sys.stderr, "Error loading file: %s" % (err)
            errors.append(filename)
            continue
        
        if options.timeout and pres.timed_out:
            print "Timed out: %s" % filename
        if pres.timed_out:
            timed_out += 1
        
        # Try to get a gold standard result
        gold_result = pres.get_gold_semantics()
        if gold_result is None:
            # Can't evaluate this: ignore it
            if not options.quiet:
                print "No gold result for", filename
            continue
        
        # Get the top result's semantics
        if len(pres.semantics) == 0:
            # No results for this
            input_pairs.append((None, gold_result))
            input_filenames.append(filename)
            if options.unscored:
                print "No results: %s" % filename
            continue
        top_result = pres.semantics[0][1]
        
        # Got a result and gold result for this
        covered += 1
        input_pairs.append((top_result, gold_result))
        input_filenames.append(filename)
        # Check this for compat with old stored results
        if hasattr(pres, 'cpu_time'):
            times.append(pres.cpu_time)
예제 #20
0
def main():
    usage = "%prog [options] <results-file> [<result-number>=0]"
    parser = OptionParser(usage=usage)
    parser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="only output the requested information, no meta-info.")
    parser.add_option("-p", "--print", dest="printout", action="store_true", help="output the result to stdout.")
    parser.add_option("--path", dest="path", action="store_true", help="display the fully-specified tonal space path.")
    parser.add_option("--play", dest="play", action="store_true", help="use the harmonical to play the root sequence of the result's semantics.")
    parser.add_option("--audio", dest="audio", action="store", help="use the harmonical to render the root sequence, as with --play, and store the result to a wave file.")
    options, arguments = parser.parse_args()
    
    # Just get the default formalism
    formalism = get_default_formalism()
    
    def _print(string=""):
        if not options.quiet:
            print >>sys.stderr, string
        
    if len(arguments) == 0:
        print >>sys.stderr, "Specify a file to read the results from"
        sys.exit(1)
    results = ParseResults.from_file(arguments[0])
    
    if len(arguments) > 1:
        res_num = int(arguments[1])
    else:
        res_num = 0
    prob,result = results.sorted_results[res_num]
    
    if options.printout:
        _print("Result:")
        # Just display the resulting category
        print result
        _print()
        
    if options.path:
        _print("Tonal space path:")
        # Compute the tonal path (coordinates) from the result
        path = formalism.semantics_to_coordinates(result.semantics)
        points,timings = zip(*path)
        print ", ".join(coordinates_to_roman_names(points))
        _print()
        
    if options.play or options.audio is not None:
        _print("Building pitch structure from result...")
        # Convert the semantics into a list of TS points
        path = formalism.semantics_to_coordinates(result.semantics)
        # Decide on chord types
        # For now, since we don't know the original chords, use dom7 
        #  for dom chords, maj for subdoms, and M7 for tonics
        fun_chords = {
            'T' : 'M7',
            'D' : '7',
            'S' : '',
        }
        functions = formalism.semantics_to_functions(result.semantics)
        chord_types = [(fun_chords[f],t) for (f,t) in functions]
        
        tones = path_to_tones(path, chord_types=chord_types, double_root=True)
        
        _print("Rendering audio samples...")
        samples = tones.render()
        if options.audio is not None:
            filename = os.path.abspath(options.audio)
            _print("Writing wave data to %s" % filename)
            save_wave_data(samples, filename)
        if options.play:
            _print("Playing...")
            play_audio(samples, wait_for_end=True)
        _print()
예제 #21
0
    corpus = TonalSpaceAnalysisSet.load(corpus_name)

    # The rest of the args are result files to analyze
    res_files = arguments[1:]

    # Work out how many results to print out
    if options.print_results == -1:
        print_up_to = None
    else:
        print_up_to = options.print_results

    ranks = []
    num_ranked = 0
    for filename in res_files:
        # Load the parse results
        pres = ParseResults.from_file(filename)
        if options.gold_only and pres.gold_sequence is None:
            # Skip this sequence altogether if requested
            continue
        print "######################"
        print "Read %s" % filename

        # Try to get a correct answer from the PR file
        if pres.gold_sequence is None:
            print "No correct answer specified in input file"
            correct_song = None
        else:
            # Process the name of the sequence in the same way that
            #  TonalSpaceAnalysisSet does
            # Ideally, they should make a common function call, but let's be
            #  bad for once
def main():	

	features = {}
	input_files = glob.glob(PARSES_FILES)
	
	for file_results in input_files:
		# We read in the whole file (it's pickled, so we have to), but don't 
		#  keep the pres object after the loop iteration, because it can 
		#  be very big
		try:
			pres = ParseResults.from_file(file_results)
		except ParseResults.LoadError, err:
			if options.errors:
				# Print all load errors
				print >>sys.stderr, "Error loading file: %s" % (err)
			errors.append(file_results)
			continue

		print file_results
		if len(pres.semantics) == 0:
			continue
			
		top_result = pres.semantics[0][1]
		gold_result = pres.get_gold_semantics()

		# 'coord', 'xycoord', 'alpha' or 'roman'
		grammar = get_grammar()
		grammar.formalism.cl_output_options("tsformat=coord")	
		coords = zip(*grammar.formalism.semantics_to_coordinates(gold_result))[0]
		funs = zip(*grammar.formalism.semantics_to_functions(gold_result))[0]
		gold_seq = zip(coords, funs)

		tags = []
		for g in gold_seq:
			t = "%s,%s" % (coordinate_to_roman_name(g[0]), g[1])
			tags.append(t)

		gold_graph,gold_time_map = semantics_to_dependency_graph(gold_result)	
		depend_graph = eval("%s" % gold_graph.get_graph_pos(tags))
		gold_graph = eval("%s" % gold_graph.get_graph_index())

		# Words
		for g in gold_graph:
			word1 = g[0].split(",")
			uni_word = "UNIGRAM:"+str(word1[0])
			if uni_word not in features:
				features[uni_word] = 0
			else:
				features[uni_word] += 1

		for dep in depend_graph:
			word1 = dep[0].split(",")
			uni_word = "UNIGRAM:"+str(word1[0])
			if uni_word not in features:
				features[uni_word] = 0
			else:
				features[uni_word] += 1

		# Tags
		for dep in depend_graph:
			word1 = dep[0].split(",")
			uni_tag = "UNIGRAM:"+str(word1[1])
			if uni_tag not in features:
				features[uni_tag] = 0
			else:
				features[uni_tag] += 1

		# Bigram Words
		for g in gold_graph:
			word1 = g[0].split(",")
			if g[1] == "ROOT":
				bigram_word = "BIGRAM:"+str(word1[0])+":ROOT"
			else:
				word2 = g[1].split(",")
				bigram_word = "BIGRAM:"+str(word1[0])+":"+str(word2[0])
			if bigram_word not in features:
				features[bigram_word] = 0	
			else:
				features[bigram_word] += 1

		for dep in depend_graph:
			word1 = dep[0].split(",")
			if dep[1] == "ROOT":
				bigram_word = "BIGRAM:"+str(word1[0])+":ROOT"
			else:
				word2 = dep[1].split(",")
				bigram_word = "BIGRAM:"+str(word1[0])+":"+str(word2[0])
			if bigram_word not in features:
				features[bigram_word] = 0	
			else:
				features[bigram_word] += 1

		# Bigram Tags
		for dep in depend_graph:
			word1 = dep[0].split(",")
			if dep[1] == "ROOT":
				bigram_tag = "BIGRAM:"+str(word1[1])+":ROOT"
			else:
				word2 = dep[1].split(",")
				bigram_tag = "BIGRAM:"+str(word1[1])+":"+str(word2[1])
			if bigram_tag not in features:
				features[bigram_tag] = 0			
			else:
				features[bigram_tag] += 1

		# Bigram Words/Tags
		for dep in depend_graph:
			word1 = dep[0].split(",")
			if dep[1] == "ROOT":
				bigram_words_tags = "BIGRAM:"+str(word1[0])+":"+str(word1[1])+":ROOT"
			else:
				word2 = dep[1].split(",")
				bigram_words_tags = "BIGRAM:"+str(word1[0])+":"+str(word1[1])+":"+str(word2[0])+":"+str(word2[1])
			if bigram_words_tags not in features:
				features[bigram_words_tags] = 0
			else:
				features[bigram_words_tags] += 1

		# Trigram words
		for i in range(len(gold_graph)):
			if gold_graph[i][1] == "ROOT":
				# Get trigram
				if gold_graph[i-1][1] != "ROOT" and gold_graph[i-2][1] != "ROOT":
					head_root_word = gold_graph[i][0].split(",")[0]
					head_i1_word = gold_graph[i-1][0].split(",")[0]
					head_i2_word = gold_graph[i-2][0].split(",")[0]
					trigram_word = "TRIGRAM:" + head_root_word + ":" + head_i1_word + ":" + head_i2_word
					if trigram_word not in features:
						features[trigram_word] = 0
					else:
						features[trigram_word] += 1

		for i in range(len(depend_graph)):
			if depend_graph[i][1] == "ROOT":
				# Get trigram
				if depend_graph[i-1][1] != "ROOT" and depend_graph[i-2][1] != "ROOT":
					head_root_word = depend_graph[i][0].split(",")[0]
					head_i1_word = depend_graph[i-1][0].split(",")[0]
					head_i2_word = depend_graph[i-2][0].split(",")[0]
					trigram_word = "TRIGRAM:" + head_root_word + ":" + head_i1_word + ":" + head_i2_word
					if trigram_word not in features:
						features[trigram_word] = 0
					else:
						features[trigram_word] += 1

		# Trigram tags
		for i in range(len(depend_graph)):
			if depend_graph[i][1] == "ROOT":
				# Get trigram
				if depend_graph[i-1][1] != "ROOT" and depend_graph[i-2][1] != "ROOT":
					head_root_tag = depend_graph[i][0].split(",")[1]
					head_i1_tag = depend_graph[i-1][0].split(",")[1]
					head_i2_tag = depend_graph[i-2][0].split(",")[1]
					trigram_tag = "TRIGRAM:" + head_root_tag + ":" + head_i1_tag + ":" + head_i2_tag
					if trigram_tag not in features:
						features[trigram_tag] = 0
					else:
						features[trigram_tag] += 1

		# Trigram words/tags
		for i in range(len(depend_graph)):
			if depend_graph[i][1] == "ROOT":
				# Get trigram
				if depend_graph[i-1][1] != "ROOT" and depend_graph[i-2][1] != "ROOT":
					head_root = depend_graph[i][0].split(",")
					head_root_word_tag = head_root[0] + ":" + head_root[1]
					# words/tags
					head_i1 = depend_graph[i-1][0].split(",")
					head_i2 = depend_graph[i-2][0].split(",")
					head_i1_word_tag = head_i1[0] + ":" + head_i1[1]
					head_i2_word_tag = head_i2[0] + ":" + head_i2[1]

					trigram_word_tag = "TRIGRAM:" + head_root_word_tag + ":" + head_i1_word_tag + ":" + head_i2_word_tag
					if trigram_word_tag not in features:
						features[trigram_word_tag] = 0
					else:
						features[trigram_word_tag] += 1
예제 #23
0
 def _result_callback(response):
     """
     Handle the response produced for one parsed input.
     
     A response of None is ignored. Otherwise the input is marked as 
     completed, and then either the parse error is reported on stderr (and 
     parse_exit_status is set to 1) or the results are post-processed, 
     printed, optionally saved to a ParseResults file and summarised.
     
     response is indexed like a dict. The keys read here are 'identifier', 
     'results', 'error', 'input', 'timed_out', 'time', 'messages', 'tagger' 
     and 'parser'.
     
     NOTE(review): this relies on many names from the enclosing scope 
     (all_results, grammar, options, output_dir, time_parse, stdinput, 
     num_inputs, multiprocessing, get_output_filename). Presumably it is 
     used as the callback of the parsing workers: confirm against the caller.
     
     """
     if response is None:
         # Empty input, or the subprocess doesn't want us to do anything
         return
     else:
         # Mark this input as completed
         global completed_parses
         completed_parses[response['identifier']] = True
         
         if response['results'] is None:
             # There was some error: check what it was
             error = response['error']
             print >> sys.stderr, "Error parsing %s" % str(response['input'])
             print >> sys.stderr, "The error was:"
             # NOTE(review): error[2] is presumably the formatted traceback 
             #  of the exception: confirm against the code that builds it
             print >>sys.stderr, error[2]
             # Remember the failure in the module-level exit status
             global parse_exit_status
             parse_exit_status = 1
         else:
             # Keep this together with all the other processes' responses
             all_results.append(response)
             print "Parsed: %s" % response['input']
             
             # Run any cleanup routines that the formalism defines
             grammar.formalism.clean_results(response['results'])
             
             # Remove complex results if atomic-only option has been set
             if options.atoms_only:
                 response['results'] = remove_complex_categories(response['results'], grammar.formalism)
             
             if not options.no_results:
                 print "Results:"
                 list_results(response['results'])
             
             if output_dir is not None:
                 # Try getting a gold standard analysis if one has been 
                 #  associated with the input
                 gold = response['input'].get_gold_analysis()
                 
                 # Get the results with their probabilities
                 # (the probability is None for results that don't have one)
                 top_results = [(getattr(res, 'probability', None), res) \
                                     for res in response['results']]
                 if options.topn is not None:
                     # Limit the results that get stored
                     # The pairs are put in descending order before being 
                     #  cut off. Without topn they are stored in the order 
                     #  they arrived in
                     top_results = list(reversed(sorted(
                                             top_results)))[:options.topn]
                 # Output the results to a file
                 presults = ParseResults(
                                 top_results, 
                                 signs=True,
                                 gold_parse=gold,
                                 timed_out=response['timed_out'],
                                 cpu_time=response['time'])
                 filename = get_output_filename(response['identifier'])
                 presults.save(filename)
                 print "Parse results output to %s" % filename
             
             if time_parse:
                 print "Parse took %f seconds" % response['time']
                 
             if options.lh_analysis:
                 print >>sys.stderr, "\nLonguet-Higgins tonal space analysis for each result:"
                 # Output the tonal space path for each result
                 for i,result in enumerate(response['results']):
                     path = grammar.formalism.sign_to_coordinates(result)
                     # Split the (coordinate, time) pairs into two sequences
                     coords,times = zip(*path)
                     print "%d> %s" % (i, ", ".join(
                         ["%s@%s" % (crd,time) for (crd,time) in 
                                 zip(coordinates_to_roman_names(coords),times)]))
                     
             if options.lh_coord:
                 print >>sys.stderr, "\nLonguet-Higgins tonal space coordinates for each result:"
                 # Output the tonal space path for each result
                 for i,result in enumerate(response['results']):
                     path = grammar.formalism.sign_to_coordinates(result)
                     print "%d> %s" % (i, ", ".join(["(%d,%d)@%s" % (x,y,t) for ((x,y),t) in path]))
             
             # Print out any messages the parse routine sent to us
             for message in response['messages']:
                 print message
                 
             # Print a summary of what we've completed
             # (counts the inputs whose completed flag is true)
             num_completed = len(filter(lambda x:x[1], completed_parses.items()))
             if not stdinput:
                 if not options.no_progress:
                     print format_table([
                             [str(ident), 
                              "Complete" if completed_parses[ident] else ""]
                                 for ident in sorted(completed_parses.keys())])
                 if num_inputs is None:
                     print "\nCompleted %d parses" % num_completed
                 else:
                     print "\nCompleted %d/%d parses" % (num_completed, num_inputs)
                 
             # Enter interactive mode now if requested in options
             # Don't do this if we're in a process pool
             if not multiprocessing and options.interactive:
                 print 
                 from jazzparser.shell import interactive_shell
                 # The shell gets a copy of every global and local name 
                 #  visible here, so renaming a local in this function 
                 #  changes what is available in the interactive session
                 env = {}
                 env.update(globals())
                 env.update(locals())
                 interactive_shell(response['results'],
                                   options,
                                   response['tagger'], 
                                   response['parser'],
                                   grammar.formalism,
                                   env,
                                   input_data=response['input'])
             print
             # Flush the output to make sure everything gets out before we start the next one
             sys.stderr.flush()
             sys.stdout.flush()
예제 #24
0
    corpus = TonalSpaceAnalysisSet.load(corpus_name)
    
    # The rest of the args are result files to analyze
    res_files = arguments[1:]

    # Work out how many results to print out
    if options.print_results == -1:
        print_up_to = None
    else:
        print_up_to = options.print_results
    
    ranks = []
    num_ranked = 0
    for filename in res_files:
        # Load the parse results
        pres = ParseResults.from_file(filename)
        if options.gold_only and pres.gold_sequence is None:
            # Skip this sequence altogether if requested
            continue
        print "######################"
        print "Read %s" % filename
        
        # Try to get a correct answer from the PR file
        if pres.gold_sequence is None:
            print "No correct answer specified in input file"
            correct_song = None
        else:
            # Process the name of the sequence in the same way that 
            #  TonalSpaceAnalysisSet does
            # Ideally, they should make a common function call, but let's be 
            #  bad for once
예제 #25
0
 def _load_res(filename):
     """
     Load the parse results stored in filename.
     
     Returns the ParseResults read from the file, or None if it could not 
     be loaded. A load error is reported on stderr unless options.quiet 
     is set.
     
     """
     try:
         loaded = ParseResults.from_file(filename)
     except ParseResults.LoadError as err:
         if options.quiet:
             return None
         print >>sys.stderr, "Error loading file %s: %s" % (filename, err)
         return None
     return loaded
예제 #26
0
         continue
     else:
         print >>sys.stderr, "Restricting to input %d" % input_index
 
 if options.only_load:
     # Just output that we'd process this input, but don't do anything
     print "Input %d: %s" % (input_index,input_identifier)
     continue
 
 if options.skip_done:
     # Skip any inputs for which a readable output file already exists
     outfile = get_output_filename(input_identifier)
     if os.path.exists(outfile):
         # Try loading the output file
         try:
             old_res = ParseResults.from_file(outfile)
         except ParseResults.LoadError, err:
             pass
         else:
             # File loaded ok: don't process this input
             # Mark it as complete
             completed_parses[input_identifier] = True
             continue
 
 # Mark this as incomplete
 completed_parses[input_identifier] = False
 
 # Get a filename for a logger for this input
 if parse_logger_dir:
     parse_logger = os.path.join(parse_logger_dir, "%s.log" % \
                                         slugify(input_identifier))