Exemple #1
0
def options_help_text(options, intro=None):
    """
    Build a block of help text, formatted as a table, describing the
    usage of every option in the list.
    """
    if not options:
        return "This module has no options"
    from jazzparser.utils.tableprint import pprint_table
    from StringIO import StringIO
    # Required options are listed before the optional ones
    required = [o for o in options if o.required]
    optional = [o for o in options if not o.required]
    rows = [[o.name, "%s (REQUIRED)" % o.usage, o.help_text]
                for o in required]
    rows.extend([o.name, o.usage, o.help_text] for o in optional)
    # Render the rows as a table into an in-memory buffer
    buf = StringIO()
    pprint_table(buf,
                 rows,
                 separator="",
                 justs=[True, True, True],
                 widths=[None, 35, 40],
                 blank_row=True)
    text = buf.getvalue()
    buf.close()
    if intro is not None:
        # Prepend the intro, underlined with '=' characters
        text = "%s\n%s\n%s" % (intro, "=" * len(intro), text)
    return text
def count_categories(options, arguments):
    # Make a Django query to get all the chord data
    query = Chord.objects.exclude(sequence__analysis_omitted=True)
    # Allow blank categories to be ignored
    if options.no_blanks:
        print >> sys.stderr, "Excluding unannotated chords"
        query = query.exclude(category="")
    categories = query.values('category').annotate(
        count=Count('id')).order_by('category')
    total = query.count()
    table_header = [['Category', 'Count', '%']]
    table_data = []
    for data in categories:
        category = data['category'] and "%s" % data['category'] or "No category"
        percent = float(data['count']) / float(total) * 100.0
        table_data.append([category, data['count'], percent])
    # Sort the rows by the count
    table_data = reversed(sorted(table_data, key=lambda d: d[1]))
    # Now format the numbers
    table_data = [[row[0], "%s" % row[1],
                   "%.02f" % row[2]] for row in table_data]
    # Add the header on the top
    table_data = table_header + table_data
    if options.csv:
        print "\n".join([",".join([v for v in row]) for row in table_data])
    else:
        pprint_table(sys.stdout, table_data, [True, False, False], "|")
        print "Total chords: %s" % total
    return 0
def count_categories(options, arguments):
    """
    Print how often each chord category occurs in the database, either
    as CSV (options.csv) or as a formatted table with a grand total.

    NOTE(review): this is a functional duplicate of the preceding
    count_categories definition and shadows it at import time --
    presumably a copy/paste artifact; confirm which copy is canonical.
    """
    # Make a Django query to get all the chord data
    query = Chord.objects.exclude(sequence__analysis_omitted=True)
    # Allow blank categories to be ignored
    if options.no_blanks:
        print >>sys.stderr, "Excluding unannotated chords"
        query = query.exclude(category="")
    # Aggregate: one row per category value with its occurrence count
    categories = query.values('category').annotate(count=Count('id')).order_by('category')
    total = query.count()
    table_header = [['Category','Count','%']]
    table_data = []
    for data in categories:
        # Old-style and/or ternary: empty category string -> "No category"
        category = data['category'] and "%s" % data['category'] or "No category"
        percent = float(data['count']) / float(total) * 100.0
        table_data.append([category, data['count'], percent])
    # Sort the rows by the count (descending; reversed() yields an iterator)
    table_data = reversed(sorted(table_data, key=lambda d: d[1]))
    # Now format the numbers
    table_data = [[row[0], "%s" % row[1], "%.02f" % row[2]] for row in table_data]
    # Add the header on the top
    table_data = table_header + table_data
    if options.csv:
        print "\n".join([",".join([v for v in row]) for row in table_data])
    else:
        pprint_table(sys.stdout, table_data, [True,False,False], "|")
        print "Total chords: %s" % total
    return 0
Exemple #4
0
def count_categories(options, arguments):
    # Read in the sequence data from the file
    filename = os.path.abspath(arguments[0])
    seqs = SequenceIndex.from_file(filename)
    
    category_counts = {}
    total = 0
    # Count up how many times each category is used
    for seq in seqs.sequences:
        for chord in seq.iterator():
            total += 1
            if chord.category not in category_counts:
                category_counts[chord.category] = 1
            else:
                category_counts[chord.category] += 1
    table_header = [['Category','Count','%']]
    table_data = []
    for cat,count in category_counts.items():
        category = cat or "No category"
        percent = float(count) / float(total) * 100.0
        table_data.append([category, count, percent])
    # Sort the rows by the count
    table_data = reversed(sorted(table_data, key=lambda d: d[1]))
    # Now format the numbers
    table_data = [[row[0], "%s" % row[1], "%.02f" % row[2]] for row in table_data]
    # Add the header on the top
    table_data = table_header + table_data
    if options.csv:
        print "\n".join([",".join([v for v in row]) for row in table_data])
    else:
        pprint_table(sys.stdout, table_data, [True,False,False], "|")
        print "Total chords: %s" % total
    return 0
Exemple #5
0
    def run(self, args, state):
        """
        Shell 'help' command.

        With no arguments, print a two-column table of every available
        command with its usage, aliases and option names. Given a
        command name as the first argument, print detailed help for it:
        usage, aliases, full help text and per-option descriptions.
        """
        from jazzparser.utils.tableprint import pprint_table
        import sys

        if len(args) == 0:
            # Print the command usage info
            table = []
            for tool in state.all_tools:
                # Command names beyond the first are aliases
                if len(tool.commands) > 1:
                    alts = " [Alternatively: %s]" % ", ".join(
                        tool.commands[1:])
                else:
                    alts = ""
                # If the command has options, list them here as well
                if len(tool.tool_options) != 0:
                    opts = "\nOptions: %s" % ", ".join(\
                                [opt.name for opt in tool.tool_options])
                else:
                    opts = ""
                table.append([tool.usage[0], tool.usage[1] + alts + opts])
            pprint_table(sys.stdout, table, default_just=True, widths=[30,50], \
                            blank_row=True, hanging_indent=4)
            print "\nType 'help <command>' for detailed help about a command"
        else:
            command = args[0]
            if command not in state.tools:
                print "%s is not a valid command." % command
                print "Type 'help' for a full command list."
            else:
                tool = state.tools[command]
                title = "%s Shell Command" % tool.name
                # Compile the help text for the tool's options
                # (wrap() is presumably textwrap.wrap, imported at module
                # level -- confirm)
                if len(tool.tool_options):
                    opts = "\nOptions:"
                    # Put required options first
                    for opt in [o for o in tool.tool_options if o.required]:
                        opts += "\n  %s  %s (REQUIRED)\n  %s" % \
                                    (opt.name, opt.usage, \
                                     "\n    ".join(wrap(opt.help_text, 75)))
                    # Then all the rest
                    for opt in [
                            o for o in tool.tool_options if not o.required
                    ]:
                        opts += "\n%s  %s\n  %s" % \
                                    (opt.name, opt.usage, \
                                     "\n    ".join(wrap(opt.help_text, 75)))
                else:
                    opts = ""
                # Print out all of the info
                print """\
%s
%s
  Usage: %s     %s
  Command aliases: %s

%s%s""" % (title, "=" * len(title), tool.usage[0], tool.usage[1], ", ".join(
                    tool.commands), tool.help, opts)
Exemple #6
0
def main():
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tagger",
        dest="tagger",
        action="store_true",
        help=
        "The tagger component to use (full python path to the tagger class). Default: %s"
        % DEFAULT_TAGGER)
    options, arguments = parser.parse_args()

    if options.tagger is not None:
        tagger = options.tagger
    else:
        tagger = DEFAULT_TAGGER

    # Use the default grammar
    grammar = Grammar()
    tagger_class = get_tagger(tagger)

    total_entropy = 0.0
    total_chords = 0
    # Compile the data for displaying in a table
    data = []
    for sequence in ChordSequence.objects.filter(analysis_omitted=False):
        print "Analyzing entropy of model on %s" % sequence.name
        # Calculate the total word-level entropy of this sequence
        sequence_chords = list(sequence.iterator())
        entropy, sequence_length = sequence_entropy(sequence_chords, grammar,
                                                    tagger_class)
        data.append({
            'name':
            sequence.name.encode('ascii', 'replace'),
            'entropy':
            entropy,
            'length':
            sequence_length,
            'entropy_per_chord':
            (sequence_length != 0 and (entropy / sequence_length) or 0.0),
        })
        if sequence_length:
            total_entropy += entropy
            total_chords += sequence_length

    # Display a table of the results
    table_data = [['Sequence', 'Entropy', 'Chords', 'Entropy per chord']] + [[
        d['name'],
        "%.4f" % d['entropy'],
        "%d" % d['length'],
        "%.4f" % d['entropy_per_chord']
    ] for d in data]
    pprint_table(sys.stdout, table_data, [True, False, False, False])
    # Calculate the perplexity over the whole set
    perplexity = math.pow(2, total_entropy / total_chords)
    print "### Entropy per chord: %.4f" % (total_entropy / total_chords)
    print "### Perplexity = %.4f" % perplexity
Exemple #7
0
    def run(self, args, state):
        """
        Shell 'help' command.

        With no arguments, print a two-column table of every available
        command with its usage, aliases and option names. Given a
        command name as the first argument, print detailed help for it.

        NOTE(review): near-duplicate of another run() implementation in
        this file, differing only in formatting.
        """
        from jazzparser.utils.tableprint import pprint_table
        import sys
        
        if len(args) == 0:
            # Print the command usage info
            table = []
            for tool in state.all_tools:
                # Command names beyond the first are aliases
                if len(tool.commands) > 1:
                    alts = " [Alternatively: %s]" % ", ".join(tool.commands[1:])
                else: alts = ""
                # If the command has options, list them here as well
                if len(tool.tool_options) != 0:
                    opts = "\nOptions: %s" % ", ".join(\
                                [opt.name for opt in tool.tool_options])
                else:
                    opts = ""
                table.append([tool.usage[0], tool.usage[1]+alts+opts])
            pprint_table(sys.stdout, table, default_just=True, widths=[30,50], \
                            blank_row=True, hanging_indent=4)
            print "\nType 'help <command>' for detailed help about a command"
        else:
            command = args[0]
            if command not in state.tools:
                print "%s is not a valid command." % command
                print "Type 'help' for a full command list."
            else:
                tool = state.tools[command]
                title = "%s Shell Command" % tool.name
                # Compile the help text for the tool's options
                # (wrap() is presumably textwrap.wrap, imported at module
                # level -- confirm)
                if len(tool.tool_options):
                    opts = "\nOptions:"
                    # Put required options first
                    for opt in [o for o in tool.tool_options if o.required]:
                        opts += "\n  %s  %s (REQUIRED)\n  %s" % \
                                    (opt.name, opt.usage, \
                                     "\n    ".join(wrap(opt.help_text, 75)))
                    # Then all the rest
                    for opt in [o for o in tool.tool_options if not o.required]:
                        opts += "\n%s  %s\n  %s" % \
                                    (opt.name, opt.usage, \
                                     "\n    ".join(wrap(opt.help_text, 75)))
                else:
                    opts = ""
                # Print out all of the info
                print """\
%s
%s
  Usage: %s     %s
  Command aliases: %s

%s%s""" % (title, "=" * len(title), 
            tool.usage[0], tool.usage[1], 
            ", ".join(tool.commands), 
            tool.help, opts)
Exemple #8
0
def main():
    usage = "%prog [options] <seq-file>"
    description = "Outputs the details of all chord sequences from a "\
        "sequence index file to stdout. This is for getting a "\
        "(relatively) human-readable form of the data"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--categories", "-c", dest="categories", action="store_true", help="include category annotations")
    parser.add_option("--coordinations", "-o", dest="coordinations", action="store_true", help="include coordination annotations")
    parser.add_option("--meta", "-m", dest="meta", action="store_true", help="output sequence meta data")
    parser.add_option("--no-map", "-n", dest="no_map", action="store_true", help="don't apply a mapping from the names in the corpus to those used in the paper")
    parser.add_option("--all", "-a", dest="all", action="store_true", help="output everything")
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)
       
    # Get the chord sequence
    seqs = SequenceIndex.from_file(arguments[0])
    
    # Show the song name
    for seq in seqs:
        print "Chords for '%s'" % seq.string_name
        
        if options.meta or options.all:
            print "Main key:    %s" % seq.key
            print "Bar length:  %d" % seq.bar_length
        
        # Put together a table of chords plus annotations (if requested)
        data = [[ str(chord) for chord in seq ], 
                [ str(chord.duration) for chord in seq ]]
        if options.categories or options.all:
            if options.no_map:
                # Don't apply any mapping to the category names
                data.append([ chord.category for chord in seq ])
            else:
                # Map the names to those used in the paper/thesis
                data.append([ annotation_to_lexicon_name(chord.category) for chord in seq ])
        if options.coordinations or options.all:
            coords = []
            for chord in seq:
                ti = chord.treeinfo
                if ti.coord_resolved and ti.coord_unresolved:
                    coords.append(")(")
                elif ti.coord_resolved:
                    coords.append(")")
                elif ti.coord_unresolved:
                    coords.append("(")
                else:
                    coords.append("")
            data.append(coords)
        pprint_table(sys.stdout, data, default_just=True)
        print
Exemple #9
0
def main():
    parser = OptionParser()
    usage = "%prog [options] [<seq-db-file>]"
    description = "Measure the degree of ambiguity (average cats per chord) "\
        "for a grammar over a particular dataset"
    parser.add_option('-g',
                      '--grammar',
                      dest='grammar',
                      action='store',
                      help='Speficy a grammar by name')
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "No sequence index file given: grammar stats only"
        seq_file = None
    else:
        seq_file = arguments[0]
    # Load the grammar
    grammar = get_grammar(options.grammar)

    # Some stats about ambiguity in the grammar
    table = []
    class_cats = []
    for class_name, chord_class in grammar.chord_classes.items():
        if class_name not in EXCLUDE_CLASSES:
            cats = grammar.get_signs_for_word(str(chord_class.words[0]))
            table.append([str(class_name), str(len(cats))])
            class_cats.append(len(cats))

    table.append(["Mean", "%.2f" % (float(sum(class_cats)) / len(class_cats))])
    table.append(["Std dev", "%.2f" % (std(class_cats))])
    print "Cats for each chord class:"
    pprint_table(sys.stdout, table, justs=[True, True])

    # Ambiguity stats on the dataset
    if seq_file is not None:
        seqs = SequenceIndex.from_file(arguments[0])

        counts = []
        for seq in seqs:
            for chord in seq:
                cats = grammar.get_signs_for_word(chord)
                counts.append(len(cats))

        table = []
        table.append(["Chords", str(len(counts))])
        table.append(
            ["Cats per chord",
             "%.2f" % (float(sum(counts)) / len(counts))])
        table.append(["Std dev", "%.2f" % (std(counts))])

        print
        pprint_table(sys.stdout, table, justs=[True, True])
Exemple #10
0
def main():
    """
    Report the degree of lexical ambiguity of a grammar: the number of
    categories per chord class, and, if a sequence index file is given,
    the mean number of categories per chord over that dataset.

    NOTE(review): duplicate of the previous main() with identical
    behaviour, differing only in formatting.
    """
    parser = OptionParser()
    # NOTE(review): usage/description are built but never passed to
    # OptionParser (compare the other copy of this script in the file),
    # and "Speficy" is a typo for "Specify" in user-facing help text.
    usage = "%prog [options] [<seq-db-file>]"
    description = "Measure the degree of ambiguity (average cats per chord) "\
        "for a grammar over a particular dataset"
    parser.add_option('-g', '--grammar', dest='grammar', action='store', help='Speficy a grammar by name')
    options, arguments = parser.parse_args()
    
    if len(arguments) < 1:
        print "No sequence index file given: grammar stats only"
        seq_file = None
    else:
        seq_file = arguments[0]
    # Load the grammar
    grammar = get_grammar(options.grammar)
    
    # Some stats about ambiguity in the grammar: categories assigned to
    # one representative word of each (non-excluded) chord class
    table = []
    class_cats = []
    for class_name,chord_class in grammar.chord_classes.items():
        if class_name not in EXCLUDE_CLASSES:
            cats = grammar.get_signs_for_word(str(chord_class.words[0]))
            table.append([str(class_name), str(len(cats))])
            class_cats.append(len(cats))
    
    table.append(["Mean", "%.2f" % (float(sum(class_cats))/len(class_cats))])
    table.append(["Std dev", "%.2f" % (std(class_cats))])
    print "Cats for each chord class:"
    pprint_table(sys.stdout, table, justs=[True, True])
    
    # Ambiguity stats on the dataset
    if seq_file is not None:
        seqs = SequenceIndex.from_file(arguments[0])
        
        # Number of categories for every chord occurrence in the data
        counts = []
        for seq in seqs:
            for chord in seq:
                cats = grammar.get_signs_for_word(chord)
                counts.append(len(cats))
        
        table = []
        table.append(["Chords", str(len(counts))])
        table.append(["Cats per chord", "%.2f" % (float(sum(counts)) / len(counts))])
        table.append(["Std dev", "%.2f" % (std(counts))])
        
        print
        pprint_table(sys.stdout, table, justs=[True, True])
Exemple #11
0
def list_results(results, silent):
    """
    Like jazzparser.parser.list_results, but shows probabilities.
    
    Note this doesn't obey the Latex option because I couldn't be 
    bothered.
    
    """
    import math

    def _fmt_index(i):
        return format(i, " >3d")

    if len(results) == 0:
        if not silent:
            print "No results"
    elif silent:
        # Only print the results themselves if we're in silent mode
        for i in range(len(results)):
            print "%s, %s" % (results[i], fmt_prob(results[i].probability))
    else:
        previous_prob = None
        # Get the highest scoring probability to compute the ratio of the others
        if len(results):
            log_highest_prob = results[0].probability
            print "Log highest prob: %s" % log_highest_prob
        table = [["", "", "Prob", "Ratio", "Sign"]]
        for i in range(len(results)):
            # Mark where probabilities are identical
            if previous_prob == results[i].probability:
                same_marker = "*"
            else:
                same_marker = " "
            # Compute the ratio to the highest probability
            prob_ratio = math.exp(results[i].probability - log_highest_prob)
            table.append([
                "%s>" % _fmt_index(i), same_marker,
                fmt_prob(math.exp(results[i].probability)),
                "%.4f" % prob_ratio,
                str(results[i])
            ])
            previous_prob = results[i].probability
        pprint_table(sys.stdout, table, justs=[True, True, True, True, True])
def main():
    parser = OptionParser()
    parser.add_option("-t", "--tagger", dest="tagger", action="store_true", help="The tagger component to use (full python path to the tagger class). Default: %s" % DEFAULT_TAGGER)
    options, arguments = parser.parse_args()
    
    if options.tagger is not None:
        tagger = options.tagger
    else:
        tagger = DEFAULT_TAGGER
    
    # Use the default grammar
    grammar = Grammar()
    tagger_class = get_tagger(tagger)
    
    total_entropy = 0.0
    total_chords = 0
    # Compile the data for displaying in a table
    data = []
    for sequence in ChordSequence.objects.filter(analysis_omitted=False):
        print "Analyzing entropy of model on %s" % sequence.name
        # Calculate the total word-level entropy of this sequence
        sequence_chords = list(sequence.iterator())
        entropy,sequence_length = sequence_entropy(sequence_chords, grammar, tagger_class)
        data.append( {
            'name' : sequence.name.encode('ascii', 'replace'),
            'entropy' : entropy,
            'length' : sequence_length,
            'entropy_per_chord' : (sequence_length!=0 and (entropy/sequence_length) or 0.0),
        })
        if sequence_length:
            total_entropy += entropy
            total_chords += sequence_length
    
    # Display a table of the results
    table_data = [['Sequence', 'Entropy', 'Chords', 'Entropy per chord']] + [
        [ d['name'], "%.4f" % d['entropy'], "%d" % d['length'], "%.4f" % d['entropy_per_chord'] ] 
            for d in data ]
    pprint_table(sys.stdout, table_data, [True, False, False, False])
    # Calculate the perplexity over the whole set
    perplexity = math.pow(2, total_entropy/total_chords)
    print "### Entropy per chord: %.4f" % (total_entropy/total_chords)
    print "### Perplexity = %.4f" % perplexity
Exemple #13
0
def list_results(results, silent):
    """
    Like jazzparser.parser.list_results, but shows probabilities.

    results: parse results, each with a log ``probability`` attribute;
        results[0] is treated as the highest-probability result
        (assumed sorted best-first -- confirm against callers).
    silent: if True, print only the results themselves; otherwise print
        a table including probabilities and ratios to the best result.

    Note this doesn't obey the Latex option because I couldn't be 
    bothered.
    
    """
    import math
    # Right-align a result index in a 3-character field
    def _fmt_index(i):
        return format(i, " >3d")
        
    if len(results) == 0:
        if not silent:
            print "No results"    
    elif silent:
        # Only print the results themselves if we're in silent mode
        for i in range(len(results)):
            print "%s, %s" % (results[i], fmt_prob(results[i].probability))
    else:
        previous_prob = None
        # Get the highest scoring probability to compute the ratio of the others
        if len(results):
            log_highest_prob = results[0].probability
            print "Log highest prob: %s" % log_highest_prob
        table = [["", "", "Prob", "Ratio", "Sign"]]
        for i in range(len(results)):
            # Mark where probabilities are identical to the previous row's
            if previous_prob == results[i].probability:
                same_marker = "*"
            else:
                same_marker = " "
            # Compute the ratio to the highest probability (exp of log diff)
            prob_ratio = math.exp(results[i].probability - log_highest_prob)
            table.append(["%s>" % _fmt_index(i), same_marker, fmt_prob(math.exp(results[i].probability)), "%.4f" % prob_ratio, str(results[i])])
            previous_prob = results[i].probability
        pprint_table(sys.stdout, table, justs=[True,True,True,True,True])
def options_help_text(options, intro=None):
    """
    Produce help text describing the usage of every option in the list,
    formatted as a table for command-line output.

    options: option objects with name, usage, help_text and required
        attributes.
    intro: optional heading prepended (underlined with '=') to the text.

    Returns the formatted string.
    """
    if len(options) == 0:
        return "This module has no options"
    # Imported lazily; StringIO lets us capture the table as a string
    from jazzparser.utils.tableprint import pprint_table
    from StringIO import StringIO

    rows = []
    # Put required options first, marked "(REQUIRED)" in the usage column
    for opt in [o for o in options if o.required]:
        rows.append([opt.name, "%s (REQUIRED)" % opt.usage, opt.help_text])
    for opt in [o for o in options if not o.required]:
        rows.append([opt.name, opt.usage, opt.help_text])
    output = StringIO()
    # Print the options in a nice table
    pprint_table(output, rows, separator="", justs=[True, True, True], widths=[None, 35, 40], blank_row=True)
    strout = output.getvalue()
    output.close()
    if intro is not None:
        strout = "%s\n%s\n%s" % (intro, "=" * len(intro), strout)
    return strout
Exemple #15
0
def confusion_matrix(matrix):
    """
    Output a confusion matrix, given as a nested dictionary, as a table.

    ``matrix`` maps correct values (strings) to dictionaries mapping
    incorrect values (strings) to the number of times the incorrect
    value was mistaken for the correct one.
    """
    from jazzparser.utils.tableprint import pprint_table
    import sys
    # Flatten the nested dict into (correct, incorrect, count) triples
    triples = [[correct, incorrect, count]
                   for correct, confusions in matrix.items()
                   for incorrect, count in confusions.items()]
    # Highest counts first
    triples = list(reversed(sorted(triples, key=lambda t: t[2])))
    # Stringify counts for display
    body = [[correct, incorrect, str(count)]
                for correct, incorrect, count in triples]
    # Second header row is a blank spacer
    header = [['Correct', 'Incorrect', 'Count'], ['', '', '']]
    return pprint_table(sys.stdout, header + body, separator=" | ",
                        outer_seps=True, justs=[True, True, False])
Exemple #16
0
def main():
    """
    Dump all chord sequences from a sequence index file to stdout in a
    human-readable table, optionally with category and coordination
    annotations and sequence meta data.

    NOTE(review): duplicate of another main() in this file with
    identical behaviour, differing only in formatting.
    """
    usage = "%prog [options] <seq-file>"
    description = "Outputs the details of all chord sequences from a "\
        "sequence index file to stdout. This is for getting a "\
        "(relatively) human-readable form of the data"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--categories",
                      "-c",
                      dest="categories",
                      action="store_true",
                      help="include category annotations")
    parser.add_option("--coordinations",
                      "-o",
                      dest="coordinations",
                      action="store_true",
                      help="include coordination annotations")
    parser.add_option("--meta",
                      "-m",
                      dest="meta",
                      action="store_true",
                      help="output sequence meta data")
    parser.add_option(
        "--no-map",
        "-n",
        dest="no_map",
        action="store_true",
        help=
        "don't apply a mapping from the names in the corpus to those used in the paper"
    )
    parser.add_option("--all",
                      "-a",
                      dest="all",
                      action="store_true",
                      help="output everything")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify a sequence file"
        sys.exit(1)

    # Get the chord sequence
    seqs = SequenceIndex.from_file(arguments[0])

    # Show the song name
    for seq in seqs:
        print "Chords for '%s'" % seq.string_name

        if options.meta or options.all:
            print "Main key:    %s" % seq.key
            print "Bar length:  %d" % seq.bar_length

        # Put together a table of chords plus annotations (if requested)
        data = [[str(chord) for chord in seq],
                [str(chord.duration) for chord in seq]]
        if options.categories or options.all:
            if options.no_map:
                # Don't apply any mapping to the category names
                data.append([chord.category for chord in seq])
            else:
                # Map the names to those used in the paper/thesis
                data.append([
                    annotation_to_lexicon_name(chord.category) for chord in seq
                ])
        if options.coordinations or options.all:
            # One marker per chord: ")" closes a coordination, "(" opens
            # one; ")(" does both
            coords = []
            for chord in seq:
                ti = chord.treeinfo
                if ti.coord_resolved and ti.coord_unresolved:
                    coords.append(")(")
                elif ti.coord_resolved:
                    coords.append(")")
                elif ti.coord_unresolved:
                    coords.append("(")
                else:
                    coords.append("")
            data.append(coords)
        pprint_table(sys.stdout, data, default_just=True)
        print
Exemple #17
0
def main():
    """Evaluate a tagging model by tagging sequences from an input file.

    Command-line entry point (Python 2).  Takes three positional
    arguments -- tagger name, model name and input file (use '-' as the
    model name for taggers that need no model) -- and requires exactly
    one evaluation mode to be selected by option: --agreement,
    --entropy or --tag-stats (the last currently raises
    NotImplementedError).
    """
    def _check_args(args):
        # Require exactly 3 positional args.  Returns (model-name,
        # input-file); the full argument list is still available as
        # `arguments` from prepare_evaluation_options below.
        if len(args) != 3:
            print >>sys.stderr, "Specify a tagger, model name and input file"
            sys.exit(1)
        return args[1],args[2]
    
    # prepare_evaluation_options handles the option parsing and splits the
    # input data into (sequences, model, partition-number) partitions
    partitions,part_ids,options,arguments = prepare_evaluation_options(
        usage = "%prog [options] <tagger> <model-name> <input-file>",
        description = "Evaluate a tagging model by "\
            "tagging sequences from an input file. If the tagger doesn't "\
            "need a model name, use '-' as the model name.",
        check_args = _check_args,
        optparse_groups = [
            (("Tagging",),
                [(("--topt", "--tagger-options"), 
                    {'dest':"topts", 'action':"append", 'help':"options to pass to the tagger."}),
                ]),
            (("Output",), 
                [(("--no-model-info",), 
                    {'dest':"no_model_info", 'action':"store_true", 'help':"turns of outputing of information about the model being used before using it (useful for identifying output piped to a file later, but may be too verbose sometimes)"}),
                ]),
            (("Evaluation", "Type of evaluation and options"),
                [(("-a", "--agreement"), 
                    {'dest':"agreement", 'action':"store_true", 'help':"instead of doing any parses, just report the agreement of the tops tags with the gold standard tags."}),
                 (("--confusion",), 
                    {'dest':"confusion", 'action':"store_true", 'help':"print out confusion matrix after agreement calculation. Applies only in combination with --agreement"}),
                 (("-e", "--entropy"), 
                    {'dest':"entropy", 'action':"store_true", 'help':"instead of doing any parses, just report the entropy of the returned tag distribution with respect to the gold standard tags."}),
                 (("--tag-stats",), 
                    {'dest':"tag_stats", 'action':"store_true", 'help':"just output stats about the tags that the model assigns to this sequence (or these sequences)"}),
                 (("--topn",), 
                    {'dest':"topn", 'type':"int", 'action':"store", 'help':"when evaluating agreement consider the top N tags the tagger returns. By default, allows only the top one to count as a hit.", 'default':1}),
                ]),
        ],
    )
    
    grammar = Grammar()
    
    tagger_name = arguments[0]
    model_name = arguments[1]
    # Tagger shouldn't use a model in some cases
    no_tagger_model = model_name == "-"
    
    # Load the requested tagger class
    tagger_cls = get_tagger(tagger_name)
    topts = ModuleOption.process_option_string(options.topts)
    
    def _model_info(mname):
        """ Outputs info about the named model """
        # NOTE(review): the condition looks inverted relative to the
        # option's help text -- with --no-model-info set, a short
        # "Model <name>" line is still printed; without it, the full
        # model description is shown.  Confirm this is intended.
        if options.no_model_info:
            print >>sys.stderr, "Model %s" % mname
        else:
            # Can only output the nice model info if it's a ModelTagger
            if issubclass(tagger_cls, ModelTagger):
                print >>sys.stderr, "======== Model info ========"
                print >>sys.stderr, tagger_cls.MODEL_CLASS.load_model(mname).description
                print >>sys.stderr, "============================"
            else:
                print >>sys.stderr, "Tagger %s using model %s" % (tagger_cls.__name__, mname)
    
    num_parts = len(partitions)
    # Total sequence count over all partitions
    # (computed but never read below)
    num_seqs = sum([len(p[0]) for p in partitions])
    
    ################# Evaluation ########################
    if options.tag_stats:
        # Everything after this raise is currently dead code
        raise NotImplementedError, "fix this if you want it"
        # Print out statistics for each partition, with its model
        if no_tagger_model:
            # There could be some circumstance in which we want to do this, 
            #  but I can't think what it is, so I'm not implementing it for now
            print >>sys.stderr, "Cannot run tag_stats with no tagger model"
            sys.exit(1)
        all_stats = {}
        for parti in range(num_parts):
            sequences,model,part_num = partitions[parti]
            # Output the model training info if requested
            _model_info(model)
            ######## This doesn't exist any more
            stats = sequences_top_tags_dict(tagger_cls, model, sequences, topn=options.topn)
            for tag,num in stats.items():
                if tag in all_stats:
                    all_stats[tag] += stats[tag]
                else:
                    all_stats[tag] = stats[tag]
        pprint_table(sys.stdout, list(reversed(sorted(all_stats.items(), key=lambda r:r[1]))), separator="|")
    elif options.agreement:
        # Print out agreement stats for each partition
        if no_tagger_model:
            # Same a tag_stats: probably no need for this ever
            print >>sys.stderr, "Cannot run agreement with no tagger model"
            sys.exit(1)
        correct = 0
        total = 0
        conf_mat = {}
        for parti in range(num_parts):
            sequences,model,part_num = partitions[parti]
            topts['model'] = model
            # Output the model training info if requested
            _model_info(model)
            pcorrect = 0
            ptotal = 0
            # Go through each sequence
            for seq in sequences:
                print >>sys.stderr, "Evaluating %s" % seq.string_name
                # (`input` shadows the builtin; left unchanged here)
                input = DbInput.from_sequence(seq)
                correct_tags = [chord.category for chord in seq.iterator()]
                cor,tot = tagger_agreement(input, grammar, tagger_cls, correct_tags, options=topts, confusion_matrix=conf_mat, topn=options.topn)
                pcorrect += cor
                ptotal += tot
                print "  Sequence: %.1f%%" % (float(cor)/tot*100)
                print "  So far: %.1f%%" % (float(pcorrect)/ptotal*100)
            print "Partition %d: %d / %d (%.2f%%)" % (part_num, pcorrect, ptotal, (float(pcorrect)/ptotal*100))
            correct += pcorrect
            total += ptotal
        if num_parts > 1:
            # Print out the overall stats
            print "%d / %d (%f%%)" % (correct,total,(float(correct)/total*100))
        if options.confusion:
            confusion_matrix(conf_mat) 
    elif options.entropy:
        print "Calculating cross-entropy of tagger with gold standard tags"
        entropy = 0.0
        num_chords = 0
        for parti in range(num_parts):
            sequences,model,part_num = partitions[parti]
            if not no_tagger_model:
                topts['model'] = model
                # Output the model training info if requested
                _model_info(model)
            pentropy = 0.0
            pnum_chords = 0
            # Compute the entropy for the partition model
            for seq in sequences:
                print >>sys.stderr, "Evaluating %s" % seq.string_name
                input = " ".join([str(chord) for chord in seq.iterator()])
                correct_tags = [chord.category for chord in seq.iterator()]
                ent,crds = tagger_entropy(input, grammar, tagger_cls, correct_tags, options=topts)
                pentropy += ent
                pnum_chords += crds
                print "   %f bits per chord" % (ent/crds)
            print "Partition %d: %f bits per chord (%d chords)" % (part_num, (pentropy/pnum_chords), pnum_chords)
            entropy += pentropy
            num_chords += pnum_chords
        # Print out the stats for all partitions together
        if num_parts > 1:
            print "%f bits per chord (%d chords)" % ((entropy/num_chords), num_chords)
    else:
        # No evaluation mode was selected
        print >>sys.stderr, "Select an evaluation operation with one of the options"
        sys.exit(1)
Exemple #18
0
def main():
    """Read ParseResults files and print the alignment errors in context.

    For each results file: align the top parse result with the gold
    standard, then print a table of insertions ("I"), deletions ("D")
    and substitutions, each shown with up to --window rows of aligned
    context before and after.  Counts of error types, insertions,
    deletions and substitutions are accumulated across files (the
    summary output itself is not part of this function).  Python 2 code.
    """
    usage = "%prog [options] <results-files>"
    description = """\
Read in a ParseResults file, just like result_alignment.py. Examines the \
errors that were made and outputs them in context.
"""
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--window", "-w", dest="window", action="store", type="int", help="size of context window to show before and after each error. Default: 2", default=2)
    parser.add_option("--distance", "--dist", dest="distance", action="store_true", help="show the total distance travelled in the tonal space by the result and the gold standard")
    parser.add_option("--output-opts", "--oopts", dest="output_opts", action="store", help="options that affect the output formatting. Use '--output-opts help' for a list of options.")
    parser.add_option("--summary-threshold", dest="summary_threshold", action="store", type="int", help="how many times a substitution/insertion/deletion needs to have happened to be including in the summary (default: 4)", default=4)
    options, arguments = parser.parse_args()
        
    if len(arguments) == 0:
        print >>sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)
        
    grammar = get_grammar()
    grammar.formalism.cl_output_options(options.output_opts)
        
    # Size of window of context to show
    win = options.window
    
    # Per-run accumulators
    errors = []               # filenames that failed to load
    unscored_files = []       # NOTE(review): never populated below -- appears vestigial
    scored = 0
    unscored = 0
    result_lengths = []
    gold_lengths = []
    insertions = {}           # result step -> count
    deletions = {}            # gold step -> count
    substitutions = {}        # "gold > result" -> count
    error_types = {}          # alignment op -> count (includes "A")
    for filename in arguments:
        try:
            top_result, gold_result = get_top_result(filename)
        # Python 2 except syntax; py3 would be `except ... as err:`
        except ParseResults.LoadError, err:
            print >>sys.stderr, "Error loading file: %s" % (err)
            errors.append(filename)
            continue
        else:
            print "============================="
            print "File: %s" % filename
            if top_result is None:
                # No alignment was found
                unscored +=1
                print "No result"
            else:
                # Wrap these up as a semantics, since some functions need that as input
                Sems = grammar.formalism.Semantics.Semantics
                top_sems, gold_sems = Sems(top_result), Sems(gold_result)
                
                # Do the alignment of the top result and gold result
                alignment,gold_seq,result_seq = results_alignment(top_result, gold_result)
                scored += 1
                # Get the actual list of coordinates
                # (py2 zip returns a list, so [0] indexing works)
                coords = zip(*grammar.formalism.semantics_to_coordinates(gold_sems))[0]
                funs = zip(*grammar.formalism.semantics_to_functions(gold_sems))[0]
                gold_coords = zip(coords, funs)
                
                coords = zip(*grammar.formalism.semantics_to_coordinates(top_sems))[0]
                funs = zip(*grammar.formalism.semantics_to_functions(top_sems))[0]
                result_coords = zip(coords, funs)
                
                print "Result length: %d, gold length: %d" % \
                        (len(result_coords), len(gold_coords))
                result_lengths.append(len(result_coords))
                gold_lengths.append(len(gold_coords))
                
                if options.distance:
                    # Work out the total distance travelled
                    start, end = gold_coords[-1][0], gold_coords[0][0]
                    gold_vect = end[0] - start[0], end[1] - start[1]
                    # And for the actual result
                    start, end = result_coords[-1][0], result_coords[0][0]
                    result_vect = end[0] - start[0], end[1] - start[1]
                    print "Distance travelled:"
                    print "  Gold result:", gold_vect
                    print "  Top result: ", result_vect
                    print
                
                # Put together a table of error windows
                table = [
                    # Header row
                    ["", "Step", "", "Result", "Gold"]
                ]
                
                # Walk both sequences in step with the alignment ops,
                # advancing each iterator only when its side consumed a symbol
                gold = iter(zip(gold_seq,gold_coords))
                result = iter(zip(result_seq,result_coords))
                context = []        # rolling pre-context of aligned pairs
                post_context = 0    # aligned rows still to show after an error
                unseen = 0          # aligned rows elided from the output
                for op in alignment:
                    # Keep a record of how many of each error occur
                    if op not in error_types:
                        error_types[op] = 1
                    else:
                        error_types[op] += 1
                    
                    if op == "A":
                        # Aligned pair
                        # Move both sequences on
                        gold_step,gold_point = gold.next()
                        result_step,result_point = result.next()
                        if post_context > 0:
                            # Show this as part of the post-context of an error
                            table.append(["A", str(gold_step), "", str(result_point), str(gold_point)])
                            context = []
                            post_context -= 1
                        else:
                            # Add this to the rolling window of pre-context
                            if len(context) >= win:
                                # We've not shown something here
                                unseen += 1
                            if win > 0:
                                context.append((gold_step, gold_point, result_step, result_point))
                                context = context[-win:]
                    else:
                        # Mark if there was something we didn't show
                        if unseen:
                            table.append(["", "   ...%d..." % unseen, "", "", ""])
                            unseen = 0
                        if context:
                            # Show the error's pre-context
                            # (`__` discards the result step of each context tuple)
                            for (pre_gold_step,pre_gold_point,__,pre_result_point) in context:
                                table.append(["A", str(pre_gold_step), "", str(pre_result_point), str(pre_gold_point)])
                            context = []
                        
                        if op == "I":
                            # Inserted in the result
                            result_step,result_point = result.next()
                            table.append(["I", str(result_step), "", str(result_point), ""])
                            if str(result_step) not in insertions:
                                insertions[str(result_step)] = 1
                            else:
                                insertions[str(result_step)] += 1
                        elif op == "D":
                            # Deleted in the result
                            gold_step,gold_point = gold.next()
                            table.append(["D", str(gold_step), "", "", str(gold_point)])
                            if str(gold_step) not in deletions:
                                deletions[str(gold_step)] = 1
                            else:
                                deletions[str(gold_step)] += 1
                        else:
                            # Substituted: any other op code consumes one
                            # symbol from each side
                            result_step, result_point = result.next()
                            gold_step, gold_point = gold.next()
                            table.append([str(op), str(result_step), "for %s" % str(gold_step), str(result_point), str(gold_point)])
                            subst_key = "%s > %s" % (gold_step, result_step)
                            if subst_key not in substitutions:
                                substitutions[subst_key] = 1
                            else:
                                substitutions[subst_key] += 1
                        # After anything other than an alignment, cancel the 
                        #  context window
                        context = []
                        # Show up to <win> in the post-context of alignments
                        post_context = win
                # Mark if there was something at the end we didn't show
                if unseen:
                    table.append(["", "   ...%d..." % unseen, "", "", ""])
                # Print out the table
                pprint_table(sys.stdout, table, justs=[True,True,True,True,True])
        
    print "\n"
Exemple #19
0
 print "Processed %d result sets" % (scored+unscored)
 print "Errors processing %d result sets" % len(errors)
 print "Average result length: %.2f (%d)" % (
                 float(sum(result_lengths)) / len(result_lengths),
                 sum(result_lengths))
 print "Average gold length:   %.2f (%d)" % (
                 float(sum(gold_lengths)) / len(gold_lengths),
                 sum(gold_lengths))
 # A table of error types
 print 
 print "Error types:"
 error_table = []
 for error, count in error_types.items():
     if error != "A":
         error_table.append([error, "%d" % count])
 pprint_table(sys.stdout, error_table, justs=[True, False])
 # Show common mistakes
 # Substitutions
 print 
 print "Common substitutions:"
 subst_table = []
 for subst,count in reversed(sorted(substitutions.items(), key=lambda x:x[1])):
     if count >= options.summary_threshold:
         subst_table.append(["%s" % subst, "%d" % count])
 pprint_table(sys.stdout, subst_table, justs=[True, False])
 
 # Deletions
 print
 print "Common deletions:"
 del_table = []
 for deln,count in reversed(sorted(deletions.items(), key=lambda x:x[1])):
Exemple #20
0
        for name, songsem in corpus:
            # Get the distance from this song
            dist = metric.distance(result, songsem)
            distances.append((name, dist, songsem))
        # Sort them to get the closest first
        distances.sort(key=lambda x: x[1])

        print
        # Print out the top results, as many as requested
        top_results = distances[:print_up_to]
        table = [["", "Song", "Distance"]] + [[
            "*" if res[0] == correct_song else "",
            "%s" % res[0],
            "%.2f" % res[1]
        ] for res in top_results]
        pprint_table(sys.stdout, table, default_just=True)
        print

        if correct_song is not None:
            # Look for the correct answer in the results
            for rank, (name, distance, __) in enumerate(distances):
                # Match up the song name to the correct one
                if name == correct_song:
                    correct_rank = rank
                    break
            else:
                # The song name was not found in the corpus at all
                correct_rank = None

            if correct_rank is None:
                print "Song was not found in corpus"
Exemple #21
0
def main():
    usage = "%prog [options] <in-file> [<index1> [<index2> ...]]"
    description = "Print the names of sequences in a sequence input "\
            "file. Optionally specify indices of sequences. If no index "\
            "is given, displays all sequences."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("--sa",
                      "-a",
                      "--sort-alpha",
                      "--alpha",
                      dest="alphabetical",
                      action="store_true",
                      help="order sequences alphabetically by name")
    parser.add_option("--sl",
                      "--sort-length",
                      dest="sort_length",
                      action="store_true",
                      help="order sequences by length")
    parser.add_option(
        "-i",
        "--index",
        dest="index",
        action="store_true",
        help=
        "also display the indices in the sequence file of each sequence, in the column before the ids"
    )
    parser.add_option("-l",
                      "--lengths",
                      dest="lengths",
                      action="store_true",
                      help="output lengths of the sequences")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify an input file"
        sys.exit(1)
    seqs = SequenceIndex.from_file(arguments[0])

    indices = [int(ind) for ind in arguments[1:]]
    if len(indices) == 0:
        sequences = seqs.sequences
    else:
        sequences = [seqs.sequence_by_index(index) for index in indices]

    if options.alphabetical:
        # Sort by string_name
        sequences.sort(key=lambda s: s.string_name)
    elif options.sort_length:
        # Sort by sequence length
        sequences.sort(key=lambda s: len(s))

    header = ["Song name", "Id"]
    justs = [True, False]
    if options.lengths:
        header.append("Length")
        justs.append(False)
    if options.index:
        header.append("Index")
        justs.append(False)
    rows = [header]

    for seq in sequences:
        row = [seq.string_name, str(seq.id)]
        if options.lengths:
            row.append(str(len(seq)))
        if options.index:
            row.append(str(seqs.index_for_id(seq.id)))
        rows.append(row)
    pprint_table(sys.stdout, rows, justs=justs)
def main():
    usage = "%prog [options] <in-file> [<index1> [<index2> ...]]"
    description = (
        "Print the names of sequences in a sequence input "
        "file. Optionally specify indices of sequences. If no index "
        "is given, displays all sequences."
    )
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--sa",
        "-a",
        "--sort-alpha",
        "--alpha",
        dest="alphabetical",
        action="store_true",
        help="order sequences alphabetically by name",
    )
    parser.add_option(
        "--sl", "--sort-length", dest="sort_length", action="store_true", help="order sequences by length"
    )
    parser.add_option(
        "-i",
        "--index",
        dest="index",
        action="store_true",
        help="also display the indices in the sequence file of each sequence, in the column before the ids",
    )
    parser.add_option("-l", "--lengths", dest="lengths", action="store_true", help="output lengths of the sequences")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        print "You must specify an input file"
        sys.exit(1)
    seqs = SequenceIndex.from_file(arguments[0])

    indices = [int(ind) for ind in arguments[1:]]
    if len(indices) == 0:
        sequences = seqs.sequences
    else:
        sequences = [seqs.sequence_by_index(index) for index in indices]

    if options.alphabetical:
        # Sort by string_name
        sequences.sort(key=lambda s: s.string_name)
    elif options.sort_length:
        # Sort by sequence length
        sequences.sort(key=lambda s: len(s))

    header = ["Song name", "Id"]
    justs = [True, False]
    if options.lengths:
        header.append("Length")
        justs.append(False)
    if options.index:
        header.append("Index")
        justs.append(False)
    rows = [header]

    for seq in sequences:
        row = [seq.string_name, str(seq.id)]
        if options.lengths:
            row.append(str(len(seq)))
        if options.index:
            row.append(str(seqs.index_for_id(seq.id)))
        rows.append(row)
    pprint_table(sys.stdout, rows, justs=justs)
Exemple #23
0
def main():
    """Read ParseResults files and print the alignment errors in context.

    For each results file, aligns the top parse result against the gold
    standard and prints a table of insertions ("I"), deletions ("D") and
    substitutions, each surrounded by up to --window rows of aligned
    context.  Error counts are accumulated across files for a later
    summary (not produced in this function).  Python 2 code.
    """
    usage = "%prog [options] <results-files>"
    description = """\
Read in a ParseResults file, just like result_alignment.py. Examines the \
errors that were made and outputs them in context.
"""
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--window",
        "-w",
        dest="window",
        action="store",
        type="int",
        help=
        "size of context window to show before and after each error. Default: 2",
        default=2)
    parser.add_option(
        "--distance",
        "--dist",
        dest="distance",
        action="store_true",
        help=
        "show the total distance travelled in the tonal space by the result and the gold standard"
    )
    parser.add_option(
        "--output-opts",
        "--oopts",
        dest="output_opts",
        action="store",
        help=
        "options that affect the output formatting. Use '--output-opts help' for a list of options."
    )
    parser.add_option(
        "--summary-threshold",
        dest="summary_threshold",
        action="store",
        type="int",
        help=
        "how many times a substitution/insertion/deletion needs to have happened to be including in the summary (default: 4)",
        default=4)
    options, arguments = parser.parse_args()

    if len(arguments) == 0:
        print >> sys.stderr, "Specify at least one file to read the results from"
        sys.exit(1)

    grammar = get_grammar()
    grammar.formalism.cl_output_options(options.output_opts)

    # Size of window of context to show
    win = options.window

    # Per-run accumulators
    errors = []               # filenames that failed to load
    unscored_files = []       # NOTE(review): never populated below -- appears vestigial
    scored = 0
    unscored = 0
    result_lengths = []
    gold_lengths = []
    insertions = {}           # result step -> count
    deletions = {}            # gold step -> count
    substitutions = {}        # "gold > result" -> count
    error_types = {}          # alignment op -> count (includes "A")
    for filename in arguments:
        try:
            top_result, gold_result = get_top_result(filename)
        # Python 2 except syntax; py3 would be `except ... as err:`
        except ParseResults.LoadError, err:
            print >> sys.stderr, "Error loading file: %s" % (err)
            errors.append(filename)
            continue
        else:
            print "============================="
            print "File: %s" % filename
            if top_result is None:
                # No alignment was found
                unscored += 1
                print "No result"
            else:
                # Wrap these up as a semantics, since some functions need that as input
                Sems = grammar.formalism.Semantics.Semantics
                top_sems, gold_sems = Sems(top_result), Sems(gold_result)

                # Do the alignment of the top result and gold result
                alignment, gold_seq, result_seq = results_alignment(
                    top_result, gold_result)
                scored += 1
                # Get the actual list of coordinates
                # (py2 zip returns a list, so [0] indexing works)
                coords = zip(
                    *grammar.formalism.semantics_to_coordinates(gold_sems))[0]
                funs = zip(
                    *grammar.formalism.semantics_to_functions(gold_sems))[0]
                gold_coords = zip(coords, funs)

                coords = zip(
                    *grammar.formalism.semantics_to_coordinates(top_sems))[0]
                funs = zip(
                    *grammar.formalism.semantics_to_functions(top_sems))[0]
                result_coords = zip(coords, funs)

                print "Result length: %d, gold length: %d" % \
                        (len(result_coords), len(gold_coords))
                result_lengths.append(len(result_coords))
                gold_lengths.append(len(gold_coords))

                if options.distance:
                    # Work out the total distance travelled
                    start, end = gold_coords[-1][0], gold_coords[0][0]
                    gold_vect = end[0] - start[0], end[1] - start[1]
                    # And for the actual result
                    start, end = result_coords[-1][0], result_coords[0][0]
                    result_vect = end[0] - start[0], end[1] - start[1]
                    print "Distance travelled:"
                    print "  Gold result:", gold_vect
                    print "  Top result: ", result_vect
                    print

                # Put together a table of error windows
                table = [
                    # Header row
                    ["", "Step", "", "Result", "Gold"]
                ]

                # Walk both sequences in step with the alignment ops,
                # advancing each iterator only when its side consumed a symbol
                gold = iter(zip(gold_seq, gold_coords))
                result = iter(zip(result_seq, result_coords))
                context = []        # rolling pre-context of aligned pairs
                post_context = 0    # aligned rows still to show after an error
                unseen = 0          # aligned rows elided from the output
                for op in alignment:
                    # Keep a record of how many of each error occur
                    if op not in error_types:
                        error_types[op] = 1
                    else:
                        error_types[op] += 1

                    if op == "A":
                        # Aligned pair
                        # Move both sequences on
                        gold_step, gold_point = gold.next()
                        result_step, result_point = result.next()
                        if post_context > 0:
                            # Show this as part of the post-context of an error
                            table.append([
                                "A",
                                str(gold_step), "",
                                str(result_point),
                                str(gold_point)
                            ])
                            context = []
                            post_context -= 1
                        else:
                            # Add this to the rolling window of pre-context
                            if len(context) >= win:
                                # We've not shown something here
                                unseen += 1
                            if win > 0:
                                context.append((gold_step, gold_point,
                                                result_step, result_point))
                                context = context[-win:]
                    else:
                        # Mark if there was something we didn't show
                        if unseen:
                            table.append(
                                ["", "   ...%d..." % unseen, "", "", ""])
                            unseen = 0
                        if context:
                            # Show the error's pre-context
                            # (`__` discards the result step of each context tuple)
                            for (pre_gold_step, pre_gold_point, __,
                                 pre_result_point) in context:
                                table.append([
                                    "A",
                                    str(pre_gold_step), "",
                                    str(pre_result_point),
                                    str(pre_gold_point)
                                ])
                            context = []

                        if op == "I":
                            # Inserted in the result
                            result_step, result_point = result.next()
                            table.append([
                                "I",
                                str(result_step), "",
                                str(result_point), ""
                            ])
                            if str(result_step) not in insertions:
                                insertions[str(result_step)] = 1
                            else:
                                insertions[str(result_step)] += 1
                        elif op == "D":
                            # Deleted in the result
                            gold_step, gold_point = gold.next()
                            table.append(
                                ["D",
                                 str(gold_step), "", "",
                                 str(gold_point)])
                            if str(gold_step) not in deletions:
                                deletions[str(gold_step)] = 1
                            else:
                                deletions[str(gold_step)] += 1
                        else:
                            # Substituted: any other op code consumes one
                            # symbol from each side
                            result_step, result_point = result.next()
                            gold_step, gold_point = gold.next()
                            table.append([
                                str(op),
                                str(result_step),
                                "for %s" % str(gold_step),
                                str(result_point),
                                str(gold_point)
                            ])
                            subst_key = "%s > %s" % (gold_step, result_step)
                            if subst_key not in substitutions:
                                substitutions[subst_key] = 1
                            else:
                                substitutions[subst_key] += 1
                        # After anything other than an alignment, cancel the
                        #  context window
                        context = []
                        # Show up to <win> in the post-context of alignments
                        post_context = win
                # Mark if there was something at the end we didn't show
                if unseen:
                    table.append(["", "   ...%d..." % unseen, "", "", ""])
                # Print out the table
                pprint_table(sys.stdout,
                             table,
                             justs=[True, True, True, True, True])

        print "\n"
Exemple #24
0
 # Rank every song in the corpus by its distance from the parse result.
 # NOTE(review): `metric`, `result`, `corpus`, `print_up_to` and
 # `correct_song` come from the enclosing (unseen) scope — confirm there.
 distances = []
 for name,songsem in corpus:
     # Get the distance from this song
     dist = metric.distance(result, songsem)
     distances.append((name,dist,songsem))
 # Sort them to get the closest first
 distances.sort(key=lambda x:x[1])
 
 print
 # Print out the top results, as many as requested
 top_results = distances[:print_up_to]
 # Mark the correct song (if known) with a "*" in the first column
 table = [["","Song","Distance"]] + [
                 ["*" if res[0] == correct_song else "", 
                  "%s" % res[0], 
                  "%.2f" % res[1]] for res in top_results]
 pprint_table(sys.stdout, table, default_just=True)
 print
 
 if correct_song is not None:
     # Look for the correct answer in the results
     for rank,(name,distance,__) in enumerate(distances):
         # Match up the song name to the correct one
         if name == correct_song:
             correct_rank = rank
             break
     else:
         # The song name was not found in the corpus at all
         correct_rank = None
     
     if correct_rank is None:
         print "Song was not found in corpus"
Exemple #25
0
        print "\n"
    # Summary statistics over all processed result sets
    print "Processed %d result sets" % (scored + unscored)
    print "Errors processing %d result sets" % len(errors)
    # NOTE(review): these averages divide by len(...) and would raise
    # ZeroDivisionError on empty lists — confirm the (unseen) caller
    # guarantees at least one result/gold was collected.
    print "Average result length: %.2f (%d)" % (
        float(sum(result_lengths)) / len(result_lengths), sum(result_lengths))
    print "Average gold length:   %.2f (%d)" % (
        float(sum(gold_lengths)) / len(gold_lengths), sum(gold_lengths))
    # A table of error types
    print
    print "Error types:"
    error_table = []
    for error, count in error_types.items():
        # "A" entries are skipped — presumably alignments, which are not
        # errors; confirm against the code that fills error_types.
        if error != "A":
            error_table.append([error, "%d" % count])
    pprint_table(sys.stdout, error_table, justs=[True, False])
    # Show common mistakes
    # Substitutions
    print
    print "Common substitutions:"
    subst_table = []
    # Most frequent first; only show those at or above the display threshold
    for subst, count in reversed(
            sorted(substitutions.items(), key=lambda x: x[1])):
        if count >= options.summary_threshold:
            subst_table.append(["%s" % subst, "%d" % count])
    pprint_table(sys.stdout, subst_table, justs=[True, False])

    # Deletions
    print
    print "Common deletions:"
    del_table = []