Esempio n. 1
0
def main():
    """Export the chord sequences of multiply-annotated songs to a file.

    The output filename is taken from the first positional command-line
    argument. Every song with at least two chord sequences is selected; the
    ``mirror`` of each of its sequences is collected, along with one
    ``(id, id)`` pair per song built from its first two sequences. The result
    is wrapped in a ``ConsistencyData`` and saved to the output file.

    With ``-n``/``--no-names`` every collected sequence is renamed to
    ``sequence-<id>`` before saving.

    Exits with status 1 if no output file is given.
    """
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-n",
                      "--no-names",
                      dest="no_names",
                      action="store_true",
                      help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write(
            "You must specify an output file as the first argument\n")
        sys.exit(1)
    filename = arguments[0]

    # Get songs that have multiple annotations
    songs = Song.objects.annotate(seqs=Count('chordsequence')).filter(
        seqs__gte=2)

    # Create db mirrors of all the sequences
    sequences = []
    pairs = []
    for song in songs:
        # Fetch the song's sequences once and reuse the list for the count,
        # the pairing and the mirroring, instead of querying for each.
        seqs = list(song.chordsequence_set.all())
        if len(seqs) > 2:
            # Only the first two sequences are paired below, although all of
            # them are still exported.
            sys.stderr.write(
                "%s has more than 2 alternative annotations\n" %
                song.string_name)
        # Add a record of the pairing
        pairs.append((seqs[0].id, seqs[1].id))
        # Add the mirrored version of each sequence
        sequences.extend(seq.mirror for seq in seqs)

    if options.no_names:
        # Obscure the names in one pass once everything has been collected;
        # doing this inside the song loop re-renamed every earlier sequence
        # on each iteration.
        for seq in sequences:
            seq.name = "sequence-%d" % seq.id

    consdata = ConsistencyData(sequences, pairs)
    consdata.save(filename)

    print("Output %d sequences with multiple annotations" % songs.count())
def main():
    """Export the chord sequences of multiply-annotated songs to a file.

    The output filename is taken from the first positional command-line
    argument. Every song with at least two chord sequences is selected; the
    ``mirror`` of each of its sequences is collected, along with one
    ``(id, id)`` pair per song built from its first two sequences. The result
    is wrapped in a ``ConsistencyData`` and saved to the output file.

    With ``-n``/``--no-names`` every collected sequence is renamed to
    ``sequence-<id>`` before saving.

    Exits with status 1 if no output file is given.
    """
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-n", "--no-names", dest="no_names", action="store_true", help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write("You must specify an output file as the first argument\n")
        sys.exit(1)
    filename = arguments[0]

    # Get songs that have multiple annotations
    songs = Song.objects.annotate(seqs=Count('chordsequence')).filter(seqs__gte=2)

    # Create db mirrors of all the sequences
    sequences = []
    pairs = []
    for song in songs:
        # Fetch the song's sequences once and reuse the list for the count,
        # the pairing and the mirroring, instead of querying for each.
        seqs = list(song.chordsequence_set.all())
        if len(seqs) > 2:
            # Only the first two sequences are paired below, although all of
            # them are still exported.
            sys.stderr.write("%s has more than 2 alternative annotations\n" %
                             song.string_name)
        # Add a record of the pairing
        pairs.append((seqs[0].id, seqs[1].id))
        # Add the mirrored version of each sequence
        sequences.extend(seq.mirror for seq in seqs)

    if options.no_names:
        # Obscure the names in one pass once everything has been collected;
        # doing this inside the song loop re-renamed every earlier sequence
        # on each iteration.
        for seq in sequences:
            seq.name = "sequence-%d" % seq.id

    consdata = ConsistencyData(sequences, pairs)
    consdata.save(filename)

    print("Output %d sequences with multiple annotations" % songs.count())
Esempio n. 3
0
def main():
    """Report how closely pairs of annotations of the same sequence agree.

    Loads a ``ConsistencyData`` file named by the first positional argument
    and prints the number of aligned chord pairs, followed by precision,
    recall and f-score (as percentages) over matching chord categories and
    matching coordination points. The first annotation of each pair supplies
    the precision denominator and the second the recall denominator.

    If ``-m`` is given, each annotation is also parsed with the grammar and
    the total distance between the paired semantics is printed for every
    selected metric (three metrics when ``-f`` is also given).

    Exits with status 1 if no data file is given.
    """
    usage = "%prog [options] <consistency-data>"
    description = "Evaluates annotator consistency."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-m", "--metric", dest="metric", action="store",
        help="semantics distance metric to use. Use '-m help' for a list of "\
            "available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts",
        action="append",
        help="options to pass to the semantics metric. Use with '--mopt help' "\
            "with -m to see available options")
    parser.add_option("-f", "--f-score", dest="f_score", action="store_true",
        help="outputs recall, precision and f-score for an f-score-based "\
            "metric. Just uses the same metric 3 times with output=recall, "\
            "etc. Will only work with appropriate metrics")
    options, arguments = parser.parse_args()

    grammar = get_grammar()

    # NOTE(review): `formalism` is not bound anywhere in this function;
    # presumably it is a module-level name -- confirm.
    use_metric = options.metric is not None
    if use_metric:
        if options.f_score:
            # Special case: get 3 metrics
            opts = options.mopts or []
            metrics = [
                command_line_metric(formalism, options.metric, opts + [opt])
                for opt in ["output=precision", "output=recall", "output=f"]
            ]
            print("Evaluating precision, recall and f-score on %s" %
                  metrics[0].name)
        else:
            # Get a metric according to the options
            metrics = [command_line_metric(formalism, options.metric,
                                           options.mopts)]
            print("Evaluating using metric: %s" % metrics[0].name)

    # The argument check deliberately stays after the metric set-up so that
    # metric options can be processed without naming a data file.
    if len(arguments) < 1:
        sys.stderr.write("Specify a consistency data file\n")
        sys.exit(1)
    filename = arguments[0]

    consdata = ConsistencyData.from_file(filename)

    def _coord_points(annotation):
        # Number of coordination markers in one annotation; a chord flagged
        # both unresolved and resolved counts twice.
        total = 0
        for chord in annotation:
            info = chord.treeinfo
            if info.coord_unresolved:
                total += 1
            if info.coord_resolved:
                total += 1
        return total

    # Count matching categories and coordination points in a single pass
    matches = 0
    chords = 0
    rean_coords = 0
    gold_coords = 0
    match_coords = 0
    for ann1, ann2 in consdata:
        # zip() stops at the shorter annotation, so only aligned chords are
        # compared
        for chord1, chord2 in zip(ann1, ann2):
            chords += 1
            if chord1.category == chord2.category:
                matches += 1
            info1 = chord1.treeinfo
            info2 = chord2.treeinfo
            if info1.coord_unresolved and info2.coord_unresolved:
                match_coords += 1
            if info1.coord_resolved and info2.coord_resolved:
                match_coords += 1
        # Totals cover each annotation in full, not just the aligned part
        rean_coords += _coord_points(ann1)
        gold_coords += _coord_points(ann2)

    # Compute precision, recall and f-score from this. Empty data or a total
    # lack of matches would otherwise divide by zero, so report 0 instead.
    hits = matches + match_coords
    proposed = chords + rean_coords
    expected = chords + gold_coords
    precision = 100.0 * hits / proposed if proposed else 0.0
    recall = 100.0 * hits / expected if expected else 0.0
    if precision + recall > 0:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0.0
    print("%d chords" % chords)
    print("\nCategory and coordination accuracy:")
    print("Precision: %.2f" % precision)
    print("Recall: %.2f" % recall)
    print("F-score: %.2f" % fscore)

    if use_metric:
        print("")

        def _parse_seq(seq):
            # Parse the annotations to get a semantics; None if parsing fails
            try:
                gold_parses = parse_sequence_with_annotations(
                    DbInput.from_sequence(seq),
                    grammar=grammar,
                    allow_subparses=False)
                # Got a result: return its semantics
                return gold_parses[0].semantics
            except ParseError as err:
                # Could not parse annotated sequence
                sys.stderr.write("Could not parse sequence '%s': %s\n" %
                                 (seq.string_name, err))
                return None

        # Prepare pairs of gold-standard parse results from the two annotations
        sem_pairs = [
            (_parse_seq(ann1), _parse_seq(ann2)) for (ann1, ann2) in consdata
        ]
        # Compute the distance using the metrics
        for metric in metrics:
            distance = metric.total_distance(sem_pairs)
            print("%s: %s" % (metric.identifier.capitalize(),
                              metric.format_distance(distance)))
Esempio n. 4
0
def main():
    """Report how closely pairs of annotations of the same sequence agree.

    Loads a ``ConsistencyData`` file named by the first positional argument
    and prints the number of aligned chord pairs, followed by precision,
    recall and f-score (as percentages) over matching chord categories and
    matching coordination points. The first annotation of each pair supplies
    the precision denominator and the second the recall denominator.

    If ``-m`` is given, each annotation is also parsed with the grammar and
    the total distance between the paired semantics is printed for every
    selected metric (three metrics when ``-f`` is also given).

    Exits with status 1 if no data file is given.
    """
    usage = "%prog [options] <consistency-data>"
    description = "Evaluates annotator consistency."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-m", "--metric", dest="metric", action="store",
        help="semantics distance metric to use. Use '-m help' for a list of "\
            "available metrics")
    parser.add_option("--mopt", "--metric-options", dest="mopts",
        action="append",
        help="options to pass to the semantics metric. Use with '--mopt help' "\
            "with -m to see available options")
    parser.add_option("-f", "--f-score", dest="f_score", action="store_true",
        help="outputs recall, precision and f-score for an f-score-based "\
            "metric. Just uses the same metric 3 times with output=recall, "\
            "etc. Will only work with appropriate metrics")
    options, arguments = parser.parse_args()

    grammar = get_grammar()

    # NOTE(review): `formalism` is not bound anywhere in this function;
    # presumably it is a module-level name -- confirm.
    use_metric = options.metric is not None
    if use_metric:
        if options.f_score:
            # Special case: get 3 metrics
            opts = options.mopts or []
            metrics = [
                command_line_metric(formalism, options.metric, opts + [opt])
                for opt in ["output=precision", "output=recall", "output=f"]
            ]
            print("Evaluating precision, recall and f-score on %s" %
                  metrics[0].name)
        else:
            # Get a metric according to the options
            metrics = [
                command_line_metric(formalism, options.metric, options.mopts)
            ]
            print("Evaluating using metric: %s" % metrics[0].name)

    # The argument check deliberately stays after the metric set-up so that
    # metric options can be processed without naming a data file.
    if len(arguments) < 1:
        sys.stderr.write("Specify a consistency data file\n")
        sys.exit(1)
    filename = arguments[0]

    consdata = ConsistencyData.from_file(filename)

    def _coord_points(annotation):
        # Number of coordination markers in one annotation; a chord flagged
        # both unresolved and resolved counts twice.
        total = 0
        for chord in annotation:
            info = chord.treeinfo
            if info.coord_unresolved:
                total += 1
            if info.coord_resolved:
                total += 1
        return total

    # Count matching categories and coordination points in a single pass
    matches = 0
    chords = 0
    rean_coords = 0
    gold_coords = 0
    match_coords = 0
    for ann1, ann2 in consdata:
        # zip() stops at the shorter annotation, so only aligned chords are
        # compared
        for chord1, chord2 in zip(ann1, ann2):
            chords += 1
            if chord1.category == chord2.category:
                matches += 1
            info1 = chord1.treeinfo
            info2 = chord2.treeinfo
            if info1.coord_unresolved and info2.coord_unresolved:
                match_coords += 1
            if info1.coord_resolved and info2.coord_resolved:
                match_coords += 1
        # Totals cover each annotation in full, not just the aligned part
        rean_coords += _coord_points(ann1)
        gold_coords += _coord_points(ann2)

    # Compute precision, recall and f-score from this. Empty data or a total
    # lack of matches would otherwise divide by zero, so report 0 instead.
    hits = matches + match_coords
    proposed = chords + rean_coords
    expected = chords + gold_coords
    precision = 100.0 * hits / proposed if proposed else 0.0
    recall = 100.0 * hits / expected if expected else 0.0
    if precision + recall > 0:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0.0
    print("%d chords" % chords)
    print("\nCategory and coordination accuracy:")
    print("Precision: %.2f" % precision)
    print("Recall: %.2f" % recall)
    print("F-score: %.2f" % fscore)

    if use_metric:
        print("")

        def _parse_seq(seq):
            # Parse the annotations to get a semantics; None if parsing fails
            try:
                gold_parses = parse_sequence_with_annotations(
                    DbInput.from_sequence(seq),
                    grammar=grammar,
                    allow_subparses=False)
                # Got a result: return its semantics
                return gold_parses[0].semantics
            except ParseError as err:
                # Could not parse annotated sequence
                sys.stderr.write("Could not parse sequence '%s': %s\n" %
                                 (seq.string_name, err))
                return None

        # Prepare pairs of gold-standard parse results from the two annotations
        sem_pairs = [(_parse_seq(ann1), _parse_seq(ann2))
                     for (ann1, ann2) in consdata]
        # Compute the distance using the metrics
        for metric in metrics:
            distance = metric.total_distance(sem_pairs)
            print("%s: %s" % (metric.identifier.capitalize(),
                              metric.format_distance(distance)))