def main():
    """Write the sequences of multiply-annotated songs to a data file.

    Command line: ``%prog [-n] <out-file>``.

    Songs that have at least two chord sequences are selected. For each
    such song the ids of its first two sequences are recorded as a pair and
    the mirror of every one of its sequences is collected. The collected
    sequences and pairs are wrapped in a ``ConsistencyData`` and saved to
    ``<out-file>``. With ``-n``/``--no-names`` the name of each collected
    sequence is replaced by ``sequence-<id>`` before saving.

    Exits with status 1 if no output file is given.
    """
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-n", "--no-names", dest="no_names", action="store_true",
        help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write(
            "You must specify an output file as the first argument\n")
        sys.exit(1)
    filename = arguments[0]

    # Get songs that have multiple annotations
    songs = Song.objects.annotate(
        seqs=Count('chordsequence')).filter(seqs__gte=2)

    # Create db mirrors of all the sequences
    sequences = []
    pairs = []
    for song in songs:
        # Evaluate the related set exactly once. Indexing or counting an
        # unevaluated queryset issues a separate query each time, so the
        # pair ids and the mirrored sequences could otherwise come from
        # differently ordered result sets.
        seqs = list(song.chordsequence_set.all())
        if len(seqs) > 2:
            sys.stderr.write(
                "%s has more than 2 alternative annotations\n" %
                song.string_name)
        # Add a record of the pairing (only the first two are paired)
        pairs.append((seqs[0].id, seqs[1].id))
        # Add the mirrored version of every sequence.
        # NOTE(review): for songs with more than two annotations the extra
        # sequences are stored but belong to no pair -- confirm this is
        # what ConsistencyData expects.
        for seq in seqs:
            sequences.append(seq.mirror)

    if options.no_names:
        for seq in sequences:
            # Obscure the sequence's name
            seq.name = "sequence-%d" % seq.id

    consdata = ConsistencyData(sequences, pairs)
    consdata.save(filename)

    print("Output %d sequences with multiple annotations" % songs.count())
def main():
    """Write the sequences of multiply-annotated songs to a data file.

    Command line: ``%prog [-n] <out-file>``.

    Songs that have at least two chord sequences are selected. For each
    such song the ids of its first two sequences are recorded as a pair and
    the mirror of every one of its sequences is collected. The collected
    sequences and pairs are wrapped in a ``ConsistencyData`` and saved to
    ``<out-file>``. With ``-n``/``--no-names`` the name of each collected
    sequence is replaced by ``sequence-<id>`` before saving.

    Exits with status 1 if no output file is given.
    """
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-n", "--no-names", dest="no_names", action="store_true",
        help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()

    if not arguments:
        sys.stderr.write(
            "You must specify an output file as the first argument\n")
        sys.exit(1)
    filename = arguments[0]

    # Get songs that have multiple annotations
    songs = Song.objects.annotate(
        seqs=Count('chordsequence')).filter(seqs__gte=2)

    # Create db mirrors of all the sequences
    sequences = []
    pairs = []
    for song in songs:
        # Evaluate the related set exactly once. Indexing or counting an
        # unevaluated queryset issues a separate query each time, so the
        # pair ids and the mirrored sequences could otherwise come from
        # differently ordered result sets.
        seqs = list(song.chordsequence_set.all())
        if len(seqs) > 2:
            sys.stderr.write(
                "%s has more than 2 alternative annotations\n" %
                song.string_name)
        # Add a record of the pairing (only the first two are paired)
        pairs.append((seqs[0].id, seqs[1].id))
        # Add the mirrored version of every sequence.
        # NOTE(review): for songs with more than two annotations the extra
        # sequences are stored but belong to no pair -- confirm this is
        # what ConsistencyData expects.
        for seq in seqs:
            sequences.append(seq.mirror)

    if options.no_names:
        for seq in sequences:
            # Obscure the sequence's name
            seq.name = "sequence-%d" % seq.id

    consdata = ConsistencyData(sequences, pairs)
    consdata.save(filename)

    print("Output %d sequences with multiple annotations" % songs.count())
def main():
    """Evaluate how consistently two annotators annotated the same songs.

    Command line: ``%prog [options] <consistency-data>``.

    The consistency data file is loaded with ``ConsistencyData.from_file``
    and iterated as pairs of annotations. Chords are compared position by
    position (``zip``), counting matching categories and matching
    coordination markers (``treeinfo.coord_unresolved`` /
    ``treeinfo.coord_resolved``). Precision is computed against the totals
    of the first annotation of each pair, recall against the totals of the
    second, and all three scores are printed as percentages.

    If ``-m`` is given, each annotation is additionally parsed with
    ``parse_sequence_with_annotations`` and the resulting semantics pairs
    are scored with the requested metric(s). With ``-f`` the metric is
    instantiated three times (``output=precision``, ``output=recall``,
    ``output=f``).

    Exits with status 1 if no consistency data file is given.
    """
    usage = "%prog [options] <consistency-data>"
    description = "Evaluates annotator consistency."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-m", "--metric", dest="metric", action="store",
        help="semantics distance metric to use. Use '-m help' for a list of "
             "available metrics")
    parser.add_option(
        "--mopt", "--metric-options", dest="mopts", action="append",
        help="options to pass to the semantics metric. Use with '--mopt help' "
             "with -m to see available options")
    parser.add_option(
        "-f", "--f-score", dest="f_score", action="store_true",
        help="outputs recall, precision and f-score for an f-score-based "
             "metric. Just uses the same metric 3 times with output=recall, "
             "etc. Will only work with appropriate metrics")
    options, arguments = parser.parse_args()

    grammar = get_grammar()

    # The metric is set up before the positional argument is checked, so
    # that '-m help' style invocations reach command_line_metric without a
    # data file, exactly as before.
    # NOTE(review): `formalism` is not bound anywhere in this function; it
    # must come from module scope -- confirm it exists (grammar.formalism
    # may have been intended).
    metrics = []
    use_metric = options.metric is not None
    if use_metric:
        if options.f_score:
            # Special case: get 3 metrics
            opts = options.mopts or []
            for opt in ["output=precision", "output=recall", "output=f"]:
                metrics.append(
                    command_line_metric(formalism, options.metric,
                                        opts + [opt]))
            print("Evaluating precision, recall and f-score on %s" %
                  metrics[0].name)
        else:
            # Get a metric according to the options
            metrics = [
                command_line_metric(formalism, options.metric, options.mopts)
            ]
            print("Evaluating using metric: %s" % metrics[0].name)

    if len(arguments) < 1:
        sys.stderr.write("Specify a consistency data file\n")
        sys.exit(1)
    filename = arguments[0]

    consdata = ConsistencyData.from_file(filename)

    # Single pass over the annotation pairs. Category matches and
    # coordination matches are only counted over aligned positions (zip),
    # whereas the per-annotation coordination totals cover every chord of
    # the respective annotation.
    chords = 0
    matches = 0
    rean_coords = 0
    gold_coords = 0
    match_coords = 0
    for ann1, ann2 in consdata:
        rean_coords += _count_coordinations(ann1)
        gold_coords += _count_coordinations(ann2)
        for chord1, chord2 in zip(ann1, ann2):
            chords += 1
            if chord1.category == chord2.category:
                matches += 1
            info1, info2 = chord1.treeinfo, chord2.treeinfo
            if info1.coord_unresolved and info2.coord_unresolved:
                match_coords += 1
            if info1.coord_resolved and info2.coord_resolved:
                match_coords += 1

    # Compute precision, recall and f-score from this. Zero denominators
    # (empty data, or no agreement at all) yield 0.0 instead of raising
    # ZeroDivisionError.
    agreed = matches + match_coords
    precision = _percentage(agreed, chords + rean_coords)
    recall = _percentage(agreed, chords + gold_coords)
    if precision + recall:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0.0

    print("%d chords" % chords)
    print("\nCategory and coordination accuracy:")
    print("Precision: %.2f" % precision)
    print("Recall: %.2f" % recall)
    print("F-score: %.2f" % fscore)

    if use_metric:
        print("")

        def _parse_seq(seq):
            # Parse the annotations to get a semantics
            try:
                gold_parses = parse_sequence_with_annotations(
                    DbInput.from_sequence(seq),
                    grammar=grammar,
                    allow_subparses=False)
                # Got a result: return its semantics
                return gold_parses[0].semantics
            except ParseError as err:
                # Could not parse annotated sequence: report it and fall
                # through, so this annotation contributes None to its pair
                sys.stderr.write("Could not parse sequence '%s': %s\n" %
                                 (seq.string_name, err))
                return

        # Prepare pairs of gold-standard parse results from the two
        # annotations
        sem_pairs = [(_parse_seq(ann1), _parse_seq(ann2))
                     for (ann1, ann2) in consdata]
        # Compute the distance using the metrics
        for metric in metrics:
            distance = metric.total_distance(sem_pairs)
            print("%s: %s" % (metric.identifier.capitalize(),
                              metric.format_distance(distance)))


def _count_coordinations(annotation):
    """Count coordination markers (unresolved plus resolved) on the chords."""
    total = 0
    for chord in annotation:
        if chord.treeinfo.coord_unresolved:
            total += 1
        if chord.treeinfo.coord_resolved:
            total += 1
    return total


def _percentage(numerator, denominator):
    """Return 100 * numerator / denominator, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    return 100.0 * numerator / denominator
def main():
    """Evaluate how consistently two annotators annotated the same songs.

    Command line: ``%prog [options] <consistency-data>``.

    The consistency data file is loaded with ``ConsistencyData.from_file``
    and iterated as pairs of annotations. Chords are compared position by
    position (``zip``), counting matching categories and matching
    coordination markers (``treeinfo.coord_unresolved`` /
    ``treeinfo.coord_resolved``). Precision is computed against the totals
    of the first annotation of each pair, recall against the totals of the
    second, and all three scores are printed as percentages.

    If ``-m`` is given, each annotation is additionally parsed with
    ``parse_sequence_with_annotations`` and the resulting semantics pairs
    are scored with the requested metric(s). With ``-f`` the metric is
    instantiated three times (``output=precision``, ``output=recall``,
    ``output=f``).

    Exits with status 1 if no consistency data file is given.
    """
    usage = "%prog [options] <consistency-data>"
    description = "Evaluates annotator consistency."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-m", "--metric", dest="metric", action="store",
        help="semantics distance metric to use. Use '-m help' for a list of "
             "available metrics")
    parser.add_option(
        "--mopt", "--metric-options", dest="mopts", action="append",
        help="options to pass to the semantics metric. Use with '--mopt help' "
             "with -m to see available options")
    parser.add_option(
        "-f", "--f-score", dest="f_score", action="store_true",
        help="outputs recall, precision and f-score for an f-score-based "
             "metric. Just uses the same metric 3 times with output=recall, "
             "etc. Will only work with appropriate metrics")
    options, arguments = parser.parse_args()

    grammar = get_grammar()

    # The metric is set up before the positional argument is checked, so
    # that '-m help' style invocations reach command_line_metric without a
    # data file, exactly as before.
    # NOTE(review): `formalism` is not bound anywhere in this function; it
    # must come from module scope -- confirm it exists (grammar.formalism
    # may have been intended).
    metrics = []
    use_metric = options.metric is not None
    if use_metric:
        if options.f_score:
            # Special case: get 3 metrics
            opts = options.mopts or []
            for opt in ["output=precision", "output=recall", "output=f"]:
                metrics.append(
                    command_line_metric(formalism, options.metric,
                                        opts + [opt]))
            print("Evaluating precision, recall and f-score on %s" %
                  metrics[0].name)
        else:
            # Get a metric according to the options
            metrics = [
                command_line_metric(formalism, options.metric, options.mopts)
            ]
            print("Evaluating using metric: %s" % metrics[0].name)

    if len(arguments) < 1:
        sys.stderr.write("Specify a consistency data file\n")
        sys.exit(1)
    filename = arguments[0]

    consdata = ConsistencyData.from_file(filename)

    # Single pass over the annotation pairs. Category matches and
    # coordination matches are only counted over aligned positions (zip),
    # whereas the per-annotation coordination totals cover every chord of
    # the respective annotation.
    chords = 0
    matches = 0
    rean_coords = 0
    gold_coords = 0
    match_coords = 0
    for ann1, ann2 in consdata:
        rean_coords += _count_coordinations(ann1)
        gold_coords += _count_coordinations(ann2)
        for chord1, chord2 in zip(ann1, ann2):
            chords += 1
            if chord1.category == chord2.category:
                matches += 1
            info1, info2 = chord1.treeinfo, chord2.treeinfo
            if info1.coord_unresolved and info2.coord_unresolved:
                match_coords += 1
            if info1.coord_resolved and info2.coord_resolved:
                match_coords += 1

    # Compute precision, recall and f-score from this. Zero denominators
    # (empty data, or no agreement at all) yield 0.0 instead of raising
    # ZeroDivisionError.
    agreed = matches + match_coords
    precision = _percentage(agreed, chords + rean_coords)
    recall = _percentage(agreed, chords + gold_coords)
    if precision + recall:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0.0

    print("%d chords" % chords)
    print("\nCategory and coordination accuracy:")
    print("Precision: %.2f" % precision)
    print("Recall: %.2f" % recall)
    print("F-score: %.2f" % fscore)

    if use_metric:
        print("")

        def _parse_seq(seq):
            # Parse the annotations to get a semantics
            try:
                gold_parses = parse_sequence_with_annotations(
                    DbInput.from_sequence(seq),
                    grammar=grammar,
                    allow_subparses=False)
                # Got a result: return its semantics
                return gold_parses[0].semantics
            except ParseError as err:
                # Could not parse annotated sequence: report it and fall
                # through, so this annotation contributes None to its pair
                sys.stderr.write("Could not parse sequence '%s': %s\n" %
                                 (seq.string_name, err))
                return

        # Prepare pairs of gold-standard parse results from the two
        # annotations
        sem_pairs = [(_parse_seq(ann1), _parse_seq(ann2))
                     for (ann1, ann2) in consdata]
        # Compute the distance using the metrics
        for metric in metrics:
            distance = metric.total_distance(sem_pairs)
            print("%s: %s" % (metric.identifier.capitalize(),
                              metric.format_distance(distance)))


def _count_coordinations(annotation):
    """Count coordination markers (unresolved plus resolved) on the chords."""
    total = 0
    for chord in annotation:
        if chord.treeinfo.coord_unresolved:
            total += 1
        if chord.treeinfo.coord_resolved:
            total += 1
    return total


def _percentage(numerator, denominator):
    """Return 100 * numerator / denominator, or 0.0 for a zero denominator."""
    if not denominator:
        return 0.0
    return 100.0 * numerator / denominator