Exemple #1
0
 def get_gold_semantics(self):
     """
     Return a gold standard semantics for this result, if one can be found.

     Two sources are tried, in order:
      - C{self.gold_parse}: a gold semantics stored along with the
        results, returned as is when it is not C{None};
      - C{self.gold_sequence}: a gold annotated chord sequence, whose
        annotations are parsed to produce the semantics. Note that this
        might take a little bit of time.

     @return: the gold semantics, or C{None} if neither source is
         available, if the annotated sequence cannot be parsed
         (C{ParseError}) or if parsing does not yield exactly one result.

     """
     from jazzparser.evaluation.parsing import parse_sequence_with_annotations

     # A stored gold parse always takes precedence
     if self.gold_parse is not None:
         return self.gold_parse
     # No annotated sequence to fall back on either
     if self.gold_sequence is None:
         return None

     try:
         results = parse_sequence_with_annotations(
             self.gold_sequence,
             grammar=get_grammar(),
             allow_subparses=False)
         # Since allow_subparses was False, anything other than exactly
         #  one result shouldn't happen: treat it as "no gold semantics"
         return results[0].semantics if len(results) == 1 else None
     except ParseError:
         # Could not parse annotated sequence
         return None
    def get_gold_semantics(self):
        """
        Return a gold standard semantics for this result, if one can be found.

        Two sources are tried, in order:
         - C{self.gold_parse}: a gold semantics stored along with the
           results, returned as is when it is not C{None};
         - C{self.gold_sequence}: a gold annotated chord sequence, whose
           annotations are parsed to produce the semantics. Note that this
           might take a little bit of time.

        @return: the gold semantics, or C{None} if neither source is
            available, if the annotated sequence cannot be parsed
            (C{ParseError}) or if parsing does not yield exactly one result.

        """
        from jazzparser.evaluation.parsing import parse_sequence_with_annotations

        # A stored gold parse always takes precedence
        if self.gold_parse is not None:
            return self.gold_parse
        # No annotated sequence to fall back on either
        if self.gold_sequence is None:
            return None

        try:
            results = parse_sequence_with_annotations(
                self.gold_sequence,
                grammar=get_grammar(),
                allow_subparses=False)
            # Since allow_subparses was False, anything other than exactly
            #  one result shouldn't happen: treat it as "no gold semantics"
            return results[0].semantics if len(results) == 1 else None
        except ParseError:
            # Could not parse annotated sequence
            return None
    def get_gold_analysis(self):
        """
        Build the gold analysis by parsing this input's annotations.

        The input itself is handed to C{parse_sequence_with_annotations},
        together with the grammar returned by C{get_grammar()}, with
        subparses disallowed.

        @return: the semantics of the first parse in the result.

        """
        from jazzparser.evaluation.parsing import parse_sequence_with_annotations
        from jazzparser.grammar import get_grammar

        grammar = get_grammar()
        gold = parse_sequence_with_annotations(self, grammar, allow_subparses=False)
        return gold[0].semantics
Exemple #4
0
def keys_for_sequence(sequence, grammar=None):
    """
    Takes a chord sequence from the chord corpus and parses using its
    annotations. Returns a list of C{(chord, key)} pairs, one for each chord
    in C{sequence.chords}, giving its key (as a pitch class integer).

    This is simply worked out, once the parse is done. Every chord in a cadence
    has the same key as the resolution of the cadence, which can be read off
    by taking the equal temperament pitch class for the tonal space point of
    the resolution.

    @param sequence: the annotated chord sequence. Its C{chords} are
        iterated (each chord's C{duration} advances the clock) and its
        C{string_name} is used in the error message.
    @param grammar: grammar to parse with; C{get_grammar()} is used if
        C{None}.
    @return: list of C{(chord, key)} tuples in chord order.
    @raise ParseError: if the annotations give more than one (i.e. only
        partial) parses.

    """
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    if grammar is None:
        grammar = get_grammar()
    # Try parsing the sequence according to the tree in the database
    sub_parses = parse_sequence_with_annotations(sequence, grammar)
    if len(sub_parses) > 1:
        # We can only continue if we got a full parse
        raise ParseError("could not fully parse the sequence %s." %
                         sequence.string_name)
    sems = sub_parses[0].semantics

    # Get the keys for this LF, and the times when they start
    keys = grammar.formalism.semantics_to_keys(sems)
    key_roots, change_times = zip(*keys)

    # The first key is the current key from the start: its own start time
    #  is ignored, as it should be 0
    key = key_roots[0]
    # Every later key is paired with the time at which it takes over
    changes = iter(zip(change_times[1:], key_roots[1:]))
    upcoming = next(changes, None)

    # NOTE(review): all chords are read from sequence.chords; this assumes
    #  iterating the sequence itself yields the same chords -- confirm
    chord_keys = []
    time = 0
    for chord in sequence.chords:
        # Once the changes run out (upcoming is None), the current key is
        #  simply kept for all the remaining chords
        if upcoming is not None and time >= upcoming[0]:
            # Move onto the next key
            key = upcoming[1]
            upcoming = next(changes, None)
        # Add the next chord with the current key value
        chord_keys.append((chord, key))
        time += chord.duration

    return chord_keys
def keys_for_sequence(sequence, grammar=None):
    """
    Takes a chord sequence from the chord corpus and parses using its
    annotations. Returns a list of C{(chord, key)} pairs, one for each chord
    in C{sequence.chords}, giving its key (as a pitch class integer).

    This is simply worked out, once the parse is done. Every chord in a cadence
    has the same key as the resolution of the cadence, which can be read off
    by taking the equal temperament pitch class for the tonal space point of
    the resolution.

    @param sequence: the annotated chord sequence. Its C{chords} are
        iterated (each chord's C{duration} advances the clock) and its
        C{string_name} is used in the error message.
    @param grammar: grammar to parse with; C{get_grammar()} is used if
        C{None}.
    @return: list of C{(chord, key)} tuples in chord order.
    @raise ParseError: if the annotations give more than one (i.e. only
        partial) parses.

    """
    from jazzparser.evaluation.parsing import parse_sequence_with_annotations

    if grammar is None:
        grammar = get_grammar()
    # Try parsing the sequence according to the tree in the database
    sub_parses = parse_sequence_with_annotations(sequence, grammar)
    if len(sub_parses) > 1:
        # We can only continue if we got a full parse
        raise ParseError("could not fully parse the sequence %s." %
                         sequence.string_name)
    sems = sub_parses[0].semantics

    # Get the keys for this LF, and the times when they start
    keys = grammar.formalism.semantics_to_keys(sems)
    key_roots, change_times = zip(*keys)

    # The first key is the current key from the start: its own start time
    #  is ignored, as it should be 0
    key = key_roots[0]
    # Every later key is paired with the time at which it takes over
    changes = iter(zip(change_times[1:], key_roots[1:]))
    upcoming = next(changes, None)

    # NOTE(review): all chords are read from sequence.chords; this assumes
    #  iterating the sequence itself yields the same chords -- confirm
    chord_keys = []
    time = 0
    for chord in sequence.chords:
        # Once the changes run out (upcoming is None), the current key is
        #  simply kept for all the remaining chords
        if upcoming is not None and time >= upcoming[0]:
            # Move onto the next key
            key = upcoming[1]
            upcoming = next(changes, None)
        # Add the next chord with the current key value
        chord_keys.append((chord, key))
        time += chord.duration

    return chord_keys
Exemple #6
0
 def get_gold_analysis(self):
     """
     Build the gold analysis by parsing this input's annotations.

     The input itself is handed to C{parse_sequence_with_annotations},
     together with the grammar returned by C{get_grammar()}, with
     subparses disallowed.

     @return: the semantics of the first parse in the result.

     """
     from jazzparser.evaluation.parsing import parse_sequence_with_annotations
     from jazzparser.grammar import get_grammar

     grammar = get_grammar()
     gold = parse_sequence_with_annotations(self, grammar, allow_subparses=False)
     return gold[0].semantics
Exemple #7
0
 def _parse_seq(seq):
     """
     Parse the annotations of a chord sequence to get its semantics.

     Uses C{grammar}, C{DbInput}, C{parse_sequence_with_annotations} and
     C{ParseError} from the surrounding scope.

     @param seq: the chord sequence. It is wrapped using
         C{DbInput.from_sequence} before being parsed; its C{string_name}
         is used in the error message.
     @return: the semantics of the first parse, or C{None} if a
         C{ParseError} was raised (the error is reported on stderr).

     """
     try:
         gold_parses = parse_sequence_with_annotations(
             DbInput.from_sequence(seq),
             grammar=grammar,
             allow_subparses=False)
         # Got a result: return its semantics
         return gold_parses[0].semantics
     except ParseError as err:
         # Could not parse annotated sequence: report it and carry on
         sys.stderr.write("Could not parse sequence '%s': %s\n" %
                          (seq.string_name, err))
         return None
Exemple #8
0
 def _parse_seq(seq):
     """
     Parse the annotations of a chord sequence to get its semantics.

     Uses C{grammar}, C{DbInput}, C{parse_sequence_with_annotations} and
     C{ParseError} from the surrounding scope.

     @param seq: the chord sequence. It is wrapped using
         C{DbInput.from_sequence} before being parsed; its C{string_name}
         is used in the error message.
     @return: the semantics of the first parse, or C{None} if a
         C{ParseError} was raised (the error is reported on stderr).

     """
     try:
         gold_parses = parse_sequence_with_annotations(
             DbInput.from_sequence(seq),
             grammar=grammar,
             allow_subparses=False)
         # Got a result: return its semantics
         return gold_parses[0].semantics
     except ParseError as err:
         # Could not parse annotated sequence: report it and carry on
         sys.stderr.write("Could not parse sequence '%s': %s\n" %
                          (seq.string_name, err))
         return None
    def get_gold_analysis(self):
        """
        Try to build a gold analysis from this input's annotations.

        Unlike L{AnnotatedDbInput}, this input type cannot be assumed to
        have annotations. A C{ParseError} raised while parsing them (e.g.
        because they are missing or incomplete) is therefore not passed on
        to the caller.

        @return: the semantics of the first parse, or C{None} if parsing
            the annotations raised a C{ParseError}.

        """
        from jazzparser.evaluation.parsing import parse_sequence_with_annotations
        from jazzparser.grammar import get_grammar
        from jazzparser.parsers import ParseError

        try:
            gold = parse_sequence_with_annotations(
                self, get_grammar(), allow_subparses=False)
        except ParseError:
            # No usable annotations
            return None
        return gold[0].semantics
Exemple #10
0
    def get_gold_analysis(self):
        """
        Try to build a gold analysis from this input's annotations.

        Unlike L{AnnotatedDbInput}, this input type cannot be assumed to
        have annotations. A C{ParseError} raised while parsing them (e.g.
        because they are missing or incomplete) is therefore not passed on
        to the caller.

        @return: the semantics of the first parse, or C{None} if parsing
            the annotations raised a C{ParseError}.

        """
        from jazzparser.evaluation.parsing import parse_sequence_with_annotations
        from jazzparser.grammar import get_grammar
        from jazzparser.parsers import ParseError

        try:
            gold = parse_sequence_with_annotations(
                self, get_grammar(), allow_subparses=False)
        except ParseError:
            # No usable annotations
            return None
        return gold[0].semantics
Exemple #11
0
def main():
    """
    Command-line entry point: print a dependency graph for an annotated
    chord sequence.

    Positional arguments (read from the command line):
     1. the file to load the sequence from (loaded with filetype "db");
     2. the integer index of the sequence within that file.

    The sequence's annotations are parsed, the semantics of the first
    result is converted to a dependency graph and the graph is printed to
    stdout: as plain text by default, or as a complete Latex document
    (using tikz-dependency) if C{-l}/C{--latex} is given.

    Exits with status 1 if an argument is missing or the index is not an
    integer. In Latex mode the function always terminates via
    C{sys.exit}: status 0 if a graph was output, 1 otherwise.

    """
    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help' for a list of available options.")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        # Report a bad index like the other usage errors, not as a traceback
        sys.stderr.write("The sequence index must be an integer, not '%s'\n" %
                         arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename, filetype="db", options=fopts)

    name = dbinput.name

    anal = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(anal.semantics)

    # Join together chords that are on the same dependency node
    times = iter(sorted(time_map.values()))
    dep_time = next(times)
    current_chord = []
    joined_chords = []
    # Set once the node times are used up: all later chords join the last node
    finished = False
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if chord_time >= dep_time and not finished:
            # This chord starts a new node: store the group built so far
            if len(current_chord):
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [" ".join(filter_latex(str(crd)) for crd in item)
              for item in joined_chords]
    annotations = [" ".join(filter_latex(crd.category) for crd in item)
                   for item in joined_chords]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        print(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        if graph is not None:
            exit_status = 0
            print(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            print("\n\\vspace{15pt}")

        # Finish off the document
        print(r"""
\end{document}
""")
        sys.exit(exit_status)
    else:
        # Not outputing Latex
        print(graph)
Exemple #12
0
def main():
    """
    Command-line entry point: parse sequences from a sequence index file
    using the annotations stored in the same file.

    The only positional argument is the sequence index file. Every
    sequence in it (or just the one selected with C{--index}) is parsed
    with C{parse_sequence_with_annotations}; depending on the options,
    derivation traces and/or tonal space coordinates are printed for the
    results. Counts of full, partial and failed parses, and the semantics
    of the full parses, are collected along the way.

    NOTE(review): C{popts}, C{stats} and C{full_analyses} are built but not
    consumed in the part of the function visible here (nor is
    C{--output-set}) -- presumably they are used by code following this
    block; confirm.

    Exits with status 1 if no sequence file is given. Returns 0 after
    printing the parser option help (C{--popt help}).

    """
    usage = "%prog [options] <seq-file>"
    description = "Parses a sequence from a sequence index file using the "\
        "annotations stored in the same file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "--popt",
        "--parser-options",
        dest="popts",
        action="append",
        help=
        "specify options for the parser. Type '--popt help' to get a list of options (we use a DirectedCkyParser)"
    )
    parser.add_option("--derivations",
                      "--deriv",
                      dest="derivations",
                      action="store_true",
                      help="print out derivation traces of all the results")
    parser.add_option("--index",
                      "-i",
                      dest="index",
                      action="store",
                      type="int",
                      help="parse just the sequence with this index")
    parser.add_option("--quiet",
                      "-q",
                      dest="quiet",
                      action="store_true",
                      help="show only errors in the output")
    parser.add_option(
        "--tonal-space",
        "--ts",
        dest="tonal_space",
        action="store_true",
        help="show the tonal space path (with -q, shows only paths)")
    parser.add_option(
        "--output-set",
        "-o",
        dest="output_set",
        action="store",
        help="store the analyses to a tonal space analysis set with this name")
    parser.add_option(
        "--trace-parse",
        "-t",
        dest="trace_parse",
        action="store_true",
        help=
        "output a trace of the shift-reduce parser's operations in producing the full interpretation from the annotations"
    )
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        # Usage errors belong on stderr, not in the program's output
        sys.stderr.write("You must specify a sequence file\n")
        sys.exit(1)

    if options.popts is not None:
        # --popt may be repeated, so this is a list of option strings
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this tagger's option help
            print(options_help_text(
                DirectedCkyParser.PARSER_OPTIONS,
                intro="Available options for the directed parser"))
            return 0
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)

    grammar = get_grammar()
    if options.quiet:
        logger = create_plain_stderr_logger(log_level=logging.ERROR)
    else:
        logger = create_plain_stderr_logger()

    # The parser's own trace goes to the same logger, but only on request
    if options.trace_parse:
        parse_logger = logger
    else:
        parse_logger = None

    seq_index = SequenceIndex.from_file(arguments[0])
    # Get the chord sequence(s)
    if options.index is None:
        seqs = seq_index.sequences
    else:
        seqs = [seq_index.sequence_by_index(options.index)]
    logger.info("%d sequences\n" % len(seqs))

    full_analyses = []
    stats = {
        'full': 0,
        'partial': 0,
        'fail': 0,
    }
    # Try parsing every sequence
    for seq in seqs:
        logger.info("====== Sequence %s =======" % seq.string_name)
        try:
            results = parse_sequence_with_annotations(
                seq, grammar, logger=logger, parse_logger=parse_logger)
        except ParseError as err:
            logger.error("Error parsing: %s" % err)
            stats['fail'] += 1
        else:
            # This may have resulted in multiple partial parses
            logger.info("%d partial parses" % len(results))

            if len(results) == 1:
                stats['full'] += 1
            else:
                stats['partial'] += 1

            if options.derivations:
                # Output the derivation trace for each partial parse
                for result in results:
                    print("")
                    print(result.derivation_trace)

            if options.tonal_space:
                # Output the tonal space coordinates
                path = grammar.formalism.sign_to_coordinates(results[0])
                for i, point in enumerate(path):
                    print("%d, %d: %s" % (seq.id, i, point))

            # Only include a result in the output analyses if it was a full parse
            if len(results) == 1:
                full_analyses.append((seq.string_name, results[0].semantics))
            else:
                logger.warning("%s was not included in the output analyses, "\
                    "since it was not fully parsed" % seq.string_name)
Exemple #13
0
def main():
    """
    Command-line entry point: print a dependency graph for an annotated
    chord sequence.

    Positional arguments (read from the command line):
     1. the file to load the sequence from (loaded with filetype "db");
     2. the integer index of the sequence within that file.

    The sequence's annotations are parsed, the semantics of the first
    result is converted to a dependency graph and the graph is printed to
    stdout: as plain text by default, or as a complete Latex document
    (using tikz-dependency) if C{-l}/C{--latex} is given.

    Exits with status 1 if an argument is missing or the index is not an
    integer. In Latex mode the function always terminates via
    C{sys.exit}: status 0 if a graph was output, 1 otherwise.

    """
    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-l",
                      "--latex",
                      dest="latex",
                      action="store_true",
                      help="output Latex for the graphs using tikz-dependency")
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file (--file). Type '--fopt help' for a list of available options."
    )
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        # Report a bad index like the other usage errors, not as a traceback
        sys.stderr.write("The sequence index must be an integer, not '%s'\n" %
                         arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename,
                                 filetype="db",
                                 options=fopts)

    name = dbinput.name

    anal = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(anal.semantics)

    # Join together chords that are on the same dependency node
    times = iter(sorted(time_map.values()))
    dep_time = next(times)
    current_chord = []
    joined_chords = []
    # Set once the node times are used up: all later chords join the last node
    finished = False
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if chord_time >= dep_time and not finished:
            # This chord starts a new node: store the group built so far
            if len(current_chord):
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [
        " ".join(filter_latex(str(crd)) for crd in item)
        for item in joined_chords
    ]
    annotations = [
        " ".join(filter_latex(crd.category) for crd in item)
        for item in joined_chords
    ]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        print(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        if graph is not None:
            exit_status = 0
            print(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            print("\n\\vspace{15pt}")

        # Finish off the document
        print(r"""
\end{document}
""")
        sys.exit(exit_status)
    else:
        # Not outputing Latex
        print(graph)
Exemple #14
0
    def train(data,
              estimator,
              grammar,
              cutoff=0,
              logger=None,
              chord_map=None,
              order=2,
              backoff_orders=0,
              backoff_kwargs={}):
        """
        Initializes and trains an HMM in a supervised fashion using the given 
        training data. Training data should be chord sequence data (input 
        type C{bulk-db} or C{bulk-db-annotated}).

        For every input in C{data}, the annotations are parsed to get a gold
        standard analysis, from which a training sequence is built with one
        C{(chord, (relative point, function))} pair per chord. Inputs that
        cannot be parsed are logged as errors and skipped.

        NOTE(review): only this data preparation is visible here. The
        parameters C{estimator}, C{cutoff}, C{order}, C{backoff_orders} and
        C{backoff_kwargs} are not used in it and nothing is returned --
        presumably the model is built from C{training_data} by code
        following this block; confirm. C{backoff_kwargs} has a mutable
        default, which is harmless only as long as it is never modified.

        @param data: iterable of inputs. Each must provide C{chords}, whose
            items have C{root}, C{type} and C{duration} attributes.
        @param grammar: grammar used to parse the annotations; its
            C{formalism} converts the semantics to coordinates and
            functions.
        @param logger: logger for progress and error messages; a dummy
            logger is created if C{None}.
        @param chord_map: optional mapping applied to every chord's type.

        """
        # Prepare a dummy logger if none was given
        if logger is None:
            logger = create_dummy_logger()
        logger.info(">>> Beginning training of ngram backoff model")

        training_data = []
        # Generate the gold standard data by parsing the annotations
        for dbinput in data:
            # Get a gold standard tonal space sequence
            try:
                parses = parse_sequence_with_annotations(
                    dbinput, grammar, allow_subparses=False)
            except ParseError as err:
                # Just skip this sequence
                logger.error('Could not get a GS parse of %s: %s' %
                             (dbinput, err))
                continue
            # There should only be one of these now
            parse = parses[0]
            if parse is None:
                logger.error('Could not get a GS parse of %s' % (dbinput))
                continue

            # Get the form of the analysis we need for the training
            if chord_map is None:
                chords = [(c.root, c.type) for c in dbinput.chords]
            else:
                chords = [(c.root, chord_map[c.type]) for c in dbinput.chords]

            # Only the points are needed here: the times used for the
            #  synchronization below come with the functions
            points, __ = zip(
                *grammar.formalism.semantics_to_coordinates(parse.semantics))
            # Run through the sequence, transforming absolute points into
            #  the condensed relative representation
            ec0 = EnharmonicCoordinate.from_harmonic_coord(points[0])
            # The first point is relative to the origin and always in the
            #  (0,0) enharmonic space
            rel_points = [(0, 0, ec0.x, ec0.y)]
            for point in points[1:]:
                ec1 = EnharmonicCoordinate.from_harmonic_coord(point)
                # Find the nearest enharmonic instance of this point to the last
                nearest = ec0.nearest((ec1.x, ec1.y))
                # Work out how much we have to shift this by to get the point
                dX = ec1.X - nearest.X
                dY = ec1.Y - nearest.Y
                rel_points.append((dX, dY, ec1.x, ec1.y))
                ec0 = ec1
            funs, times = zip(
                *grammar.formalism.semantics_to_functions(parse.semantics))

            ### Synchronize the chords with the points and functions
            # We may need to repeat chords to match up with analysis
            #  points that span multiple chords
            analysis = iter(zip(rel_points, funs, times))
            rel_point, fun, __ = next(analysis)
            # The analysis point we will move on to next, as a
            #  (rel_point, fun, time) tuple. None once there are no more
            #  points, which is already the case if the analysis has only one
            upcoming = next(analysis, None)
            # Keep track of how much time has elapsed
            time = 0
            training_seq = []
            for crd_pair, chord in zip(chords, dbinput.chords):
                if upcoming is not None and time >= upcoming[2]:
                    # Move on to the next analysis point
                    rel_point, fun = upcoming[0], upcoming[1]
                    # No more points: keep using the same to the end
                    upcoming = next(analysis, None)
                training_seq.append((crd_pair, (rel_point, fun)))
                time += chord.duration
            training_data.append(training_seq)
Exemple #15
0
 def train(data, estimator, grammar, cutoff=0, logger=None, 
             chord_map=None, order=2, backoff_orders=0, backoff_kwargs={}):
     """
     Initializes and trains an HMM in a supervised fashion using the given 
     training data. Training data should be chord sequence data (input 
     type C{bulk-db} or C{bulk-db-annotated}).

     For every input in C{data}, the annotations are parsed to get a gold
     standard analysis, from which a training sequence is built with one
     C{(chord, (relative point, function))} pair per chord. Inputs that
     cannot be parsed are logged as errors and skipped.

     NOTE(review): only this data preparation is visible here. The
     parameters C{estimator}, C{cutoff}, C{order}, C{backoff_orders} and
     C{backoff_kwargs} are not used in it and nothing is returned --
     presumably the model is built from C{training_data} by code
     following this block; confirm. C{backoff_kwargs} has a mutable
     default, which is harmless only as long as it is never modified.

     @param data: iterable of inputs. Each must provide C{chords}, whose
         items have C{root}, C{type} and C{duration} attributes.
     @param grammar: grammar used to parse the annotations; its
         C{formalism} converts the semantics to coordinates and
         functions.
     @param logger: logger for progress and error messages; a dummy
         logger is created if C{None}.
     @param chord_map: optional mapping applied to every chord's type.

     """
     # Prepare a dummy logger if none was given
     if logger is None:
         logger = create_dummy_logger()
     logger.info(">>> Beginning training of ngram backoff model")

     training_data = []
     # Generate the gold standard data by parsing the annotations
     for dbinput in data:
         # Get a gold standard tonal space sequence
         try:
             parses = parse_sequence_with_annotations(
                 dbinput, grammar, allow_subparses=False)
         except ParseError as err:
             # Just skip this sequence
             logger.error('Could not get a GS parse of %s: %s' % (dbinput,err))
             continue
         # There should only be one of these now
         parse = parses[0]
         if parse is None:
             logger.error('Could not get a GS parse of %s' % (dbinput))
             continue

         # Get the form of the analysis we need for the training
         if chord_map is None:
             chords = [(c.root, c.type) for c in dbinput.chords]
         else:
             chords = [(c.root, chord_map[c.type]) for c in dbinput.chords]

         # Only the points are needed here: the times used for the
         #  synchronization below come with the functions
         points, __ = zip(*grammar.formalism.semantics_to_coordinates(
                                                 parse.semantics))
         # Run through the sequence, transforming absolute points into 
         #  the condensed relative representation
         ec0 = EnharmonicCoordinate.from_harmonic_coord(points[0])
         # The first point is relative to the origin and always in the 
         #  (0,0) enharmonic space
         rel_points = [(0,0,ec0.x,ec0.y)]
         for point in points[1:]:
             ec1 = EnharmonicCoordinate.from_harmonic_coord(point)
             # Find the nearest enharmonic instance of this point to the last
             nearest = ec0.nearest((ec1.x, ec1.y))
             # Work out how much we have to shift this by to get the point
             dX = ec1.X - nearest.X
             dY = ec1.Y - nearest.Y
             rel_points.append((dX,dY,ec1.x,ec1.y))
             ec0 = ec1
         funs,times = zip(*grammar.formalism.semantics_to_functions(
                                                 parse.semantics))

         ### Synchronize the chords with the points and functions
         # We may need to repeat chords to match up with analysis 
         #  points that span multiple chords
         analysis = iter(zip(rel_points,funs,times))
         rel_point, fun, __ = next(analysis)
         # The analysis point we will move on to next, as a
         #  (rel_point, fun, time) tuple. None once there are no more
         #  points, which is already the case if the analysis has only one
         upcoming = next(analysis, None)
         # Keep track of how much time has elapsed
         time = 0
         training_seq = []
         for crd_pair,chord in zip(chords, dbinput.chords):
             if upcoming is not None and time >= upcoming[2]:
                 # Move on to the next analysis point
                 rel_point, fun = upcoming[0], upcoming[1]
                 # No more points: keep using the same to the end
                 upcoming = next(analysis, None)
             training_seq.append((crd_pair, (rel_point,fun)))
             time += chord.duration
         training_data.append(training_seq)