Ejemplo n.º 1
0
def main():
    """Train a backoff builder model from the command line.

    Positional arguments are the model type, the model name and the input
    data file. The data is loaded with ``command_line_input`` (only the
    ``bulk-db`` and ``bulk-db-annotated`` file types are allowed). With
    ``-p`` the data is split by ``holdout_partition`` and one model is
    trained and saved per partition; otherwise a single model is trained on
    everything that was loaded.

    Exits with status 1 if fewer than three arguments are given and with
    status 0 after printing the option list for ``--opts help``.
    """
    parser = OptionParser(
        usage="%prog [options] <model-type> <model_name> <in-file>",
        description="Trains a backoff builder model using the given "
                    "input data. Specify a model type (ngram, etc) and a name to "
                    "identify it. The data file should be a stored SequenceIndex file.")
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. "
             "Trains a model on the complement of each partition, "
             "so it can be tested on the partition. "
             "The models will be named <NAME>n, where <NAME> is the "
             "model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="store",
        help="options to pass to the model trainer. "
             "Type '--opts help' for a list of options for a "
             "particular model type.")
    # Options describing the input file
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. "
             "Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', "
             "using '--ft <type>' to select file type, for a list "
             "of available options.")
    options, arguments = parse_args_with_config(parser)

    # All three positional arguments are required
    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and "
                         "an input data file as arguments\n")
        sys.exit(1)
    model_type, model_name, data_path = arguments[:3]
    filename = os.path.abspath(data_path)

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Read the sequence data from the input file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Interpret the training options string, if one was given
    raw_opts = options.training_opts
    if raw_opts is None:
        training_opts = {}
    elif raw_opts.lower() != "help":
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(raw_opts),
            model_cls.TRAINING_OPTIONS)
    else:
        print(options_help_text(
            model_cls.TRAINING_OPTIONS,
            intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)

    # Work out which (model name, training data) pairs to train
    if options.partitions is None:
        models = [(model_name, input_data)]
    else:
        models = []
        parts = holdout_partition(input_data, options.partitions)
        for num, seqs in enumerate(parts):
            part_name = builder_cls.partition_model_name(model_name, num)
            models.append((part_name, seqs))

    for part_name, seqs in models:
        # Each model starts fresh, is trained on its data and stored
        model = model_cls(part_name, options=training_opts)
        model.train(seqs)
        model.save()
        print("Trained model %s" % (part_name))
Ejemplo n.º 2
0
def main():
    """Load a trained PCFG model and build the annotated tree for an input.

    Positional arguments are the model name and an input file, which is
    loaded with ``command_line_input`` as filetype ``db``. The grammar named
    by ``-g`` (or the default) supplies the ``PcfgModel`` class used to load
    the model.

    Exits with status 1 if either positional argument is missing.

    Raises:
        ModelTrainingError: if ``build_tree_for_sequence`` fails with a
            ``TreeBuildError`` for the loaded sequence.
    """
    # The input file is a required argument, so it belongs in the usage line
    usage = "%prog [options] <model-name> <in-file>"
    description = "Debug a PCFG model"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-g", "--grammar", dest="grammar", action="store", \
                        help="use the named grammar instead of the default.")
    parser.add_option("-d", "--debug", dest="debug", action="store_true", \
                        help="output debugging information during generation")
    parser.add_option("--file-options", "--fopt", dest="file_options", \
                        action="store", help="options for the input file "\
                        "(--file). Type '--fopt help' for a list of available "\
                        "options.")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        sys.stderr.write("Specify a model name\n")
        sys.exit(1)
    model_name = arguments[0]

    if len(arguments) < 2:
        sys.stderr.write("Specify an input file\n")
        # Stop here: without this the lookup of arguments[1] below would
        # fail with an IndexError
        sys.exit(1)
    input_filename = arguments[1]

    grammar = get_grammar(options.grammar)
    PcfgModel = grammar.formalism.PcfgModel
    # Load the trained model
    model = PcfgModel.load_model(model_name)

    # Load the input file named on the command line
    input_data = command_line_input(filename=input_filename,
                                    filetype="db",
                                    options=options.file_options)

    # Prepare the input and annotations
    # NOTE(review): model, categories, str_inputs and tree are not used in
    # the code visible here; they are kept because evaluating them may
    # matter (e.g. failing early on a missing category) - confirm
    sequence = input_data.sequence
    categories = [chord.category for chord in sequence.iterator()]
    str_inputs = input_data.inputs
    # Build the implicit normal-form tree from the annotations
    try:
        tree = build_tree_for_sequence(sequence)
    except TreeBuildError as err:
        raise ModelTrainingError("could not build a tree for '%s': %s" % \
            (sequence.string_name, err))
Ejemplo n.º 3
0
def main():
    """Train a supertagging model from the command line.

    Positional arguments are the model type, the model name and the input
    data file. The model type is validated first (against
    ``TRAINABLE_MODELS`` and ``TAGGERS``) so that ``--opts help`` can list
    the training options of a model type without a name or data file being
    given. With ``-p N`` (N > 1) one model is trained per partition taken
    from ``input_data.get_partitions``; otherwise a single model is trained
    on all the data. ``--log`` sends training output to
    ``<log><modelname>.log`` through ``create_logger``.

    Exits with status 1 on missing or invalid arguments and with status 0
    after printing the help for ``--opts help``.
    """
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)

    # NOTE(review): the grammar object is not used below; the call is kept
    # in case constructing it has side effects - confirm
    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        sys.stderr.write("You must specify a model type, a model name and an input data file as arguments\n")
        # Stop here: without this the lookup of arguments[0] below would
        # fail with an IndexError
        sys.exit(1)
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        sys.stderr.write("'%s' is not a valid model type. Available taggers are: %s\n" % \
            (model_type, ", ".join(TRAINABLE_MODELS)))
        sys.exit(1)
    if model_type not in TAGGERS:
        sys.stderr.write("'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct\n" % model_type)
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        sys.stderr.write("'%s' tagger cannot be trained with this script. Only model taggers can be.\n" % (tagger_cls.__name__))
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print(options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__))
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts),
                            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        sys.stderr.write("You must specify a model type, a model name and an input data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        # The second element of get_partitions' result supplies the
        # training data for each partition model
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name, num), seqs)
                  for num, seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print("Trained model %s" % (part_name))
Ejemplo n.º 4
0
def main():
    """Train a chord labeling model from the command line.

    Positional arguments are the model name and the input data file, which
    is loaded with ``command_line_input`` (bulk input types only). With
    ``-p`` the chord data of the input is partitioned and one model named
    ``<model_name><n>`` is trained per partition via
    ``HPChordLabeler.train``; otherwise a single model is trained with no
    separate chord data. ``--log`` sends training output to
    ``<log><modelname>.log`` through ``create_logger``.

    Exits with status 1 if arguments are missing or if partitioning is
    requested without chord data, and with status 0 after printing the help
    for ``--opts help``.
    """
    parser = OptionParser(
        usage="%prog [options] <model_name> <in-file>",
        description="Trains a chord labeling model using the given "
                    "input data. The data file may be a stored SequenceIndex file, or "
                    "any other type of bulk data file.")
    parser.add_option(
        '-p', '--partitions', dest="partitions", action="store", type="int",
        help="train a number of partitions of the given data. "
             "Trains a model on the complement of each partition, "
             "so it can be tested on the partition. "
             "The models will be named <NAME>n, where <NAME> is the "
             "model name and n the partition number.")
    parser.add_option(
        '--opts', dest="training_opts", action="append",
        help="options to pass to the model trainer. "
             "Type '--opts help' for a list of options for a "
             "particular model type.")
    # Options describing the input file
    parser.add_option(
        "--filetype", "--ft", dest="filetype", action="store",
        help="select the file type for the input file. "
             "Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options", "--fopt", dest="file_options", action="store",
        help="options for the input file. Type '--fopt help', "
             "using '--ft <type>' to select file type, for a list "
             "of available options.")
    # Where to send training logs
    parser.add_option(
        '--log', dest="log", action="store",
        help="file to output training logs to. "
             "Specify a base filename; <modelname>.log will be "
             "added to the end")
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # --opts may be repeated, so the parsed value is a list (or None)
    opt_strings = options.training_opts
    if opt_strings is None:
        training_opts = {}
    elif "help" not in [opt.lower() for opt in opt_strings]:
        training_opts = ModuleOption.process_option_string(opt_strings)
    else:
        print(options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                                intro="Training options:"))
        sys.exit(0)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input data "
                         "file as arguments\n")
        sys.exit(1)
    model_name = arguments[0]
    filename = os.path.abspath(arguments[1])

    # Load the sequence data, accepting bulk input types only
    bulk_types = get_input_type_names(single=False, bulk=True)
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=bulk_types)

    # Partitioning applies to the chord data only, not the MIDI data, so it
    # needs an input that actually carries chord data
    if options.partitions is not None:
        has_chords = isinstance(input_data, MidiTaggerTrainingBulkInput) \
            and input_data.chords is not None
        if not has_chords:
            sys.stderr.write("Can only partition chord data and no chord "
                             "data was supplied\n")
            sys.exit(1)

    if not options.partitions:
        models = [(model_name, None)]
    else:
        # The input includes chord training data: one model per partition
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = []
        for num, chord_data in enumerate(parts):
            models.append(("%s%d" % (model_name, num), chord_data))

    for part_name, chord_data in models:
        logger = None
        if options.log is not None:
            # Log this model's training to its own file
            logfile = "%s%s.log" % (options.log, part_name)
            print("Logging output to file %s" % logfile)
            logger = create_logger(filename=logfile)

        # Train a fresh model under this name
        model = HPChordLabeler.train(input_data,
                                     part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print("Trained model %s" % (part_name))
Ejemplo n.º 5
0
def main():
    """Print the dependency graph for one annotated sequence.

    Positional arguments are the input file and the integer index of the
    sequence to load from it (passed on to the ``db`` file loader as the
    ``index`` file option). The sequence is parsed using its annotations,
    the semantics of the first result are converted to a dependency graph
    and the graph is printed, either as is or, with ``-l``, as a complete
    LaTeX document.

    Exits with status 1 if an argument is missing or the index is not an
    integer. In LaTeX mode the process always ends through ``sys.exit``.
    """
    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-l", "--latex", dest="latex", action="store_true", help="output Latex for the graphs using tikz-dependency")
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help' for a list of available options.")
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        # Report a bad index like the other argument errors instead of
        # letting the ValueError traceback through
        sys.stderr.write("The sequence index must be an integer, got '%s'\n"
                         % arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename, filetype="db", options=fopts)

    name = dbinput.name

    analysis = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(analysis.semantics)

    # Join together chords that are on the same dependency node: a new
    # group starts whenever a chord's time reaches the next node time
    times = iter(sorted(time_map.values()))
    # With no dependency nodes at all there is no boundary to wait for, so
    # every chord goes into a single group
    dep_time = next(times, None)
    finished = dep_time is None
    current_chord = []
    joined_chords = []
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if not finished and chord_time >= dep_time:
            if current_chord:
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                # No more node times: remaining chords join the last group
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [" ".join(filter_latex(str(crd)) for crd in item)
              for item in joined_chords]
    annotations = [" ".join(filter_latex(crd.category) for crd in item)
                   for item in joined_chords]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        print(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        # NOTE(review): graph.words was already assigned above, so graph
        # cannot be None at this point; the check is kept as written
        if graph is not None:
            exit_status = 0
            print(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            print("\n\\vspace{15pt}")

        # Finish off the document
        print(r"""
\end{document}
""")
        sys.exit(exit_status)
    else:
        # Not outputting Latex
        print(graph)
Ejemplo n.º 6
0
def main():
    """Play the chunks of each song in a segmented bulk midi input file.

    The single positional argument is the input file, loaded with
    ``command_line_input`` as filetype ``bulk-segmidi``. Each song's chunks
    are played in turn with ``play_stream``, optionally printing their
    events (``-p``, or ``--pno`` for note-ons only) and pausing ``--gap``
    seconds between chunks. Ctrl-C stops the current song early. After each
    song the user chooses to continue, replay or exit.

    Exits with status 1 if no input file is given and with status 0 when
    the user exits or there are no more songs.
    """
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-g',
        '--gap',
        dest="gap",
        action="store",
        type="float",
        help=
        "time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value",
        default=0.0)
    parser.add_option('-p',
                      '--print',
                      dest="print_events",
                      action="store_true",
                      help="print out all events for each chunk")
    parser.add_option('--pno',
                      '--print-note-ons',
                      dest="print_note_ons",
                      action="store_true",
                      help="print only note-on events")
    parser.add_option(
        '--fopt',
        dest="file_options",
        action="store",
        help=
        "options for file loading. Use '--fopt help' to see available options")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        # Report the missing argument rather than failing with IndexError
        sys.stderr.write("You must specify an input file as an argument\n")
        sys.exit(1)
    filename = arguments[0]
    # Load the input file named on the command line
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn. A for loop ends cleanly when the songs run
    # out (or the input is empty) instead of raising StopIteration.
    for segmidi in input_data:
        # Keep replaying this song until the user moves on or exits
        while True:
            print("###############################")
            print("Playing '%s'" % segmidi.name)
            print("%s-beat chunks with a %d-tick offset\n" %
                  (segmidi.time_unit, segmidi.tick_offset))
            slices = list(segmidi)

            try:
                for i, strm in enumerate(slices):
                    print("Playing chunk %d: %d events" %
                          (i, len(strm.trackpool)))
                    if options.print_events:
                        print("\n".join("  %s" % ev
                                        for ev in sorted(strm.trackpool)))
                    elif options.print_note_ons:
                        print("\n".join("  %s" % ev
                                        for ev in sorted(strm.trackpool)
                                        if type(ev) is NoteOnEvent))
                    # Play this midi chunk
                    play_stream(strm, block=True)
                    # Leave a gap before continuing
                    if options.gap > 0.0:
                        time.sleep(options.gap)
            except KeyboardInterrupt:
                # Ctrl-C only cuts the current song short
                pass

            print("Continue to next song (<enter>); exit (x); play again (p)")
            command = raw_input(">> ").lower()
            if command == "x":
                sys.exit(0)
            elif command == "":
                # Move to next
                break
            elif command != "p":
                print("Unknown command: %s" % command)
                print("Playing again...")
            # "p" and unknown commands both replay the same song
    sys.exit(0)
Ejemplo n.º 7
0
def main():
    """Play the chunks of each song in a segmented bulk midi input file.

    The single positional argument is the input file, loaded with
    ``command_line_input`` as filetype ``bulk-segmidi``. Each song's chunks
    are played in turn with ``play_stream``, optionally printing their
    events (``-p``, or ``--pno`` for note-ons only) and pausing ``--gap``
    seconds between chunks. Ctrl-C stops the current song early. After each
    song the user chooses to continue, replay or exit.

    Exits with status 1 if no input file is given and with status 0 when
    the user exits or there are no more songs.
    """
    usage = "%prog [options] <input>"
    description = "Divides midi files into chunks, with size and offset, "\
        "given in the input file, and plays "\
        "the chunks consecutively. Input is a segmented bulk midi input file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-g', '--gap', dest="gap", action="store", type="float", help="time to wait between playing each chunk in seconds (potentially float). It will take some time to load the chunk and the sequencer usually pauses before reporting it's done: this is not included in this value", default=0.0)
    parser.add_option('-p', '--print', dest="print_events", action="store_true", help="print out all events for each chunk")
    parser.add_option('--pno', '--print-note-ons', dest="print_note_ons", action="store_true", help="print only note-on events")
    parser.add_option('--fopt', dest="file_options", action="store", help="options for file loading. Use '--fopt help' to see available options")
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 1:
        # Report the missing argument rather than failing with IndexError
        sys.stderr.write("You must specify an input file as an argument\n")
        sys.exit(1)
    filename = arguments[0]
    # Load the input file named on the command line
    input_data = command_line_input(filename=filename,
                                    filetype='bulk-segmidi',
                                    options=options.file_options)

    # Play each input in turn. A for loop ends cleanly when the songs run
    # out (or the input is empty) instead of raising StopIteration.
    for segmidi in input_data:
        # Keep replaying this song until the user moves on or exits
        while True:
            print("###############################")
            print("Playing '%s'" % segmidi.name)
            print("%s-beat chunks with a %d-tick offset\n" % \
                                    (segmidi.time_unit, segmidi.tick_offset))
            slices = list(segmidi)

            try:
                for i,strm in enumerate(slices):
                    print("Playing chunk %d: %d events" % (i, len(strm.trackpool)))
                    if options.print_events:
                        print("\n".join("  %s" % ev for ev in sorted(strm.trackpool)))
                    elif options.print_note_ons:
                        print("\n".join("  %s" % ev for ev in sorted(strm.trackpool) \
                                                    if type(ev) is NoteOnEvent))
                    # Play this midi chunk
                    play_stream(strm, block=True)
                    # Leave a gap before continuing
                    if options.gap > 0.0:
                        time.sleep(options.gap)
            except KeyboardInterrupt:
                # Ctrl-C only cuts the current song short
                pass

            print("Continue to next song (<enter>); exit (x); play again (p)")
            command = raw_input(">> ").lower()
            if command == "x":
                sys.exit(0)
            elif command == "":
                # Move to next
                break
            elif command != "p":
                print("Unknown command: %s" % command)
                print("Playing again...")
            # "p" and unknown commands both replay the same song
    sys.exit(0)
Ejemplo n.º 8
0
def main():
    """Print the dependency graph for one annotated sequence.

    Positional arguments are the input file and the integer index of the
    sequence to load from it (passed on to the ``db`` file loader as the
    ``index`` file option). The sequence is parsed using its annotations,
    the semantics of the first result are converted to a dependency graph
    and the graph is printed, either as is or, with ``-l``, as a complete
    LaTeX document.

    Exits with status 1 if an argument is missing or the index is not an
    integer. In LaTeX mode the process always ends through ``sys.exit``.
    """
    usage = "%prog [options] <results-files> <index>"
    description = "Prints a dependency tree for a parse result"
    parser = OptionParser(usage=usage, description=description)
    parser.add_option("-l",
                      "--latex",
                      dest="latex",
                      action="store_true",
                      help="output Latex for the graphs using tikz-dependency")
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file (--file). Type '--fopt help' for a list of available options."
    )
    options, arguments = parser.parse_args()

    if len(arguments) < 1:
        sys.stderr.write("Specify a file to read the results from\n")
        sys.exit(1)
    filename = arguments[0]
    if len(arguments) < 2:
        sys.stderr.write("Specify an index of the sequence to load\n")
        sys.exit(1)
    try:
        index = int(arguments[1])
    except ValueError:
        # Report a bad index like the other argument errors instead of
        # letting the ValueError traceback through
        sys.stderr.write("The sequence index must be an integer, got '%s'\n"
                         % arguments[1])
        sys.exit(1)

    grammar = get_grammar()

    # We always need an index, so this is given as an argument
    # Put it in the options list for loading the file
    fopts = options.file_options
    if fopts:
        fopts += ":index=%d" % index
    else:
        fopts = "index=%d" % index
    # Load the sequence index file
    dbinput = command_line_input(filename=filename,
                                 filetype="db",
                                 options=fopts)

    name = dbinput.name

    analysis = parse_sequence_with_annotations(dbinput, grammar)[0]
    graph, time_map = semantics_to_dependency_graph(analysis.semantics)

    # Join together chords that are on the same dependency node: a new
    # group starts whenever a chord's time reaches the next node time
    times = iter(sorted(time_map.values()))
    # With no dependency nodes at all there is no boundary to wait for, so
    # every chord goes into a single group
    dep_time = next(times, None)
    finished = dep_time is None
    current_chord = []
    joined_chords = []
    for chord_time, chord in sorted(dbinput.sequence.time_map.items()):
        if not finished and chord_time >= dep_time:
            if current_chord:
                joined_chords.append(current_chord)
            current_chord = [chord]
            try:
                dep_time = next(times)
            except StopIteration:
                # No more node times: remaining chords join the last group
                finished = True
        else:
            current_chord.append(chord)
    joined_chords.append(current_chord)

    chords = [
        " ".join(filter_latex(str(crd)) for crd in item)
        for item in joined_chords
    ]
    annotations = [
        " ".join(filter_latex(crd.category) for crd in item)
        for item in joined_chords
    ]
    graph.words = annotations

    if options.latex:
        # Exit with status 1 if we don't output anything
        exit_status = 1

        # Output a full Latex document in one go
        if name is not None:
            title = r"""\title{%s}
\author{}
\date{}""" % name.capitalize()
            maketitle = r"\maketitle\thispagestyle{empty}\vspace{-20pt}"
        else:
            title = ""
            maketitle = ""

        # Print the header
        print(r"""\documentclass[a4paper]{article}
\usepackage{tikz-dependency}
%% You may need to set paperheight (for width) and paperwidth (for height) to get things to fit
\usepackage[landscape,margin=1cm,paperheight=50cm]{geometry}
\pagestyle{empty}

%(title)s

\begin{document}
%(maketitle)s

\tikzstyle{every picture}+=[remember picture]
\centering

""" % {'title': title, 'maketitle': maketitle})

        # NOTE(review): graph.words was already assigned above, so graph
        # cannot be None at this point; the check is kept as written
        if graph is not None:
            exit_status = 0
            print(dependency_graph_to_latex(graph,
                                            fmt_lab=_fmt_label,
                                            extra_rows=[chords]))
            print("\n\\vspace{15pt}")

        # Finish off the document
        print(r"""
\end{document}
""")
        sys.exit(exit_status)
    else:
        # Not outputting Latex
        print(graph)
Ejemplo n.º 9
0
def main():
    """Label MIDI input with chords using a trained chord labeling model.

    Positional arguments are the model name and the input (MIDI) data file,
    loaded with ``command_line_input`` as ``segmidi`` or ``bulk-segmidi``.
    For each input the model's labels are printed, either one chord per
    segment (``--single``) or every label with its probability. With ``-r``
    the best label sequence is realized as a midi file: a single input is
    written to the given filename, and each input of a bulk file to
    ``<filename>-<n>``.

    Exits with status 1 if arguments are missing and with status 0 after
    printing the help for ``--lopt help``.
    """
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='segmidi')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Labeling options
    parser.add_option(
        "--labeler-options",
        "--lopt",
        dest="labeler_options",
        action="append",
        help=
        "options for the labeler. Type '--lopt help' for a list of available options."
    )
    parser.add_option(
        "--no-key",
        "--nk",
        dest="no_key",
        action="store_true",
        help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option(
        "--single",
        "-1",
        dest="single",
        action="store_true",
        help=
        "show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)"
    )
    parser.add_option(
        '-r',
        '--realize',
        dest="realize",
        action="store",
        help="realize the chord sequence as a midi file, overlaid on the input"
    )
    parser.add_option(
        '--chords-only',
        dest="chords_only",
        action="store_true",
        help=
        "only realize the chords: don't overlay on the input midi (only works with -r)"
    )
    options, arguments = parse_args_with_config(parser)

    if options.labeler_options is not None and "help" in options.labeler_options:
        print(options_help_text(HPChordLabeler.LABELING_OPTIONS,
                                intro="Options for HP chord labeler"))
        sys.exit(0)

    if len(arguments) < 2:
        sys.stderr.write("You must specify a model name and an input "\
            "(MIDI) data file as arguments\n")
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True

    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)

    input_data = command_line_input(filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=['segmidi', 'bulk-segmidi'])
    # The flag must be true for bulk input: it decides below whether each
    # realized file gets its own numbered name
    bulk = is_bulk_type(type(input_data))
    if not bulk:
        # Wrap a single input so it goes through the same loop
        input_data = [input_data]

    for i, data in enumerate(input_data):
        input_stream = data.stream
        print("Read midi data in %d segments" % len(data))

        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger
        # (the result is not used here)
        labs = model.label_lattice(data, options=lopt_dict)

        if options.single:
            # Special output for single label output
            print(", ".join(["%s" % timelabs[0][0] for timelabs in labels]))
        else:
            # Print out the labels for each timestep
            for step, timelabs in enumerate(labels):
                print("%d: %s" % (step, ", ".join([
                    "%s (%.2e)" % (label, prob) for (label, prob) in timelabs
                ])))

        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print("Realizing output chord sequence")
            real = ChordSequenceRealizer(best_labels,
                                         model.chord_vocab,
                                         resolution=input_stream.resolution,
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream,
                                       offset=data.tick_offset)

            if bulk:
                # Several inputs: number the files so they don't overwrite
                # one another
                out_filename = "%s-%d" % (options.realize, i)
            else:
                out_filename = options.realize
            write_midifile(stream, out_filename)
Ejemplo n.º 10
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="append", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)
    
    grammar = Grammar()
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS, intro="Training options:")
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_string(options.training_opts)
        
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))
    
    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)
    
    if options.partitions:
        # The input includes chord training data
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        models = [(model_name,None)]
    
    for part_name,chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None
        
        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data, part_name, 
                                     logger=logger, 
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
Ejemplo n.º 11
0
def main():
    usage = "%prog [options] <model_name> <in-file>"
    description = "Loads a chord labeling model and uses it to assign chord "\
        "labels to the given MIDI file."
    parser = OptionParser(usage=usage, description=description)
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='segmidi')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Labeling options
    parser.add_option("--labeler-options", "--lopt", dest="labeler_options", action="append", help="options for the labeler. Type '--lopt help' for a list of available options.")
    parser.add_option("--no-key", "--nk", dest="no_key", action="store_true", help="merge together labels with the same key (same as --lopt nokey)")
    # Output options
    parser.add_option("--single", "-1", dest="single", action="store_true", help="show only one chord per time segment (same as --lopt n=1, but formats the output in a simpler way)")
    parser.add_option('-r', '--realize', dest="realize", action="store", help="realize the chord sequence as a midi file, overlaid on the input")
    parser.add_option('--chords-only', dest="chords_only", action="store_true", help="only realize the chords: don't overlay on the input midi (only works with -r)")
    options, arguments = parse_args_with_config(parser)
    
    if options.labeler_options is not None and "help" in options.labeler_options:
        print options_help_text(HPChordLabeler.LABELING_OPTIONS, intro="Options for HP chord labeler")
        sys.exit(0)
        
    if len(arguments) < 2:
        print >>sys.stderr, "You must specify a model name and an input "\
            "(MIDI) data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]
    
    # Process the labeler options
    lopt_dict = ModuleOption.process_option_string(options.labeler_options)
    if options.single:
        # No point in getting more than one label, since we only display one
        lopt_dict['n'] = 1
    if options.no_key:
        # Just set the nokey option
        lopt_dict['nokey'] = True
    
    # Check they're valid before doing anything else
    HPChordLabeler.process_labeling_options(lopt_dict)
    
    input_data = command_line_input(filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=['segmidi','bulk-segmidi'])
    bulk = not is_bulk_type(type(input_data))
    if bulk:
        input_data = [input_data]
        
    for i,data in enumerate(input_data):
        input_stream = data.stream
        print "Read midi data in %d segments" % len(data)
        
        # Load the model
        model = HPChordLabeler.load_model(model_name)
        # Perform labeling
        labels = model.label(data, options=lopt_dict)
        # Try labeling as it will be passed to the tagger
        labs = model.label_lattice(data, options=lopt_dict)
        
        if options.single:
            # Special output for single label output
            print ", ".join(["%s" % timelabs[0][0] for timelabs in labels])
        else:
            # Print out the labels for each timestep
            for time,timelabs in enumerate(labels):
                print "%d: %s" % (time, 
                    ", ".join(["%s (%.2e)" % (label,prob) for (label,prob) in timelabs]))
        
        if options.realize is not None:
            # Get the single best chord label for each time
            best_labels = [timelabs[0][0] for timelabs in labels]
            # Realize as a midi file
            print "Realizing output chord sequence"
            real = ChordSequenceRealizer(best_labels, 
                                         model.chord_vocab, 
                                         resolution=input_stream.resolution, 
                                         chord_length=data.time_unit,
                                         text_events=True)
            if options.chords_only:
                # Don't overlay
                stream = real.generate(offset=data.tick_offset)
            else:
                stream = real.generate(overlay=input_stream, offset=data.tick_offset)
                
            if bulk:
                filename = "%s-%d" % (options.realize, i)
            else:
                filename = options.realize
            write_midifile(stream, filename)
Ejemplo n.º 12
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (
            tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
Ejemplo n.º 13
0
     toptstr = ":".join(toptstr)
 else:
     toptstr = ""
 topts = ModuleOption.process_option_string(toptstr)
 # Check that the options are valid
 try:
     tagger_cls.check_options(topts)
 except ModuleOptionError, err:
     print "Problem with tagger options (--topt): %s" % err
     return 1
 
 ############################ Input processing #####################
 stdinput = False
 # Try getting a file from the command-line options
 input_data = command_line_input(filename=options.file, 
                                 filetype=options.filetype,
                                 options=options.file_options)
 # Record progress in this for helpful output
 if input_data is None:
     # No input file: process command line input
     input_string = " ".join(clinput)
     input_list = [input_string]
     name_getter = iter(["commandline"])
     # Take input from stdin if nothing else is given
     if len(input_string) == 0:
         stdinput = True
         # Use integers to identify each input
         name_getter = count()
         num_inputs = None
     else:
         num_inputs = 1
Ejemplo n.º 14
0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1

    ############################ Input processing #####################
    stdinput = False
    # Try getting a file from the command-line options
    input_data = command_line_input(filename=options.file,
                                    filetype=options.filetype,
                                    options=options.file_options)
    # Record progress in this for helpful output
    if input_data is None:
        # No input file: process command line input
        input_string = " ".join(clinput)
        input_list = [input_string]
        name_getter = iter(["commandline"])
        # Take input from stdin if nothing else is given
        if len(input_string) == 0:
            stdinput = True
            # Use integers to identify each input
            name_getter = count()
            num_inputs = None
        else:
            num_inputs = 1
Ejemplo n.º 15
0
 try:
     DirectedCkyParser.check_options(popts)
 except ModuleOptionError, err:
     logger.error("Problem with parser options (--popt): %s" % err)
     sys.exit(1)
 
 if len(arguments) < 3:
     print >>sys.stderr, "Specify a song corpus name, a chord labeling "\
         "model name, and a file to read midi data from"
     sys.exit(1)
 
 # First argument is an TonalSpaceAnalysisSet
 corpus_filename = arguments[0]
 # Load the corpus file
 corpus = command_line_input(corpus_filename, 
                                 filetype='bulk-db', 
                                 options="")
 
 # The rest of the args are midi files to analyze
 filename = arguments[2]
 input_data = command_line_input(filename, 
                                 filetype=options.filetype, 
                                 options=options.file_options,
                                 allowed_types=['segmidi', 'bulk-segmidi'])
 if isinstance(input_data, SegmentedMidiInput):
     # Single input
     input_data = [input_data]
 
 
 # Work out how many results to print out
 if options.print_results == -1:
Ejemplo n.º 16
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a backoff builder model using the given "\
        "input data. Specify a model type (ngram, etc) and a name to "\
        "identify it. The data file should be a stored SequenceIndex file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    options, arguments = parse_args_with_config(parser)

    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_type = arguments[0]
    model_name = arguments[1]

    builder_cls = get_backoff_builder(model_type)
    model_cls = builder_cls.MODEL_CLASS

    # Load the sequence data from a dbinput file
    input_data = command_line_input(
        filename=filename,
        filetype=options.filetype,
        options=options.file_options,
        allowed_types=['bulk-db', 'bulk-db-annotated'])

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    if options.partitions is not None:
        parts = holdout_partition(input_data, options.partitions)
        models = [(builder_cls.partition_model_name(model_name,num),seqs) for \
                        num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        # Train it with the loaded data
        model.train(seqs)
        model.save()
        print "Trained model %s" % (part_name)