예제 #1
0
 You should have received a copy of the GNU General Public License
 along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>.
 
============================ End license ======================================

"""
__author__ = "Mark Wilding <*****@*****.**>"

import sys
import logging
from optparse import OptionParser

from django.db.models import Q

from jazzparser.utils.loggers import init_logging
init_logging(logging.INFO)
# Get the logger from the logging system
logger = logging.getLogger("main_logger")

from apps.sequences.datautils import save_pickled_data


def main():
    """
    Script entry point. Sets up the command-line option parser for a 
    script that takes a single C{<out-file>} argument.
    
    @note: only the beginning of this function is visible in this excerpt; 
        the handling of the parsed options is not shown here.
    
    """
    # %prog is expanded by optparse to the name of the running script
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    # Boolean flag: unset (None) unless -r/--reannotated is given
    parser.add_option(
        "-r",
        "--reannotated",
        dest="reannotated",
        action="store_true",
        help="include sequences that are reannotations of others")
예제 #2
0
def prepare_evaluation_options(usage=None, description=None, 
        optparse_options=None, check_args=None, optparse_groups=None):
    """
    Various tasks common to the initial part of the evaluation routine
    scripts (C{models/eval.py}).
    
    Builds an optparse parser (with the standard "Input" group, a 
    C{--debug} flag and any caller-supplied options/groups), parses the 
    command line with C{parse_args_with_config}, initializes logging, 
    loads the sequence index from the input file and divides the sequences 
    up according to the partitioning options. The options are checked in 
    this order, the first one given wins: C{--partitions}, C{--partition}, 
    C{--sequence}.
    
    @todo: This is not used any more. Remove it, after checking it's definitely 
        not used.
    
    @param usage: the optparse usage string
    @param description: the optparse description string
    @type optparse_options: list of tuples
    @param optparse_options: (args,kwargs) pairs to add additional 
        options to the optparse parser. Defaults to no additional options.
    @type check_args: function
    @param check_args: function to take the command-line arguments and 
        check them. This will be called early in the script. Must 
        return a tuple of (1) the model name (or model basename) that 
        will be used in the partition model names and (2) the input 
        filename to get sequences from.
    @type optparse_groups: list of pairs
    @param optparse_groups: specifications for option groups to add to the 
        optparse option parser. The first of each pair is a tuple of 
        args to C{OptionGroup}'s init (excluding the first). 
        The second is a list of options 
        each formatted as C{optparse_options}. If a group with the same 
        title already exists, the options are added to that group instead 
        of creating a new one. Defaults to no additional groups.
        
    @rtype: tuple
    @return: (1) list of (sequences,model_name,partition_index) tuples
        for each partition; (2) the sequence ids: a list of lists (one per 
        partition) with C{--partitions}, the single list of ids of the 
        selected partition with C{--partition}, C{[seq.id]} with 
        C{--sequence} and C{[None]} if the data is not partitioned; 
        (3) optparse options; (4) optparse arguments.
    @raise ValueError: if C{check_args} is C{None} (raised after the 
        command line has been parsed).
    
    """
    import sys
    # logging is imported locally like everything else this function needs, 
    #  so that it doesn't rely on the enclosing module having imported it
    import logging
    from optparse import OptionParser, OptionGroup
    from jazzparser.utils.config import parse_args_with_config
    from jazzparser.utils.loggers import init_logging
    from jazzparser.data.db_mirrors import SequenceIndex
    from jazzparser.utils.data import partition
    
    # Avoid mutable default arguments: None means "nothing extra to add"
    if optparse_options is None:
        optparse_options = []
    if optparse_groups is None:
        optparse_groups = []
    
    parser = OptionParser(usage=usage, description=description)
    group = OptionGroup(parser, "Input", "Input data and partitioning for evaluation")
    group.add_option("-s", "--sequence", dest="sequence", action="store", help="limit the evaluation to just one sequence, with the given index in the input file")
    group.add_option("--partition", dest="partition", action="store", help="restrict to only one partition of the data. Specify as i/n, where i is the partition number and n the total number of partitions.")
    group.add_option("-p", "--partitions", dest="partitions", type="int", action="store", help="test on all n partitions of the data, using a different model for each. Will look for a model <NAME>i, where <NAME> is the given model name and i the partition number.")
    parser.add_option_group(group)
    
    parser.add_option("--debug", dest="debug", action="store_true", help="show debugging output")
    
    # Add the options according to their specs
    for args,kwargs in optparse_options:
        parser.add_option(*args, **kwargs)
        
    # Add groups and their options
    for group_args,group_options in optparse_groups:
        # Check whether the group already exists
        same_titles = [g for g in parser.option_groups if g.title == group_args[0]]
        if same_titles:
            group = same_titles[0]
        else:
            group = OptionGroup(parser, *group_args)
            parser.add_option_group(group)
        # Add options to this group
        for args,kwargs in group_options:
            group.add_option(*args, **kwargs)
    options, arguments = parse_args_with_config(parser)
    
    # This check is deliberately left after the parsing, so that things 
    #  like --help still get handled by optparse first
    if check_args is None:
        raise ValueError("could not check arguments and get model "\
            "name. check_args must not be None")
    model_name,input_filename = check_args(arguments)
        
    if options.debug:
        # Set the log level to debug and do the standard logging init
        init_logging(logging.DEBUG)
    else:
        init_logging()
        
    # Load up sequences
    seqs = SequenceIndex.from_file(input_filename)
        
    def _get_seq_by_index(index):
        # Look up a single sequence, exiting the script if there isn't one
        seq = seqs.sequence_by_index(index)
        if seq is None:
            sys.stderr.write("There are only %d sequences\n" % len(seqs))
            sys.exit(1)
        return seq
    
    def _named_partitions(total_parts):
        # One (sequences, model_name, partition_index) tuple per partition, 
        #  the model name being the base name with the index appended
        return [(part,"%s%d" % (model_name,i), i) 
                    for i,part in enumerate(partition(seqs.sequences, total_parts))]
    
    ################ Data partitioning ####################
    if options.partitions is not None:
        # Divide the data up into n partitions and use a different model name for each
        total_parts = options.partitions
        sys.stderr.write("Cross validation: dividing test data into %d partitions\n" % total_parts)
        partitions = _named_partitions(total_parts)
        part_ids = partition(seqs.ids, total_parts)
    elif options.partition is not None:
        # Just select one partition
        # Split up the argument to get two integers
        parti,total_parts = options.partition.split("/")
        parti,total_parts = int(parti), int(total_parts)
        sys.stderr.write("Restricting sequences to %d-way partition %d\n" % (total_parts,parti))
        # Get a list of sequence indices to restrict our set to
        part_ids = partition(seqs.ids, total_parts)[parti]
        partitions = [ _named_partitions(total_parts)[parti] ]
    elif options.sequence is not None:
        # Just select one sequence
        seq = _get_seq_by_index(int(options.sequence))
        partitions = [( [seq], model_name, 0 )]
        part_ids = [seq.id]
    else:
        # Don't partition the sequences
        partitions = [(seqs.sequences, model_name,0)]
        part_ids = [None]
    
    return partitions,part_ids,options,arguments
예제 #3
0
 You should have received a copy of the GNU General Public License
 along with The Jazz Parser.  If not, see <http://www.gnu.org/licenses/>.
 
============================ End license ======================================

"""
__author__ = "Mark Wilding <*****@*****.**>" 

import sys
import logging
from optparse import OptionParser

from django.db.models import Q

from jazzparser.utils.loggers import init_logging
init_logging(logging.INFO)
# Get the logger from the logging system
logger = logging.getLogger("main_logger")

from apps.sequences.datautils import save_pickled_data

def main():
    """
    Script entry point. Parses the command line, which must include an 
    output file as the first positional argument, plus three optional 
    boolean flags (described here as in their help strings):
     - C{-r/--reannotated}: include sequences that are reannotations of 
       others;
     - C{-p/--partial}: include sequences that are only partly annotated;
     - C{-n/--no-names}: obscure names of the chord sequences.
    
    @note: only the beginning of this function is visible in this excerpt; 
        what is done with the options and the output file is not shown here.
    
    """
    # %prog is expanded by optparse to the name of the running script
    usage = "%prog <out-file>"
    parser = OptionParser(usage=usage)
    parser.add_option("-r", "--reannotated", dest="reannotated", action="store_true", help="include sequences that are reannotations of others")
    parser.add_option("-p", "--partial", dest="partial", action="store_true", help="include sequences that are only partly annotated")
    parser.add_option("-n", "--no-names", dest="no_names", action="store_true", help="obscure names of the chord sequences")
    options, arguments = parser.parse_args()
    
    # The output file is required: complain on stderr if no positional 
    #  arguments were given
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify an output file as the first argument"
예제 #4
0
def main():
    set_proc_title("jazzparser")
    ########################################################
    usage = "jazzparser [<options>]"
    description = "The main parser interface for the Jazz Parser"
    ## Process the input options
    optparser = OptionParser(usage=usage, description=description)
    ###
    # File input options
    group = OptionGroup(optparser, "Input", "Input type and location")
    optparser.add_option_group(group)
    group.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    group.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords')
    group.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    group.add_option("--index", "--indices", dest="input_index", action="store", help="select individual inputs to process. Specify as a comma-separated list of indices. All inputs are loaded as usual, but only the ith input is processed, for each i in the list")
    group.add_option("--only-load", dest="only_load", action="store_true", help="don't do anything with the inputs, just load and list them. Handy for checking the inputs load and getting their indices")
    group.add_option("--partitions", dest="partitions", action="store", type="int", help="divide the input data into this number of partitions and use a different set of models for each. For any parser, tagger and backoff that takes a 'model' argument, the partition number will be appended to the given value")
    group.add_option("--seq-parts", "--sequence-partitions", dest="sequence_partitions", action="store", help="use a chord sequence index to partition the inputs. Input type (bulk) must support association of the inputs with chord sequences by id. Sequences in the given sequence index file are partitioned n ways (--partitions) and the inputs are processed according to their associated sequence.")
    group.add_option("--continue", "--skip-done", dest="skip_done", action="store_true", help="skip any inputs for which a readable results file already exists. This is useful for continuing a bulk job that was stopped in the middle")
    ###
    group = OptionGroup(optparser, "Parser", "Parser, supertagger and backoff parser")
    optparser.add_option_group(group)
    group.add_option("-d", "--derivations", dest="derivations", action="store_true", help="keep derivation logs during parse.")
    group.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    # Parser options
    group.add_option("-p", "--parser", dest="parser", action="store", help="use the named parser algorithm instead of the default. Use '-p help' to see the list of available parsers. Default: %s" % settings.DEFAULT_PARSER, default=settings.DEFAULT_PARSER)
    group.add_option("--popt", "--parser-options", dest="popts", action="append", help="specify options for the parser. Type '--popt help', using '--parser <name>' to select a parser module, to get a list of options.")
    # Tagger options
    group.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER)
    group.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Backoff options
    group.add_option("-b", "--backoff", "--noparse", dest="backoff", action="store", help="use the named backoff model as a backoff if the parser produces no results")
    group.add_option("--bopt", "--backoff-options", "--backoff-options", "--npo", dest="backoff_opts", action="append", help="specify options for the  backoff model. Type '--npo help', using '--backoff <name>' to select a backoff modules, to get a list of options.")
    ###
    # Multiprocessing options
    group = OptionGroup(optparser, "Multiprocessing")
    optparser.add_option_group(group)
    group.add_option("--processes", dest="processes", action="store", type="int", help="number of processes to create to perform parses in parallel. Default: 1, i.e. no process pool. Use -1 to create a process for every input", default=1)
    ###
    # Output options
    group = OptionGroup(optparser, "Output")
    optparser.add_option_group(group)
    group.add_option("--output", dest="output", action="store", help="directory name to output parse results to. A filename specific to the individual input will be appended to this")
    group.add_option("--topn", dest="topn", action="store", type="int", help="limit the number of final results to store in the output file to the top n by probability. By default, stores all")
    group.add_option("--output-opts", "--oopts", dest="output_opts", action="store", help="options that affect the output formatting. Use '--output-opts help' for a list of options.")
    group.add_option("-a", "--atomic-results", dest="atoms_only", action="store_true", help="only include atomic categories in the results.")
    group.add_option("-l", "--latex", dest="latex", action="store_true", help="output all results as Latex source. Used to produce a whole Latex document, but doesn't any more")
    group.add_option("--all-times", dest="all_times", action="store_true", help="display all timing information on semantics in output.")
    group.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.")
    group.add_option("--time", dest="time", action="store_true", help="time how long the parse takes and output with the results.")
    group.add_option("--no-results", dest="no_results", action="store_true", help="don't print out the parse results at the end. Obviously you'll want to make sure they're going to a file (--output). This is useful for bulk parse jobs, where the results produce a lot of unnecessary output")
    group.add_option("--no-progress", dest="no_progress", action="store_true", help="don't output the summary of completed sequences after each one finishes")
    ###
    # Output analysis and harmonical
    group = OptionGroup(optparser, "Output processing", "Output analysis and harmonical")
    optparser.add_option_group(group)
    group.add_option("--harmonical", dest="harmonical", action="store", help="use the harmonical to play the chords justly intoned according to the top result and output to a wave file.")
    group.add_option("--enharmonical", dest="enharmonical", action="store", help="use the harmonical to play the chords in equal temperament and output to a wave file.")
    group.add_option("--midi", dest="midi", action="store_true", help="generate MIDI files from the harmonical, instead of wave files.")
    group.add_option("--tempo", dest="tempo", action="store", type=int, help="tempo to use for the generated music (see --harmonical/--enharmonical). Default: 120", default=120)
    group.add_option("--lh-analysis", dest="lh_analysis", action="store_true", help="output the Longuet-Higgins space interpretation of the semantics for each result.")
    group.add_option("--lh-coordinates", dest="lh_coord", action="store_true", help="like lh-analysis, but displays the coordinates of the points instead of their names.")
    ###
    # Logging options
    group = OptionGroup(optparser, "Logging")
    optparser.add_option_group(group)
    group.add_option("--long-progress", dest="long_progress", action="store_true", help="print a summary of the chart so far after each chord/word has been processed.")
    group.add_option("--progress", "--short-progress", dest="short_progress", action="store_true", help="print a small amount of information out during parsing to indicate progress.")
    group.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")
    ###
    # Shell options
    group = OptionGroup(optparser, "Shell", "Interactive shell for inspecting results and parser state")
    optparser.add_option_group(group)
    group.add_option("-i", "--interactive", dest="interactive", action="store_true", help="enter interactive mode after parsing.")
    group.add_option("--error", dest="error_shell", action="store_true", help="catch any errors, report them and then enter the interactive shell. This also catches keyboard interrupts, so you can use it to halt parsing and enter the shell.")
    
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    
    # Get log level option first, so we can start using the logger
    if options.debug:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    # Set up a logger
    init_logging(log_level)
    
    if options.latex:
        settings.OPTIONS.OUTPUT_LATEX = True
    
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None
    
    ######## Grammar ########
    # Check the grammar actually exists
    grammar_names = get_grammar_names()
    if options.grammar is not None and options.grammar not in grammar_names:
        # This is not a valid grammar name
        logger.error("The grammar '%s' does not exist. Possible "\
            "grammars are: %s." % (options.grammar, ", ".join(grammar_names)))
        return 1
    grammar = get_grammar(options.grammar)
        
    ######## Parser ########
    # Load the requested parser
    from jazzparser.parsers import PARSERS
    if options.parser.lower() == "help":
        print "Available parsers are: %s" % ", ".join(PARSERS)
        return 0
    try:
        parser_cls = get_parser(options.parser)
    except ParserLoadError:
        logger.error("The parser '%s' could not be loaded. Possible "\
            "parsers are: %s" % (options.parser, ", ".join(PARSERS)))
        return 1
        
    # Get parser options
    if options.popts is not None:
        poptstr = options.popts
        if "help" in [s.strip().lower() for s in poptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(parser_cls.PARSER_OPTIONS, intro="Available options for selected parser")
            return 0
        poptstr = ":".join(poptstr)
    else:
        poptstr = ""
    popts = ModuleOption.process_option_string(poptstr)
    # Check that the options are valid
    try:
        parser_cls.check_options(popts)
    except ModuleOptionError, err:
        logger.error("Problem with parser options (--popt): %s" % err)
        return 1