コード例 #1
0
def main():
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tagger",
        dest="tagger",
        action="store_true",
        help=
        "The tagger component to use (full python path to the tagger class). Default: %s"
        % DEFAULT_TAGGER)
    options, arguments = parser.parse_args()

    if options.tagger is not None:
        tagger = options.tagger
    else:
        tagger = DEFAULT_TAGGER

    # Use the default grammar
    grammar = Grammar()
    tagger_class = get_tagger(tagger)

    total_entropy = 0.0
    total_chords = 0
    # Compile the data for displaying in a table
    data = []
    for sequence in ChordSequence.objects.filter(analysis_omitted=False):
        print "Analyzing entropy of model on %s" % sequence.name
        # Calculate the total word-level entropy of this sequence
        sequence_chords = list(sequence.iterator())
        entropy, sequence_length = sequence_entropy(sequence_chords, grammar,
                                                    tagger_class)
        data.append({
            'name':
            sequence.name.encode('ascii', 'replace'),
            'entropy':
            entropy,
            'length':
            sequence_length,
            'entropy_per_chord':
            (sequence_length != 0 and (entropy / sequence_length) or 0.0),
        })
        if sequence_length:
            total_entropy += entropy
            total_chords += sequence_length

    # Display a table of the results
    table_data = [['Sequence', 'Entropy', 'Chords', 'Entropy per chord']] + [[
        d['name'],
        "%.4f" % d['entropy'],
        "%d" % d['length'],
        "%.4f" % d['entropy_per_chord']
    ] for d in data]
    pprint_table(sys.stdout, table_data, [True, False, False, False])
    # Calculate the perplexity over the whole set
    perplexity = math.pow(2, total_entropy / total_chords)
    print "### Entropy per chord: %.4f" % (total_entropy / total_chords)
    print "### Perplexity = %.4f" % perplexity
コード例 #2
0
def main():
    parser = OptionParser()
    parser.add_option("-t", "--tagger", dest="tagger", action="store_true", help="The tagger component to use (full python path to the tagger class). Default: %s" % DEFAULT_TAGGER)
    options, arguments = parser.parse_args()
    
    if options.tagger is not None:
        tagger = options.tagger
    else:
        tagger = DEFAULT_TAGGER
    
    # Use the default grammar
    grammar = Grammar()
    tagger_class = get_tagger(tagger)
    
    total_entropy = 0.0
    total_chords = 0
    # Compile the data for displaying in a table
    data = []
    for sequence in ChordSequence.objects.filter(analysis_omitted=False):
        print "Analyzing entropy of model on %s" % sequence.name
        # Calculate the total word-level entropy of this sequence
        sequence_chords = list(sequence.iterator())
        entropy,sequence_length = sequence_entropy(sequence_chords, grammar, tagger_class)
        data.append( {
            'name' : sequence.name.encode('ascii', 'replace'),
            'entropy' : entropy,
            'length' : sequence_length,
            'entropy_per_chord' : (sequence_length!=0 and (entropy/sequence_length) or 0.0),
        })
        if sequence_length:
            total_entropy += entropy
            total_chords += sequence_length
    
    # Display a table of the results
    table_data = [['Sequence', 'Entropy', 'Chords', 'Entropy per chord']] + [
        [ d['name'], "%.4f" % d['entropy'], "%d" % d['length'], "%.4f" % d['entropy_per_chord'] ] 
            for d in data ]
    pprint_table(sys.stdout, table_data, [True, False, False, False])
    # Calculate the perplexity over the whole set
    perplexity = math.pow(2, total_entropy/total_chords)
    print "### Entropy per chord: %.4f" % (total_entropy/total_chords)
    print "### Perplexity = %.4f" % perplexity
コード例 #3
0
def main():
    usage = "%prog <model-type> <command> [<command-arg> ...]"
    description = (
        "Provides administrative operations for trained "
        "tagger models. Use the 'help' command to get a list "
        "of commands or command usage."
    )
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model type and a command"
        sys.exit(1)
    model_type = arguments[0]
    command = arguments[1].lower()
    command_args = arguments[2:]

    if model_type not in TAGGERS:
        print >> sys.stderr, "'%s' isn't a registered model type. Check that " "the name  is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be modified with this script. Only model taggers can be." % (
            tagger_cls.__name__
        )
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    def _load_model(name):
        # Load the model from its file
        return model_cls.load_model(name)

    # Define behaviour for each command
    list_help = "Lists all the trained models available"

    def _list(args):
        # List the available models for the given model type
        models = model_cls.list_models()
        print "Available models for %s:" % model_cls.MODEL_TYPE
        print ", ".join(list(sorted(group_numerical_suffixes(models))))

    desc_help = "Outputs the descriptive text associated with the model at training time"

    def _desc(args):
        if len(args) == 0:
            raise CommandError, "desc requires a model name as an argument"
        try:
            model = _load_model(args[0])
        except ModelLoadError, err:
            # Try loading a model with 0 on the end - allows you to use
            #  just the base name for crossval models
            try:
                model = _load_model("%s0" % args[0])
                print >> sys.stderr, "No model %s, but %s0 does exist\n" % (args[0], args[0])
            except ModelLoadError:
                print >> sys.stderr, "No model %s or %s0\n" % (args[0], args[0])
                raise err
        print "Model descriptor"
        print "================"
        print model.description
コード例 #4
0
ファイル: train.py プロジェクト: johndpope/jazzparser
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option('-p', '--partitions', dest="partitions", action="store", type="int", help="train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number.")
    parser.add_option('--opts', dest="training_opts", action="store", help="options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type.")
    # File input options
    parser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file. Same filetypes as jazzparser", default='bulk-db')
    parser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Logging output
    parser.add_option('--log', dest="log", action="store", help="file to output training logs to. Specify a base filename; <modelname>.log will be added to the end")
    options, arguments = parse_args_with_config(parser)
    
    grammar = Grammar()
    
    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]
    
    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)
    
    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >>sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS
    
    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS, intro="Training options for %s" % model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
                            ModuleOption.process_option_string(options.training_opts), 
                            model_cls.TRAINING_OPTIONS)
    
    # Get the rest of the args
    if len(arguments) < 3:
        print >>sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename, 
                                    filetype=options.filetype, 
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(single=False, bulk=True))
    
    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name,input_data)]
    
    for part_name,seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None
            
        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
コード例 #5
0
ファイル: tageval.py プロジェクト: johndpope/jazzparser
def main():
    """Evaluate a tagger on partitioned input data.

    Options, positional arguments and the data partitions all come from
    ``prepare_evaluation_options``. Exactly one evaluation is run, chosen
    in this order of precedence:

    * ``--tag-stats``: currently disabled; raises ``NotImplementedError``.
    * ``-a``/``--agreement``: reports how often the tagger's tags agree
      with the gold standard tags, per sequence, per partition and (when
      there is more than one partition) overall. With ``--confusion`` the
      collected confusion matrix is passed to ``confusion_matrix``.
    * ``-e``/``--entropy``: reports bits per chord as returned by
      ``tagger_entropy``, per sequence, per partition and overall.

    If none of these is selected, an error is printed to stderr and the
    process exits with status 1.
    """
    def _check_args(args):
        """Require exactly three positional arguments; return the last two."""
        if len(args) != 3:
            print >>sys.stderr, "Specify a tagger, model name and input file"
            sys.exit(1)
        return args[1],args[2]
    
    partitions,part_ids,options,arguments = prepare_evaluation_options(
        usage = "%prog [options] <tagger> <model-name> <input-file>",
        description = "Evaluate a tagging model by "\
            "tagging sequences from an input file. If the tagger doesn't "\
            "need a model name, use '-' as the model name.",
        check_args = _check_args,
        optparse_groups = [
            (("Tagging",),
                [(("--topt", "--tagger-options"), 
                    {'dest':"topts", 'action':"append", 'help':"options to pass to the tagger."}),
                ]),
            (("Output",), 
                [(("--no-model-info",), 
                    {'dest':"no_model_info", 'action':"store_true", 'help':"turns of outputing of information about the model being used before using it (useful for identifying output piped to a file later, but may be too verbose sometimes)"}),
                ]),
            (("Evaluation", "Type of evaluation and options"),
                [(("-a", "--agreement"), 
                    {'dest':"agreement", 'action':"store_true", 'help':"instead of doing any parses, just report the agreement of the tops tags with the gold standard tags."}),
                 (("--confusion",), 
                    {'dest':"confusion", 'action':"store_true", 'help':"print out confusion matrix after agreement calculation. Applies only in combination with --agreement"}),
                 (("-e", "--entropy"), 
                    {'dest':"entropy", 'action':"store_true", 'help':"instead of doing any parses, just report the entropy of the returned tag distribution with respect to the gold standard tags."}),
                 (("--tag-stats",), 
                    {'dest':"tag_stats", 'action':"store_true", 'help':"just output stats about the tags that the model assigns to this sequence (or these sequences)"}),
                 (("--topn",), 
                    {'dest':"topn", 'type':"int", 'action':"store", 'help':"when evaluating agreement consider the top N tags the tagger returns. By default, allows only the top one to count as a hit.", 'default':1}),
                ]),
        ],
    )
    
    # NOTE(review): the grammar is only passed on to tagger_agreement and
    #  tagger_entropy below
    grammar = Grammar()
    
    tagger_name = arguments[0]
    model_name = arguments[1]
    # Tagger shouldn't use a model in some cases
    no_tagger_model = model_name == "-"
    
    # Load the requested tagger class
    tagger_cls = get_tagger(tagger_name)
    # NOTE(review): --topt uses action "append", so options.topts is a list
    #  (or None when the option is absent) rather than a single string -
    #  confirm that process_option_string accepts that
    topts = ModuleOption.process_option_string(options.topts)
    
    def _model_info(mname):
        """
        Outputs info about the named model to stderr. With --no-model-info
        only the model name is printed; otherwise the stored model
        description is printed for ModelTagger subclasses and a one-line
        summary for any other tagger.
        """
        if options.no_model_info:
            print >>sys.stderr, "Model %s" % mname
        else:
            # Can only output the nice model info if it's a ModelTagger
            if issubclass(tagger_cls, ModelTagger):
                print >>sys.stderr, "======== Model info ========"
                print >>sys.stderr, tagger_cls.MODEL_CLASS.load_model(mname).description
                print >>sys.stderr, "============================"
            else:
                print >>sys.stderr, "Tagger %s using model %s" % (tagger_cls.__name__, mname)
    
    # Each partition is unpacked below as (sequences, model, part_num)
    num_parts = len(partitions)
    # Total number of sequences; not used in the rest of this function
    num_seqs = sum([len(p[0]) for p in partitions])
    
    ################# Evaluation ########################
    if options.tag_stats:
        # This mode is deliberately disabled: everything after the raise in
        #  this branch is unreachable until it is removed
        raise NotImplementedError, "fix this if you want it"
        # Print out statistics for each partition, with its model
        if no_tagger_model:
            # There could be some circumstance in which we want to do this, 
            #  but I can't think what it is, so I'm not implementing it for now
            print >>sys.stderr, "Cannot run tag_stats with no tagger model"
            sys.exit(1)
        all_stats = {}
        for parti in range(num_parts):
            sequences,model,part_num = partitions[parti]
            # Output the model training info if requested
            _model_info(model)
            ######## This doesn't exist any more
            stats = sequences_top_tags_dict(tagger_cls, model, sequences, topn=options.topn)
            for tag,num in stats.items():
                if tag in all_stats:
                    all_stats[tag] += stats[tag]
                else:
                    all_stats[tag] = stats[tag]
        pprint_table(sys.stdout, list(reversed(sorted(all_stats.items(), key=lambda r:r[1]))), separator="|")
    elif options.agreement:
        # Print out agreement stats for each partition
        if no_tagger_model:
            # Same a tag_stats: probably no need for this ever
            print >>sys.stderr, "Cannot run agreement with no tagger model"
            sys.exit(1)
        # Running totals over all partitions
        correct = 0
        total = 0
        # The same dict is passed to every tagger_agreement call and later
        #  handed to confusion_matrix
        conf_mat = {}
        for parti in range(num_parts):
            sequences,model,part_num = partitions[parti]
            topts['model'] = model
            # Output the model training info if requested
            _model_info(model)
            # Totals for this partition only
            pcorrect = 0
            ptotal = 0
            # Go through each sequence
            for seq in sequences:
                print >>sys.stderr, "Evaluating %s" % seq.string_name
                input = DbInput.from_sequence(seq)
                correct_tags = [chord.category for chord in seq.iterator()]
                cor,tot = tagger_agreement(input, grammar, tagger_cls, correct_tags, options=topts, confusion_matrix=conf_mat, topn=options.topn)
                pcorrect += cor
                ptotal += tot
                # NOTE(review): these divisions raise ZeroDivisionError if
                #  tot (or ptotal) is 0 - confirm tagger_agreement never
                #  returns a zero total
                print "  Sequence: %.1f%%" % (float(cor)/tot*100)
                print "  So far: %.1f%%" % (float(pcorrect)/ptotal*100)
            print "Partition %d: %d / %d (%.2f%%)" % (part_num, pcorrect, ptotal, (float(pcorrect)/ptotal*100))
            correct += pcorrect
            total += ptotal
        if num_parts > 1:
            # Print out the overall stats
            print "%d / %d (%f%%)" % (correct,total,(float(correct)/total*100))
        if options.confusion:
            confusion_matrix(conf_mat) 
    elif options.entropy:
        print "Calculating cross-entropy of tagger with gold standard tags"
        # Running totals over all partitions
        entropy = 0.0
        num_chords = 0
        for parti in range(num_parts):
            sequences,model,part_num = partitions[parti]
            if not no_tagger_model:
                topts['model'] = model
                # Output the model training info if requested
                _model_info(model)
            # Totals for this partition only
            pentropy = 0.0
            pnum_chords = 0
            # Compute the entropy for the partition model
            for seq in sequences:
                print >>sys.stderr, "Evaluating %s" % seq.string_name
                # Unlike the agreement mode, the tagger input here is the
                #  chords joined into a space-separated string
                input = " ".join([str(chord) for chord in seq.iterator()])
                correct_tags = [chord.category for chord in seq.iterator()]
                ent,crds = tagger_entropy(input, grammar, tagger_cls, correct_tags, options=topts)
                pentropy += ent
                pnum_chords += crds
                # NOTE(review): divides by crds (and by pnum_chords below);
                #  confirm tagger_entropy never returns zero chords
                print "   %f bits per chord" % (ent/crds)
            print "Partition %d: %f bits per chord (%d chords)" % (part_num, (pentropy/pnum_chords), pnum_chords)
            entropy += pentropy
            num_chords += pnum_chords
        # Print out the stats for all partitions together
        if num_parts > 1:
            print "%f bits per chord (%d chords)" % ((entropy/num_chords), num_chords)
    else:
        print >>sys.stderr, "Select an evaluation operation with one of the options"
        sys.exit(1)
コード例 #6
0
ファイル: admin.py プロジェクト: johndpope/jazzparser
def main():
    usage = "%prog <model-type> <command> [<command-arg> ...]"
    description = "Provides administrative operations for trained "\
                "tagger models. Use the 'help' command to get a list "\
                "of commands or command usage."
    parser = OptionParser(usage=usage, description=description)
    options, arguments = parser.parse_args()

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model type and a command"
        sys.exit(1)
    model_type = arguments[0]
    command = arguments[1].lower()
    command_args = arguments[2:]

    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name  is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be modified with this script. Only model taggers can be." % (
            tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    def _load_model(name):
        # Load the model from its file
        return model_cls.load_model(name)

    # Define behaviour for each command
    list_help = "Lists all the trained models available"

    def _list(args):
        # List the available models for the given model type
        models = model_cls.list_models()
        print "Available models for %s:" % model_cls.MODEL_TYPE
        print ", ".join(list(sorted(group_numerical_suffixes(models))))

    desc_help = "Outputs the descriptive text associated with the model at training time"

    def _desc(args):
        if len(args) == 0:
            raise CommandError, "desc requires a model name as an argument"
        try:
            model = _load_model(args[0])
        except ModelLoadError, err:
            # Try loading a model with 0 on the end - allows you to use
            #  just the base name for crossval models
            try:
                model = _load_model("%s0" % args[0])
                print >> sys.stderr, "No model %s, but %s0 does exist\n" % (
                    args[0], args[0])
            except ModelLoadError:
                print >> sys.stderr, "No model %s or %s0\n" % (args[0],
                                                               args[0])
                raise err
        print "Model descriptor"
        print "================"
        print model.description
コード例 #7
0
ファイル: train.py プロジェクト: johndpope/jazzparser
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (
            tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
コード例 #8
0
ファイル: tag.py プロジェクト: johndpope/jazzparser
def main():
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)
    
    # Tagger options
    optparser.add_option("-t", "--tagger", "--supertagger", dest="supertagger", action="store", help="run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s" % settings.DEFAULT_SUPERTAGGER, default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option("--topt", "--tagger-options", dest="topts", action="append", help="specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options.")
    # Commonly-used misc
    optparser.add_option("-g", "--grammar", dest="grammar", action="store", help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option("--file", "-f", dest="file", action="store", help="use a file to get parser input from. Use --filetype to specify the type of the file.")
    optparser.add_option("--filetype", "--ft", dest="filetype", action="store", help="select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords", default='chords')
    optparser.add_option("--file-options", "--fopt", dest="file_options", action="store", help="options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options.")
    # Misc options
    optparser.add_option("-v", "--debug", dest="debug", action="store_true", help="output verbose debugging information.")
    optparser.add_option("-i", "--interactive", dest="interactive", action="store_true", help="instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging")
    # Logging options
    optparser.add_option("--logger", dest="logger", action="store", help="directory to put parser logging in. A filename based on an identifier for each individual input will be appended.")
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)
    
    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None
    
    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)
        
    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1
        
    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger")
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
コード例 #9
0
def main():
    usage = "%prog [<options>]"
    description = "Runs a supertagger from the Jazz Parser to tag some input "\
        "but just outputs the results, rather than continuing to parse."
    optparser = OptionParser(usage=usage, description=description)

    # Tagger options
    optparser.add_option(
        "-t",
        "--tagger",
        "--supertagger",
        dest="supertagger",
        action="store",
        help=
        "run the parser using the named supertagger. Use '-t help' to see the list of available taggers. Default: %s"
        % settings.DEFAULT_SUPERTAGGER,
        default=settings.DEFAULT_SUPERTAGGER)
    optparser.add_option(
        "--topt",
        "--tagger-options",
        dest="topts",
        action="append",
        help=
        "specify options for the tagger. Type '--topt help', using '-u <name>' to select a tagger module, to get a list of options."
    )
    # Commonly-used misc
    optparser.add_option("-g",
                         "--grammar",
                         dest="grammar",
                         action="store",
                         help="use the named grammar instead of the default.")
    # File input options
    optparser.add_option(
        "--file",
        "-f",
        dest="file",
        action="store",
        help=
        "use a file to get parser input from. Use --filetype to specify the type of the file."
    )
    optparser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file (--file). Use '--filetype help' for a list of available types. Default: chords",
        default='chords')
    optparser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file (--file). Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Misc options
    optparser.add_option("-v",
                         "--debug",
                         dest="debug",
                         action="store_true",
                         help="output verbose debugging information.")
    optparser.add_option(
        "-i",
        "--interactive",
        dest="interactive",
        action="store_true",
        help=
        "instead of just outputing all tags in one go, wait for user input between each iteration of adaptive supertagging"
    )
    # Logging options
    optparser.add_option(
        "--logger",
        dest="logger",
        action="store",
        help=
        "directory to put parser logging in. A filename based on an identifier for each individual input will be appended."
    )
    # Read in command line options and args
    options, clinput = parse_args_with_config(optparser)

    ########################### Option processing ####################
    if options.logger:
        # Directory
        parse_logger_dir = options.logger
        check_directory(parse_logger_dir)
    else:
        parse_logger_dir = None

    ######## Grammar ########
    # Read in the grammar
    grammar = get_grammar(options.grammar)

    ######## Supertagger ########
    # Load the supertagger requested
    if options.supertagger.lower() == "help":
        print "Available taggers are: %s" % ", ".join(TAGGERS)
        return 0
    try:
        tagger_cls = get_tagger(options.supertagger)
    except TaggerLoadError:
        logger.error("The tagger '%s' could not be loaded. Possible "\
            "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
        return 1

    # Get supertagger options before initializing the tagger
    if options.topts is not None:
        toptstr = options.topts
        if "help" in [s.strip().lower() for s in toptstr]:
            # Output this tagger's option help
            from jazzparser.utils.options import options_help_text
            print options_help_text(
                tagger_cls.TAGGER_OPTIONS,
                intro="Available options for selected tagger")
            return 0
        toptstr = ":".join(toptstr)
    else:
        toptstr = ""
    topts = ModuleOption.process_option_string(toptstr)
    # Check that the options are valid
    try:
        tagger_cls.check_options(topts)
    except ModuleOptionError, err:
        print "Problem with tagger options (--topt): %s" % err
        return 1
コード例 #10
0
 popts = ModuleOption.process_option_string(poptstr)
 # Check that the options are valid
 try:
     parser_cls.check_options(popts)
 except ModuleOptionError, err:
     logger.error("Problem with parser options (--popt): %s" % err)
     return 1
     
 ######## Supertagger ########
 # Now load the supertagger requested
 from jazzparser.taggers import TAGGERS
 if options.supertagger.lower() == "help":
     print "Available taggers are: %s" % ", ".join(TAGGERS)
     return 0
 try:
     tagger_cls = get_tagger(options.supertagger)
 except TaggerLoadError:
     logger.error("The tagger '%s' could not be loaded. Possible "\
         "taggers are: %s" % (options.supertagger, ", ".join(TAGGERS)))
     return 1
     
 # Get supertagger options before initializing the tagger
 if options.topts is not None:
     toptstr = options.topts
     if "help" in [s.strip().lower() for s in toptstr]:
         # Output this tagger's option help
         from jazzparser.utils.options import options_help_text
         print options_help_text(tagger_cls.TAGGER_OPTIONS, intro="Available options for selected tagger")
         return 0
     toptstr = ":".join(toptstr)
 else: