Exemple #1
0
def prepare_pos_tags():
    """
    Return a sorted list of the POS tags (lexical family names) made 
    available by the grammar.
    
    Uses the grammar named in the settings if one is given; otherwise 
    falls back on the default grammar.
    
    """
    # Read in the possible categories from the grammar
    if settings.GRAMMAR is None:
        grammar = Grammar(jazzsettings.DEFAULT_GRAMMAR)
    else:
        grammar = Grammar(settings.GRAMMAR)
    # sorted() already returns a list, so there's no need to wrap it
    return sorted(grammar.families.keys())
Exemple #2
0
 def test_get_signs_for_word(self):
     """
     Fetches signs from the grammar for a handful of example chords, 
     checking that no errors get raised in the process.
     
     @see: L{jazzparser.grammar.Grammar.get_signs_for_word}
     
     """
     grammar = Grammar()
     # Take a small sample of chords from the database input
     sample = self.dbinput.chords[:10]
     for chord in sample:
         # Each call should produce a list of signs
         grammar.get_signs_for_word(chord)
Exemple #3
0
 def test_tag_to_function(self):
     """
     Looks up a function for every POS tag in the grammar and checks 
     that every function found is one of the allowed functions.
     
     """
     grammar = Grammar()
     allowed_functions = ['T','D','S','Pass']
     for pos_tag in grammar.pos_tags:
         function = grammar.tag_to_function(pos_tag)
         if function is not None:
             self.assertIn(function, allowed_functions)
         else:
             # Not an error: just flag tags with no function
             warnings.warn("Tag %s has no function given by the "\
                 "grammar" % pos_tag)
Exemple #4
0
 def test_get_sign_for_word_by_tag(self):
     """
     Requests a sign for several (chord, tag) combinations, checking 
     that no errors get raised in the process.
     
     @see: L{jazzparser.grammar.Grammar.get_sign_for_word_by_tag}
     
     """
     grammar = Grammar()
     # A small selection of the tags the grammar allows
     sample_tags = grammar.pos_tags[:6]
     # Check a few chords against a few tags each
     for chord in self.dbinput.chords[:10]:
         for tag in sample_tags:
             # The result should be a sign or None
             grammar.get_sign_for_word_by_tag(chord, tag)
Exemple #5
0
    def setUp(self):
        """
        Builds a grammar, fetches a forward application rule from it and 
        constructs some example categories and signs for the tests to use.
        """
        from jazzparser.formalisms.music_halfspan.rules import ApplicationRule
        from jazzparser.formalisms.music_halfspan.syntax import AtomicCategory, \
                            ComplexCategory, HalfCategory, Sign, Slash
        from jazzparser.formalisms.music_halfspan.semantics import \
                            DummyLogicalForm, Semantics
        from jazzparser.grammar import Grammar

        # Load the default grammar
        self.grammar = Grammar()
        # Forward application is the rule we'll be instantiating
        self.rule = self.grammar.rules_by_name['appf']

        # Build categories to store as if the rule had applied to them.
        # First an atomic category
        self.cat0 = AtomicCategory(HalfCategory("I"), HalfCategory("I"))
        # Then a complex category that could be applied to the atomic one
        self.cat1 = ComplexCategory(HalfCategory("V", function="D"),
                                    Slash(True),
                                    HalfCategory("I", function=["D", "T"]))
        # Finally an atomic category, as if 0 had been applied to 1
        self.cat2 = AtomicCategory(HalfCategory("V", function="D"),
                                   HalfCategory("I"))

        # All the signs share a dummy semantics
        placeholder_sem = Semantics(DummyLogicalForm())
        self.sign0 = Sign(self.cat0, placeholder_sem.copy())
        self.sign1 = Sign(self.cat1, placeholder_sem.copy())
        self.sign2 = Sign(self.cat2, placeholder_sem.copy())
Exemple #6
0
 def test_load_default(self):
     """
     Simply instantiates the default grammar, checking that no errors 
     get raised while it loads.
     
     """
     # Calling Grammar with no arguments loads the default grammar
     Grammar()
Exemple #7
0
 def test_public_attrs(self):
     """
     Checks that loading the default grammar sets every one of 
     Grammar's public attributes.
     
     """
     grammar = Grammar()
     for attr_name in self.SET_ATTRIBUTES:
         self.assertIsNotNone(getattr(grammar, attr_name))
Exemple #8
0
def main():
    parser = OptionParser()
    parser.add_option(
        "-t",
        "--tagger",
        dest="tagger",
        action="store_true",
        help=
        "The tagger component to use (full python path to the tagger class). Default: %s"
        % DEFAULT_TAGGER)
    options, arguments = parser.parse_args()

    if options.tagger is not None:
        tagger = options.tagger
    else:
        tagger = DEFAULT_TAGGER

    # Use the default grammar
    grammar = Grammar()
    tagger_class = get_tagger(tagger)

    total_entropy = 0.0
    total_chords = 0
    # Compile the data for displaying in a table
    data = []
    for sequence in ChordSequence.objects.filter(analysis_omitted=False):
        print "Analyzing entropy of model on %s" % sequence.name
        # Calculate the total word-level entropy of this sequence
        sequence_chords = list(sequence.iterator())
        entropy, sequence_length = sequence_entropy(sequence_chords, grammar,
                                                    tagger_class)
        data.append({
            'name':
            sequence.name.encode('ascii', 'replace'),
            'entropy':
            entropy,
            'length':
            sequence_length,
            'entropy_per_chord':
            (sequence_length != 0 and (entropy / sequence_length) or 0.0),
        })
        if sequence_length:
            total_entropy += entropy
            total_chords += sequence_length

    # Display a table of the results
    table_data = [['Sequence', 'Entropy', 'Chords', 'Entropy per chord']] + [[
        d['name'],
        "%.4f" % d['entropy'],
        "%d" % d['length'],
        "%.4f" % d['entropy_per_chord']
    ] for d in data]
    pprint_table(sys.stdout, table_data, [True, False, False, False])
    # Calculate the perplexity over the whole set
    perplexity = math.pow(2, total_entropy / total_chords)
    print "### Entropy per chord: %.4f" % (total_entropy / total_chords)
    print "### Perplexity = %.4f" % perplexity
Exemple #9
0
def main():
    usage = "%prog [options]"
    description = "Outputs a list of POS tags supplied by a particular grammar."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        "-g",
        "--grammar",
        dest="grammar",
        action="store",
        help=
        "load a grammar by name (defaults to the default grammar from the settings file)"
    )
    options, arguments = parser.parse_args()

    if options.grammar:
        grammar = Grammar(options.grammar)
    else:
        grammar = Grammar()

    pos_list = grammar.families.keys()
    pos_list.sort()
    for pos in pos_list:
        print pos
Exemple #10
0
 def test_equivalence_map(self):
     """
     Reads an entry out of the equivalence map and checks that the map 
     behaves as expected.
     
     This test assumes the default grammar supplies an equivalence map. 
     A grammar with no equivalence map is perfectly legal, so if the 
     default grammar ever loses its map, this test will need updating 
     to load a grammar that has one, or should just be removed.
     
     """
     g = Grammar()
     if not g.equiv_map:
         raise ValueError("cannot test equivalence map because it's empty "\
             "in the default grammar")
     # Take any key from the map and fetch its equivalent entry
     first_key = g.equiv_map.keys()[0]
     equiv = g.equiv_map[first_key]
     # The entry supplies a root interval and an equivalent morph item
     self.assertIsInstance(equiv.root, int)
     self.assertIsInstance(equiv.target, MorphItem)
Exemple #11
0
def main():
    """
    Command-line entry point: trains an HPChordLabeler chord labeling 
    model on bulk input data, optionally training one model per data 
    partition (each on the complement of its partition).
    """
    usage = "%prog [options] <model_name> <in-file>"
    description = "Trains a chord labeling model using the given "\
        "input data. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file."
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="append",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif "help" in [opt.lower() for opt in options.training_opts]:
        # Just display the available training options and stop
        print options_help_text(HPChordLabeler.TRAINING_OPTIONS,
                                intro="Training options:")
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_string(
            options.training_opts)

    if len(arguments) < 2:
        print >> sys.stderr, "You must specify a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[1])
    model_name = arguments[0]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    # Only partition the chord data, not the MIDI data
    if options.partitions is not None and not \
            (isinstance(input_data, MidiTaggerTrainingBulkInput) and \
             input_data.chords is not None):
        print >>sys.stderr, "Can only partition chord data and no chord data "\
            "was supplied"
        sys.exit(1)

    if options.partitions:
        # The input includes chord training data
        # One model per partition, named by appending the partition number
        parts = input_data.chords.get_partitions(options.partitions)[1]
        models = [("%s%d" % (model_name,num),chord_data) \
            for num,chord_data in enumerate(parts)]
    else:
        # No partitioning: a single model trained on all the data
        models = [(model_name, None)]

    for part_name, chord_data in models:
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Create a fresh model with this name
        model = HPChordLabeler.train(input_data,
                                     part_name,
                                     logger=logger,
                                     options=training_opts,
                                     chord_data=chord_data)
        print "Trained model %s" % (part_name)
Exemple #12
0
def annotate_sequence(request, id):
    """
    Like edit_sequence, but only allows you to change the annotations,
    not the chord sequence. Supplies an automatic annotation that you 
    can choose to apply selectively to the sequence.
    """
    sequence = get_object_or_404(ChordSequence, id=id)
    # NOTE(review): this raise makes everything below unreachable -- the 
    # view is deliberately disabled until it's updated to the current JP 
    # interface (see the message). The remaining code is the old 
    # implementation, kept for when the view gets revived.
    raise NotImplementedError, "Don't use this for now: the JP has changed and this needs to be updated"
    #### Do the automatic tagging
    chords = list(sequence.iterator())
    # Get the default grammar
    grammar = Grammar(jpsettings.DEFAULT_GRAMMAR)
    tagger = TrigramAnnotatorChordTagger('alpha', grammar, chords)
    tagger_output = tagger.tag_input()

    if request.method == "POST":
        if 'cancel' in request.POST:
            return HttpResponseRedirect(reverse(index))
        else:
            # Save the data
            form = ChordSequenceForm(instance=sequence, data=request.POST)
            # One annotation form per chord, paired with its auto tag
            chord_forms = [
                ChordAnnotationForm(chord,
                                    tag,
                                    prefix="chord%s" % chord.id,
                                    data=request.POST)
                for chord, tag in zip(chords, tagger_output)
            ]

            # Check every chord form validates
            chords_valid = reduce(
                lambda so_far, chord: so_far and chord.is_valid(), chord_forms,
                True)
            if form.is_valid() and chords_valid:
                form.save()
                # This view can only change the annotations on a sequence
                for cf in chord_forms:
                    cf.save()

                if 'save_and_exit' in request.POST:
                    return HttpResponseRedirect(reverse(index))
                else:
                    # Redirect back to this view to continue annotating
                    return HttpResponseRedirect(
                        reverse(annotate_sequence, kwargs={'id': sequence.id}))
    else:
        form = ChordSequenceForm(instance=sequence)
        # Prepare a form for each chord
        chord_forms = [
            ChordAnnotationForm(chord, tag, prefix="chord%s" % chord.id)
            for chord, tag in zip(chords, tagger_output)
        ]

    # Calculate the width the sequence needs to be
    annotator_width = sum([cf.layout_width + 7 for cf in chord_forms])

    # Focus the first chord form's field, if there are any chords
    if len(chord_forms):
        first_field = chord_forms[0].prefix
    else:
        first_field = None

    context = {
        'sequence': sequence,
        'form': form,
        'chord_forms': chord_forms,
        'categories': category_pairs,
        'annotator_width': annotator_width,
        'first_field': first_field,
    }
    return render_to_response('sequences/annotate_sequence.html', context,
                              RequestContext(request))
Exemple #13
0
def main():
    usage = "%prog [options] <model-type> <model_name> <in-file>"
    description = "Trains a supertagging model using the given "\
        "input data. Specify a model type (baseline1, etc) and a name to "\
        "identify it. The data file may be a stored SequenceIndex file, or "\
        "any other type of bulk data file. "\
        "This can only be used with the follow types of models: %s" % ", ".join(TRAINABLE_MODELS)
    parser = OptionParser(usage=usage, description=description)
    parser.add_option(
        '-p',
        '--partitions',
        dest="partitions",
        action="store",
        type="int",
        help=
        "train a number of partitions of the given data. Trains a model on the complement of each partition, so it can be tested on the partition. The models will be named <NAME>n, where <NAME> is the model name and n the partition number."
    )
    parser.add_option(
        '--opts',
        dest="training_opts",
        action="store",
        help=
        "options to pass to the model trainer. Type '--opts help' for a list of options for a particular model type."
    )
    # File input options
    parser.add_option(
        "--filetype",
        "--ft",
        dest="filetype",
        action="store",
        help=
        "select the file type for the input file. Same filetypes as jazzparser",
        default='bulk-db')
    parser.add_option(
        "--file-options",
        "--fopt",
        dest="file_options",
        action="store",
        help=
        "options for the input file. Type '--fopt help', using '--ft <type>' to select file type, for a list of available options."
    )
    # Logging output
    parser.add_option(
        '--log',
        dest="log",
        action="store",
        help=
        "file to output training logs to. Specify a base filename; <modelname>.log will be added to the end"
    )
    options, arguments = parse_args_with_config(parser)

    grammar = Grammar()

    # Get the model type first: we might not need the other args
    if len(arguments) == 0:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
    model_type = arguments[0]

    if model_type not in TRAINABLE_MODELS:
        print >>sys.stderr, "'%s' is not a valid model type. Available taggers are: %s" % \
            (model_type, ", ".join(TRAINABLE_MODELS))
        sys.exit(1)
    if model_type not in TAGGERS:
        print >>sys.stderr, "'%s' isn't a registered model type. Check that "\
            "the name in TRAINABLE_MODELS is correct" % model_type
        sys.exit(1)

    tagger_cls = get_tagger(model_type)
    if not issubclass(tagger_cls, ModelTagger):
        print >> sys.stderr, "'%s' tagger cannot be trained with this script. Only model taggers can be." % (
            tagger_cls.__name__)
        sys.exit(1)
    model_cls = tagger_cls.MODEL_CLASS

    # Handle any training options that were given on the command line
    if options.training_opts is None:
        training_opts = {}
    elif options.training_opts.lower() == "help":
        print options_help_text(model_cls.TRAINING_OPTIONS,
                                intro="Training options for %s" %
                                model_cls.__name__)
        sys.exit(0)
    else:
        training_opts = ModuleOption.process_option_dict(
            ModuleOption.process_option_string(options.training_opts),
            model_cls.TRAINING_OPTIONS)

    # Get the rest of the args
    if len(arguments) < 3:
        print >> sys.stderr, "You must specify a model type, a model name and an input data file as arguments"
        sys.exit(1)
    filename = os.path.abspath(arguments[2])
    model_name = arguments[1]

    # Load the sequence data
    # Only allow bulk types
    input_data = command_line_input(filename=filename,
                                    filetype=options.filetype,
                                    options=options.file_options,
                                    allowed_types=get_input_type_names(
                                        single=False, bulk=True))

    if options.partitions is not None and options.partitions > 1:
        parts = input_data.get_partitions(options.partitions)[1]
        models = [(tagger_cls.partition_model_name(model_name,num),seqs) for \
                                                num,seqs in enumerate(parts)]
    else:
        models = [(model_name, input_data)]

    for part_name, seqs in models:
        # Instantiate a fresh model with this name
        model = model_cls(part_name, options=training_opts)
        if options.log is not None:
            # Prepare a logger
            logfile = "%s%s.log" % (options.log, part_name)
            print "Logging output to file %s" % logfile
            logger = create_logger(filename=logfile)
        else:
            logger = None

        # Train the model with the loaded data
        model.train(seqs, logger=logger)
        model.save()
        print "Trained model %s" % (part_name)
Exemple #14
0
def main():
    usage = "%prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        "-g",
        "--grammar",
        dest="grammar",
        action="store",
        help=
        "load a grammar by name (defaults to the default grammar from the settings file)"
    )
    parser.add_option("-l",
                      "--lexicon",
                      dest="lexicon",
                      action="store_true",
                      help="show lexicon")
    parser.add_option("-r",
                      "--rules",
                      dest="rules",
                      action="store_true",
                      help="show rules")
    parser.add_option("-m",
                      "--morph",
                      dest="morph",
                      action="store_true",
                      help="show morphological entries")
    parser.add_option("-o",
                      "--modalities",
                      dest="modalities",
                      action="store_true",
                      help="show modality hierarchy")
    parser.add_option("-a",
                      "--attributes",
                      dest="attributes",
                      action="store_true",
                      help="show other grammar attributes")
    options, arguments = parser.parse_args()

    if options.grammar:
        grammar = Grammar(options.grammar)
    else:
        grammar = Grammar()

    show_lexicon = options.lexicon
    show_rules = options.rules
    show_morph = options.morph
    show_modes = options.modalities
    show_attrs = options.attributes
    # If no section options given, show them all
    show_all = not any([show_rules, show_lexicon, show_morph, show_modes])

    if show_lexicon or show_all:
        print "== LEXICON =="
        for family in sorted(sum(grammar.families.values(), [])):
            print ">> Family '%s'" % family.name
            for entry in family.entries:
                print entry.category
        print

    if show_rules or show_all:
        print "== RULES =="
        for rule in grammar.rules:
            print rule
        print

    if show_morph or show_all:
        print "== MORPH =="
        for morph in sorted(grammar.morphs, key=lambda m: m.pos):
            print "%s => %s" % (", ".join(morph.words), morph.pos)
        print

    if show_modes or show_all:
        print "== MODALITIES =="
        print grammar.modality_tree
        print

    if show_attrs or show_all:
        print "== ATTRIBUTES =="
        print "Max categories: %s" % grammar.max_categories