Example #1
0
def run_gp(mapping, crf, lm):
    """
    Run the g2p server.
    """
    # NOTE(review): this function reports through click, so it may be
    # registered as a click command; in that case the docstring above doubles
    # as the --help text. It is therefore left untouched and the details are
    # documented here instead.
    #
    #   mapping -- passed to gp.Aligner as its ``mapping`` argument
    #   crf     -- passed to the ``load`` method of a fresh gp.Transcriber
    #   lm      -- passed to gp.Rater.load
    #
    # All progress messages are written to stderr.

    def report(message):
        # every status line of the loading phase goes to stderr
        click.echo(message, err=True)

    #
    # build the four components, announcing each one first
    #
    report(u"Loading...")

    report(u"...data alignment")
    alignment_component = gp.Aligner(mapping=mapping)

    report(u"...transcription CRF model")
    crf_component = gp.Transcriber()
    crf_component.load(crf)

    report(u"...n-gram language model")
    lm_component = gp.Rater.load(lm)

    report(u"...output formatter")
    formatting_component = gp.Formatter()

    #
    # wire the components into the app and start it
    #
    server = apps.create_gp_app(
        alignment_component,
        crf_component,
        lm_component,
        formatting_component
    )
    server.run()
Example #2
0
def apply_gp(mapping, crf, lm, strings):
    """Convert strings"""
    # NOTE(review): this function reports through click, so it may be
    # registered as a click command; in that case the docstring above doubles
    # as the --help text. It is therefore left untouched and the details are
    # documented here instead.
    #
    #   mapping -- passed to gp.Aligner as its ``mapping`` argument
    #   crf     -- passed to the ``load`` method of a fresh gp.Transcriber
    #   lm      -- passed to gp.Rater.load
    #   strings -- sequence of inputs; when its first element is "-" the
    #              inputs are read from stdin instead, one per line
    #
    # One line is written to stdout per input: the elements of the winning
    # transcription joined with commas (an empty line if there was none).

    def report(message):
        # status output is kept on stderr so stdout carries results only
        click.echo(message, err=True)

    #
    # set up the components
    #
    report("Loading...")

    report("...data alignment")
    aligner = gp.Aligner(mapping=mapping)

    report("...transcription CRF model")
    transcriber = gp.Transcriber()
    transcriber.load(crf)

    report("...n-gram language model")
    rater = gp.Rater.load(lm)

    #
    # collect the inputs
    #
    if not strings:
        pending = []
    elif strings[0] == "-":
        # surrounding whitespace (including the newline) is stripped
        pending = [raw.strip() for raw in sys.stdin]
    else:
        pending = list(strings)

    #
    # convert each input and print the winner
    #
    for text in pending:
        winner = []
        # NOTE(review): starting at 0.0 assumes rater.rate never returns a
        # negative score -- confirm against gp.Rater.
        winner_score = 0.0
        for segmentation in aligner.scan(text.lower()):
            for candidate in transcriber.transcribe(segmentation):
                score = rater.rate([segmentation, candidate])
                # ``>=`` means a later candidate replaces an equally rated one
                if score >= winner_score:
                    winner_score = score
                    winner = candidate
        click.echo(",".join(winner))
Example #3
0
def train_gp(mapping, model, data):
    """Train a model."""
    # NOTE(review): this function reports through click, so it may be
    # registered as a click command; in that case the docstring above doubles
    # as the --help text. It is therefore left untouched and the details are
    # documented here instead.
    #
    #   mapping -- passed to gp.Aligner as its ``mapping`` argument
    #   model   -- output prefix; the results are saved to
    #              ``model + ".gp.crf"`` and ``model + ".gp.ngram"``
    #   data    -- path of the training file (converted with ``str``); lines
    #              starting with "#" and lines with fewer than two
    #              tab-separated fields are skipped
    #
    # All progress and diagnostic messages are written to stderr.

    #
    # stage 1: alignment
    #
    click.echo("Stage 1a: creating data alignment", err=True)

    # the aligner
    aligner = gp.Aligner(mapping=mapping)

    click.echo("Stage 1b: aligning training data", err=True)

    # read everything up front so the file handle is released before the
    # alignment loop starts
    with open(str(data), "r") as f:
        training_lines = f.read().split("\n")

    # iterate over input and align training data
    aligned_training_data = []
    for line in tqdm(training_lines):

        # skip comments
        if line.startswith("#"):
            continue

        # assume tab-separated values
        fields = line.split("\t")
        if len(fields) < 2:
            continue

        # align the first two fields; further fields are ignored
        alignment = aligner.align(fields[0], fields[1])
        if alignment:
            aligned_training_data.append(alignment)
        else:
            # diagnostics go to stderr like every other message of this
            # function, so stdout stays free of log output
            click.echo("%s and %s could not be aligned." %
                       (fields[0], fields[1]), err=True)

    #
    # stage 2: crf training
    #
    click.echo("Stage 2: training transcription CRF model", err=True)

    # the transcriber
    transcriber = gp.Transcriber()

    # train with previously aligned training data
    transcriber.train(aligned_training_data)

    # save
    transcriber.save(model + ".gp.crf")

    #
    # stage 3: language model training
    #
    click.echo("Stage 3: training rating n-gram language model", err=True)

    # the rater
    rater = gp.Rater()

    # train with previously aligned training data
    rater.train(aligned_training_data)

    # save
    rater.save(model + ".gp.ngram")
Example #4
0
def test_scan(datadir):
    """Check that scanning "aabb" yields exactly the two expected segmentations."""
    mapping_file = datadir.join('test_alignment.txt')
    expected = [['a', 'a', 'b', 'b'], ['aa', 'b', 'b']]

    found = gp.Aligner(mapping=mapping_file).scan(u"aabb")

    # both the content and the order of the segmentations must match
    assert found == expected
Example #5
0
def test_align(datadir):
    """Check both sides of the alignment of "aabb" with "abbbb"."""
    mapping_file = datadir.join('test_alignment.txt')

    pair = gp.Aligner(mapping=mapping_file).align(u"aabb", u"abbbb")

    # presumably index 0 segments the first argument and index 1 the second;
    # the two sides are expected to have the same number of segments
    assert pair[0] == ['aa', 'b', 'b']
    assert pair[1] == ['a', 'bb', 'bb']
Example #6
0
def test_expand(datadir):
    """Check that the object returned by expand("aabb") passes its own verify()."""
    mapping_file = datadir.join('test_alignment.txt')

    expanded = gp.Aligner(mapping=mapping_file).expand(u"aabb")

    # the result is also drawn to a fixed path under /tmp before verification
    expanded.draw('/tmp/exp.dot')
    assert expanded.verify()
Example #7
0
def test_scan(datadir):
    """Check that the object returned by segment("aabb") passes its own verify()."""
    # NOTE(review): despite its name this test exercises Aligner.segment, and
    # another test_scan is defined earlier; if both live in the same module
    # this definition shadows the earlier one -- consider renaming it
    # (e.g. test_segment).
    mapping_file = datadir.join('test_alignment.txt')

    segmented = gp.Aligner(mapping=mapping_file).segment(u"aabb")

    # the result is also drawn to a fixed path under /tmp before verification
    segmented.draw('/tmp/seg.dot')
    assert segmented.verify()
Example #8
0
def test_chain(datadir):
    """Check that the object returned by chain("aabb") passes its own verify()."""
    mapping_file = datadir.join('test_alignment.txt')

    chained = gp.Aligner(mapping=mapping_file).chain(u"aabb")

    # the result is also drawn to a fixed path under /tmp before verification
    chained.draw('/tmp/chain.dot')
    assert chained.verify()
Example #9
0
def test_loading(datadir):
    """Check that an aligner built from the test mapping reports status 1."""
    mapping_file = datadir.join('test_alignment.txt')
    loaded = gp.Aligner(mapping=mapping_file)

    # NOTE(review): 1 presumably signals a successfully loaded mapping --
    # confirm against gp.Aligner.
    assert loaded.status == 1
Example #10
0
def test_constructor():
    """Check that an aligner can be constructed without any arguments."""
    aligner = gp.Aligner()

    # identity comparison: unlike ``!=`` it cannot be influenced by a
    # user-defined __ne__/__eq__ on the aligner class
    assert aligner is not None