def test_reversible_encoding():
    """Encoding a text and decoding it produces the same result"""
    original = "For the glory for mankind"
    enc = Encoder([original])
    # Round-trip: text -> indexes -> text must be lossless
    indexes = enc.encodetext(original)
    recovered = enc.decodeindexes(indexes)

    print("Original text: %s" % original)
    print("Encoded text: " + str(indexes))
    print("Decoded text: %s" % recovered)
    assert original == recovered
def test_consistent_encoding():
    """When two encoders are created with different corpus that share the same tokens, encoders are equal"""
    # Two corpora built from the same vocabulary, arranged differently
    first_corpus = [
        "For the glory of mankind",
        "God's in his heaven. All's right with the world"
    ]
    second_corpus = [
        "For the glory of God",
        "mankind's in his world. All's right with the heaven"
    ]

    # Encoders depend only on the token set, so both must compare equal
    assert Encoder(first_corpus) == Encoder(second_corpus)
def test_hypertrain_loadcheckpoints():
    """A previously generated checkpoints file can be used to continue the hyperoptimization"""
    modelclass = SmallWavenet
    corpus = Corpus([
        "This is a very small corpus for testing the hypertrain procedure.",
        "Hope it works!!!"
    ])
    encoder = Encoder(corpus, CharTokenizer)
    checkpointsfile = DATAFOLDER + "checkpoints"

    # Copy the stored checkpoints into a throwaway named file so the test
    # fixture is never modified. (Renamed from `tempfile` to avoid shadowing
    # the stdlib module name.)
    with NamedTemporaryFile("r") as checkpointcopy:
        tempdir = mkdtemp()
        try:
            copyfile(checkpointsfile, checkpointcopy.name)
            model = hypertrain(modelclass,
                               encoder,
                               corpus,
                               tempdir,
                               n_calls=15,
                               verbose=2,
                               valmask=[False, True],
                               patience=1,
                               maxepochs=10,
                               checkpointfile=checkpointcopy.name)
        finally:
            # Always remove the scratch models directory, even when
            # hypertrain raises — the original leaked it on failure.
            rmtree(tempdir)
        assert model is not None
def test_writer_beamsearch():
    """Beam search works as expected"""
    model = MockModel()
    enc = Encoder(corpus=Corpus(["abc"]), tokenizer=CharTokenizer())
    # creativity=0 makes the search deterministic
    writer = Writer(model, enc, creativity=0, beamsize=3, batchsize=3)
    seed = np.array([0, 0])
    expected = [0, 0, 0]

    obtained = writer.beamsearch(seed)

    print("Expected", expected)
    print("Obtained", obtained)
    assert obtained == expected
# Example #5
def train(corpus, corpusformat, encoderfile, modelfile, architecture,
          tokenizer, trials, tmpmodels, checkpoint, maxepochs):
    """Trains a Neurowriter model.

    Loads the corpus with the formatter named by corpusformat, builds an
    Encoder (saved to encoderfile), runs the hyperoptimization for `trials`
    calls and saves the best model to modelfile.

    Args:
        corpus: corpus input handed to the formatter — presumably a path
            or raw text; verify against FORMATTERSBYNAME. TODO confirm.
        corpusformat: key into FORMATTERSBYNAME selecting the corpus parser.
        encoderfile: destination path for the fitted encoder.
        modelfile: destination path for the trained model.
        architecture: model architecture name, resolved via modelbyname.
        tokenizer: tokenizer name (resolved via tokenizerbyname) or None
            for the Encoder default.
        trials: number of hyperoptimization calls (n_calls).
        tmpmodels: directory for intermediate models, or None to use a
            temporary directory.
        checkpoint: checkpoints file path, or None to use a temporary file.
        maxepochs: maximum training epochs per trial.
    """
    # Load corpus
    corpus = FORMATTERSBYNAME[corpusformat](corpus)
    print("Training with corpus:", corpus[0][0:1000])

    # Encoding
    encoder = Encoder(
        corpus,
        tokenizerbyname(tokenizer) if tokenizer is not None else None)
    encoder.save(encoderfile)

    print("Computed encoder:", encoder.char2index)

    # Prepare temporary files. Keep the wrapper objects so they can be
    # released deterministically below — the original relied on garbage
    # collection, leaking them if hypertrain raised.
    tmpdirobj = None
    tmpfileobj = None
    if tmpmodels is None:
        tmpdirobj = tempfile.TemporaryDirectory()
        tmpmodels = tmpdirobj.name
    if checkpoint is None:
        tmpfileobj = tempfile.NamedTemporaryFile()
        checkpoint = tmpfileobj.name

    try:
        # Model training
        modelclass = modelbyname(architecture)
        model = hypertrain(modelclass,
                           encoder,
                           corpus,
                           tmpmodels,
                           n_calls=trials,
                           verbose=2,
                           valmask=[False] * 3 + [True],
                           checkpointfile=checkpoint,
                           maxepochs=maxepochs)
        model.save(modelfile)
    finally:
        # Deterministic cleanup of any temporaries this function created
        if tmpdirobj is not None:
            tmpdirobj.cleanup()
        if tmpfileobj is not None:
            tmpfileobj.close()
def test_hypertrain_run():
    """A small hypertraining procedure can be run"""
    modelclass = PerceptronModel
    corpus = Corpus([
        "This is a very small corpus for testing the hypertrain procedure.",
        "Hope it works!!!"
    ])
    encoder = Encoder(corpus, CharTokenizer)

    # Renamed the context variable from `tempfile` to avoid shadowing the
    # stdlib module name.
    with NamedTemporaryFile("r") as checkpointfile:
        tempdir = mkdtemp()
        try:
            model = hypertrain(modelclass,
                               encoder,
                               corpus,
                               tempdir,
                               n_calls=15,
                               verbose=2,
                               valmask=[False, True],
                               patience=1,
                               maxepochs=10,
                               checkpointfile=checkpointfile.name)
        finally:
            # Always remove the scratch models directory, even when
            # hypertrain raises — the original leaked it on failure.
            rmtree(tempdir)
        assert model is not None