def main(loc,
         width=64,
         depth=2,
         batch_size=128,
         dropout=0.5,
         dropout_decay=1e-5,
         nb_epoch=20):
    print("Load spaCy")
    nlp = spacy.load('en',
                     parser=False,
                     entity=False,
                     matcher=False,
                     tagger=False)
    print("Construct model")
    Model.ops = CupyOps()
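    # CupyOps makes thinc allocate its arrays with CuPy, i.e. run the model on the GPU.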
    with Model.define_operators({'>>': chain, '**': clone, '|': concatenate}):
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width * 3)
        sent2vec = (
            get_word_ids
            >> flatten_add_lengths
            >> with_getitem(0, SpacyVectors(nlp, width) >> mwe_encode ** depth)
            >> Pooling(mean_pool, max_pool)
        )
        model = (
            ((Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))
            >> Maxout(width, width * 4)
            >> Maxout(width, width) ** depth
            >> Softmax(2, width)
        )

    print("Read and parse quora data")
    rows = read_quora_tsv_data(pathlib.Path(loc))
    train, dev = partition(rows, 0.9)
    train_X, train_y = create_data(model.ops, nlp, train)
    dev_X, dev_y = create_data(model.ops, nlp, dev)
    print("Train")
    with model.begin_training(train_X[:20000],
                              train_y[:20000]) as (trainer, optimizer):
        trainer.batch_size = batch_size
        trainer.nb_epoch = nb_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = dropout_decay

        epoch_times = [timer()]
        epoch_loss = [0.]
        n_train_words = sum(len(d0) + len(d1) for d0, d1 in train_X)
        n_dev_words = sum(len(d0) + len(d1) for d0, d1 in dev_X)

        def track_progress():
            stats = get_stats(model, optimizer.averages, dev_X, dev_y,
                              epoch_loss[-1], epoch_times[-1], n_train_words,
                              n_dev_words)
            stats.append(trainer.dropout)
            stats = tuple(stats)
            print(
                len(epoch_loss),
                "%.3f loss, %.3f (%.3f) acc, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f"
                % stats)
            epoch_times.append(timer())
            epoch_loss.append(0.)

        trainer.each_epoch.append(track_progress)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)
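
A note on helpers: `partition` and `create_data` are defined elsewhere in the original script. A minimal sketch of a compatible `partition` (hypothetical; assumes the rows are an in-memory sequence):

import random

def partition(examples, split=0.9):
    # Shuffle, then split into (train, dev) with `split` as the train fraction.
    examples = list(examples)
    random.shuffle(examples)
    n_train = int(len(examples) * split)
    return examples[:n_train], examples[n_train:]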
Example #2
def main(width=100,
         depth=4,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         learn_rate=0.001,
         momentum=0.9,
         dropout=0.5,
         dropout_decay=1e-4,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (with_flatten(
            (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3))**depth
            >> Softmax(nr_tag),
            pad=depth))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
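
`track_progress(**locals())` is called once up front, so it has to return the per-epoch callback. A hypothetical sketch of such a factory (the original's reporting is more detailed):

def track_progress(model=None, trainer=None, dev_X=None, dev_y=None, **_):
    # Build a closure that the trainer will call after every epoch.
    def each_epoch():
        acc = model.evaluate(dev_X, model.ops.flatten(dev_y))
        print("dev acc %.3f, dropout %.3f" % (acc, trainer.dropout))
    return each_epoch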
Example #3
def main(width=32, vector_length=8):
    train_data, check_data, nr_tag = ancora_pos_tags()

    model = FeedForward(
        (layerize(flatten_sequences), BatchNorm(Embed(width, vector_length)),
         ExtractWindow(nW=2), BatchNorm(ReLu(width)), BatchNorm(ReLu(width)),
         Softmax(nr_tag)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    dev_y = model.ops.flatten(dev_y)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.batch_size = 8
        trainer.nb_epoch = 10
        trainer.dropout = 0.2
        trainer.dropout_decay = 0.
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            d_loss, loss = categorical_crossentropy(yh, y)
            optimizer.set_loss(loss)
            backprop(d_loss, optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
Example #4
def main(nr_epoch=20, nr_sent=0, width=128, depth=3):
    print("Loading spaCy and preprocessing")
    nlp = spacy.load('en', parser=False, tagger=False, entity=False)
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(
        nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone}):
        model = (Orth >> SpacyVectors(nlp) >>
                 (ExtractWindow(nW=1) >> BatchNorm(Maxout(width)))**depth >>
                 Softmax(nr_class))

    print("Preparing training")
    dev_X, dev_y = zip(*dev_sents)
    dev_y = model.ops.flatten(dev_y)
    train_X, train_y = zip(*train_sents)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = 0.9
        trainer.dropout_decay = 1e-4
        trainer.batch_size = 4
        epoch_times = [timer()]
        epoch_loss = [0.]
        n_train = sum(len(y) for y in train_y)

        def track_progress():
            start = timer()
            acc = model.evaluate(dev_X, dev_y)
            end = timer()
            with model.use_params(optimizer.averages):
                avg_acc = model.evaluate(dev_X, dev_y)
            stats = (epoch_loss[-1], acc, avg_acc, n_train,
                     (end - epoch_times[-1]), n_train /
                     (end - epoch_times[-1]), len(dev_y), (end - start),
                     float(dev_y.shape[0]) / (end - start), trainer.dropout)
            print(
                len(epoch_loss),
                "%.3f loss, %.3f (%.3f) acc, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f"
                % stats)
            epoch_times.append(end)
            epoch_loss.append(0.)

        trainer.each_epoch.append(track_progress)
        print("Training")
        for examples, truth in trainer.iterate(train_X, train_y):
            truth = model.ops.flatten(truth)
            guess, finish_update = model.begin_update(examples,
                                                      drop=trainer.dropout)
            gradient, loss = categorical_crossentropy(guess, truth)
            if loss:
                optimizer.set_loss(loss)
                finish_update(gradient, optimizer)
            epoch_loss[-1] += loss / n_train
    with model.use_params(optimizer.averages):
        print("End: %.3f" % model.evaluate(dev_X, dev_y))
Example #5
def main(width=64,
         depth=2,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         dropout=0.9,
         dropout_decay=1e-3,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = Embed(width, vector_length, 5000, column=0)
        prefix = Embed(width, vector_length, 5000, column=2)
        suffix = Embed(width, vector_length, 5000, column=3)

        model = (layerize(flatten_sequences) >> (lower_case + prefix + suffix)
                 >> Residual(ExtractWindow(nW=1) >> Maxout(width))**depth >>
                 Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2).sum() / y.shape[0]
            if loss > 0.:
                optimizer.set_loss(loss)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            if epoch_train_acc / n_train >= 0.999:
                break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
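
Several of these examples grow the batch size by 0.1% per update, so it ramps from 1 up to the cap of 32 over roughly 3,500 updates:

import math
# Updates needed for 1 * 1.001**n to reach 32:
print(math.ceil(math.log(32) / math.log(1.001)))  # 3468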
Example #6
def main(width=300,
         depth=4,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         dropout=0.9,
         dropout_decay=1e-3,
         nb_epoch=20,
         L2=1e-6,
         device="cpu"):
    cfg = dict(locals())
    print(cfg, file=sys.stderr)
    if cupy is not None and device != 'cpu':
        print("Using GPU", file=sys.stderr)
        Model.ops = CupyOps()
        Model.ops.device = device
    train_data, check_data, tag_map = twitter_ner()
    dev_words, dev_tags = list(zip(*check_data))
    nr_tag = len(tag_map)

    extracter = FeatureExtracter('en', attrs=[ORTH, LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain, '+': add,
                                 '|': concatenate}):
        glove = StaticVectors('en', width // 2, column=0)
        lower_case = (HashEmbed(width, 500, column=1) + HashEmbed(width, 100, column=1))
        shape = HashEmbed(width // 2, 200, column=2)
        prefix = HashEmbed(width // 2, 100, column=3)
        suffix = HashEmbed(width // 2, 100, column=4)

        model = (
            layerize(flatten_sequences)
            >> (lower_case | shape | prefix | suffix)
            >> BN(Maxout(width, pieces=3), nO=width)
            >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, pieces=3), nO=width)) ** depth
            >> Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            #if epoch_train_acc / n_train >= 0.999:
            #    break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)), file=sys.stderr)
        print_dev_sentences(model, dev_words, dev_tags, dev_X, tag_map)
Example #7
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)

    model = with_flatten(
        chain(
            HashEmbed(width, nr_vector),
            ReLu(width, width),
            ReLu(width, width),
            Softmax(nr_tag, width)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(
            lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i]-y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
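
`to_categorical` turns integer tag IDs into one-hot rows. A minimal NumPy equivalent, for illustration only (the real helper ships with thinc):

import numpy

def to_categorical_sketch(ids, nb_classes):
    # One row per ID, with a single 1.0 in the column of that class.
    out = numpy.zeros((len(ids), nb_classes), dtype='float32')
    out[numpy.arange(len(ids)), ids] = 1.0
    return out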
Example #8
def main(width=128,
         depth=4,
         vector_length=64,
         max_batch_size=32,
         dropout=0.9,
         drop_decay=1e-4,
         nb_epoch=20,
         L2=1e-5):
    cfg = dict(locals())
    Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    with Model.define_operators({'**': clone, '>>': chain}):
        model = (layerize(flatten_sequences) >> Embed(width, vector_length) >>
                 (ExtractWindow(nW=1) >> Maxout(width, pieces=3))**depth >>
                 Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
Example #9
def main(depth=2, width=512, nb_epoch=20):
    if CupyOps.xp is not None:
        Model.ops = CupyOps()
    # Configuration here isn't especially good. But, for demo..
    with Model.define_operators({'**': clone, '>>': chain}):
        model = ReLu(width)**depth >> Softmax()

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)

    dev_y = to_categorical(dev_y)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y),
                      trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.75
        trainer.batch_size = 128
        trainer.dropout_decay = 1e-4
        train_X = model.ops.asarray(train_X, dtype='float32')
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2.).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
        with model.use_params(optimizer.averages):
            print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
            with open('out.pickle', 'wb') as file_:
                pickle.dump(model, file_, -1)
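
Reloading the pickled model for later evaluation (hypothetical usage; assumes the same thinc version and that `dev_X`/`dev_y` are still available):

with open('out.pickle', 'rb') as file_:
    loaded = pickle.load(file_)
print(loaded.evaluate(dev_X, dev_y))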
Example #10
def create_embed_relu_relu_softmax(depth, width, vector_length):
    with Model.define_operators({">>": chain}):
        model = with_flatten(
            Embed(width, vector_length)
            >> ExtractWindow(nW=1)
            >> ReLu(width)
            >> ReLu(width)
            >> Softmax(20))
    return model
Example #11
def create_elu_maxout_softmax(depth, width):
    with Model.define_operators({">>": chain}):
        model = ELU(width) >> Maxout(5) >> Softmax()
    return model
Example #12
def create_relu_batchnorm_softmax(depth, width):
    with Model.define_operators({"*": clone, ">>": chain}):
        model = BatchNorm(ReLu(width, 784)) >> Softmax(10, width)
    return model
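
These factory helpers only build and return a model. A hypothetical call site for the last one (note the `depth` argument is unused by that factory):

model = create_relu_batchnorm_softmax(depth=2, width=128)
# The returned model expects 784-dimensional inputs and predicts 10 classes.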
Example #13
def main(nr_epoch=20,
         nr_sent=0,
         width=128,
         depth=3,
         max_batch_size=32,
         dropout=0.3):
    print("Loading spaCy and preprocessing")
    nlp = spacy.load("en", parser=False, tagger=False, entity=False)
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(
        nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        model = (Orth >> SpacyVectors(nlp, width) >>
                 (ExtractWindow(nW=1) >> BatchNorm(Maxout(width)))**depth >>
                 Softmax(nr_class))

    print("Preparing training")
    dev_X, dev_y = zip(*dev_sents)
    dev_y = model.ops.flatten(dev_y)
    dev_y = to_categorical(dev_y, nb_classes=50)
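    # Note: nb_classes is hard-coded to 50 here (and again in the training loop
    # below) rather than using the nr_class value from spacy_preprocess.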
    train_X, train_y = zip(*train_sents)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = 1e-4
        trainer.batch_size = 1
        epoch_times = [timer()]
        epoch_loss = [0.0]
        n_train = sum(len(y) for y in train_y)

        def track_progress():
            start = timer()
            acc = model.evaluate(dev_X, dev_y)
            end = timer()
            with model.use_params(optimizer.averages):
                avg_acc = model.evaluate(dev_X, dev_y)
            stats = (
                epoch_loss[-1],
                acc,
                avg_acc,
                n_train,
                (end - epoch_times[-1]),
                n_train / (end - epoch_times[-1]),
                len(dev_y),
                (end - start),
                float(dev_y.shape[0]) / (end - start),
                trainer.dropout,
            )
            print(
                len(epoch_loss),
                "%.3f train, %.3f (%.3f) dev, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f"
                % stats,
            )
            epoch_times.append(end)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(track_progress)
        print("Training")
        batch_size = 1.0
        for examples, truth in trainer.iterate(train_X, train_y):
            truth = to_categorical(model.ops.flatten(truth), nb_classes=50)
            guess, finish_update = model.begin_update(examples,
                                                      drop=trainer.dropout)
            n_correct = (guess.argmax(axis=1) == truth.argmax(axis=1)).sum()
            finish_update(guess - truth, optimizer)
            epoch_loss[-1] += n_correct / n_train
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(optimizer.averages):
        print("End: %.3f" % model.evaluate(dev_X, dev_y))