Example #1
def main(width=32, vector_length=8):
    train_data, check_data, nr_tag = ancora_pos_tags()

    model = FeedForward((
        layerize(flatten_sequences),
        BatchNorm(Embed(width, vector_length)),
        ExtractWindow(nW=2),
        BatchNorm(ReLu(width)),
        BatchNorm(ReLu(width)),
        Softmax(nr_tag),
    ))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    dev_y = model.ops.flatten(dev_y)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.batch_size = 8
        trainer.nb_epoch = 10
        trainer.dropout = 0.2
        trainer.dropout_decay = 0.
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            d_loss, loss = categorical_crossentropy(yh, y)
            optimizer.set_loss(loss)
            backprop(d_loss, optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
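
Example #1 flattens the per-sentence tag arrays with model.ops.flatten before scoring, so the loss and the evaluation run token by token rather than sentence by sentence. A minimal numpy sketch of the assumed behaviour (plain concatenation along the first axis; the real ops.flatten is the backend-aware equivalent):

import numpy

# Two hypothetical sentences of 4 and 2 tokens, each tagged over 3 classes.
sentence_tags = [numpy.zeros((4, 3), dtype="float32"), numpy.zeros((2, 3), dtype="float32")]
flat = numpy.concatenate(sentence_tags, axis=0)
print(flat.shape)  # (6, 3): one row per token across both sentences
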
Example #2
def main(width=100,
         depth=4,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         learn_rate=0.001,
         momentum=0.9,
         dropout=0.5,
         dropout_decay=1e-4,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = with_flatten(
            (lower_case | shape | prefix | suffix)
            >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag),
            pad=depth,
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000],
                              **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
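
The training loop above grows the batch size geometrically: it starts at min_batch_size and is multiplied by 1.001 after every update, capped at max_batch_size. Under the defaults shown (1 up to 32), a quick back-of-the-envelope check of how long the ramp lasts:

import math

min_batch_size, max_batch_size, growth = 1, 32, 1.001
steps = math.ceil(math.log(max_batch_size / min_batch_size) / math.log(growth))
print(steps)  # roughly 3.5k updates before the batch size caps out at 32
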
Example #3
def main(width=128,
         depth=1,
         vector_length=128,
         min_batch_size=16,
         max_batch_size=16,
         learn_rate=0.001,
         momentum=0.9,
         dropout=0.5,
         dropout_decay=1e-4,
         nb_epoch=20,
         L2=1e-6):
    using_gpu = prefer_gpu()
    if using_gpu:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cfg = dict(locals())
    print(cfg)
    train_data, check_data, nr_tag = ancora_pos_tags()
    train_data = list(train_data)
    check_data = list(check_data)

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            )
            >> PyTorchBiLSTM(width, width, depth)
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:10], train_y[:10],
                              **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
    with open('/tmp/model.pickle', 'wb') as file_:
        pickle.dump(model, file_)
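
Examples #2, #3 and #10 finish by pickling the trained model to /tmp/model.pickle. A matching load step, sketched with the standard library only and assuming that file already exists, would be:

import pickle

with open("/tmp/model.pickle", "rb") as file_:
    model = pickle.load(file_)
# The restored model can then be evaluated or used for prediction as above.
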
Example #4
def main(
    width=100,
    depth=4,
    vector_length=64,
    min_batch_size=1,
    max_batch_size=32,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = with_flatten(
            (lower_case | shape | prefix | suffix)
            >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag),
            pad=depth,
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (
        trainer,
        optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open("/tmp/model.pickle", "wb") as file_:
            pickle.dump(model, file_)
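
With the operator table passed to Model.define_operators ('>>' mapped to chain, '|' to concatenate, '**' to clone), the model expression in Example #4 is syntactic sugar over ordinary combinator calls. A hand-expanded rough equivalent, assuming the variadic chain/concatenate behave like the chained binary operators:

# Reuses lower_case, shape, prefix, suffix, width, depth and nr_tag from Example #4's scope.
model = with_flatten(
    chain(
        concatenate(lower_case, shape, prefix, suffix),
        Maxout(width, pieces=3),
        clone(Residual(chain(ExtractWindow(nW=1), Maxout(width, pieces=3))), depth),
        Softmax(nr_tag),
    ),
    pad=depth,
)
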
Example #5
def main(width=64,
         depth=2,
         vector_length=64,
         min_batch_size=1,
         max_batch_size=32,
         dropout=0.9,
         dropout_decay=1e-3,
         nb_epoch=20,
         L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({
            '**': clone,
            '>>': chain,
            '+': add,
            '|': concatenate
    }):
        lower_case = Embed(width, vector_length, 5000, column=0)
        prefix = Embed(width, vector_length, 5000, column=2)
        suffix = Embed(width, vector_length, 5000, column=3)

        model = (
            layerize(flatten_sequences)
            >> (lower_case + prefix + suffix)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width)) ** depth
            >> Softmax(nr_tag)
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y)**2).sum() / y.shape[0]
            if loss > 0.:
                optimizer.set_loss(loss)

            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            if epoch_train_acc / n_train >= 0.999:
                break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
Example #6
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)

    model = with_flatten(
        chain(HashEmbed(width, nr_vector), ReLu(width, width), ReLu(width, width),
              Softmax(nr_tag, width)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
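
Examples #6 and #7 one-hot encode the integer tag IDs with to_categorical before training, so that the yh[i] - y[i] error signal passed to backprop lines up row-for-row with the Softmax output. A small sketch of the assumed encoding:

import numpy

def to_one_hot(ids, nb_classes):
    # Assumed behaviour of to_categorical: one row per label, a 1 in that label's column.
    out = numpy.zeros((len(ids), nb_classes), dtype="float32")
    out[numpy.arange(len(ids)), ids] = 1.0
    return out

print(to_one_hot([2, 0, 1], nb_classes=3))
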
Example #7
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)

    model = with_flatten(
                 chain(
                    HashEmbed(width, nr_vector),
                    ReLu(width, width),
                    ReLu(width, width),
                    Softmax(nr_tag, width)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(
            lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i]-y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
Example #8
def main(width=128,
         depth=4,
         vector_length=64,
         max_batch_size=32,
         dropout=0.9,
         drop_decay=1e-4,
         nb_epoch=20,
         L2=1e-5):
    cfg = dict(locals())
    Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    with Model.define_operators({'**': clone, '>>': chain}):
        model = (
            layerize(flatten_sequences)
            >> Embed(width, vector_length)
            >> (ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag)
        )

    train_X, train_y = preprocess(model.ops, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)

            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001

            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
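
Example #8 (like #5) accumulates epoch_train_acc by counting tokens whose predicted tag (argmax over the Softmax row) matches the argmax of the one-hot target. A toy check of that accuracy computation on flattened arrays:

import numpy

yh = numpy.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.3, 0.3, 0.4]])  # predictions
y = numpy.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype="float32")    # one-hot targets
correct = (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
print(correct / len(y))  # 1.0: all three tokens tagged correctly
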
Example #9
def ancora():
    train_data, check_data, nr_class = datasets.ancora_pos_tags()
    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    return (train_X[:100], train_y[:100]), (dev_X, dev_y)
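
Example #9 is just a data-loading helper: it truncates the training split to the first 100 sentences and returns the dev split untouched. A hypothetical call site:

# Unpack the (truncated) training split and the dev split from the helper above.
(train_X, train_y), (dev_X, dev_y) = ancora()
print(len(train_X), len(dev_X))
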
Example #10
def main(
    width=128,
    depth=1,
    vector_length=128,
    min_batch_size=16,
    max_batch_size=16,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    using_gpu = prefer_gpu()
    if using_gpu:
        torch.set_default_tensor_type("torch.cuda.FloatTensor")
    cfg = dict(locals())
    print(cfg)
    train_data, check_data, nr_tag = ancora_pos_tags()
    train_data = list(train_data)
    check_data = list(check_data)

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3)
            )
            >> PyTorchBiLSTM(width, width, depth)
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:10], train_y[:10], **cfg) as (
        trainer,
        optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
    with open("/tmp/model.pickle", "wb") as file_:
        pickle.dump(model, file_)
Example #11
def main(
    width=100,
    depth=4,
    vector_length=64,
    min_batch_size=1,
    max_batch_size=32,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({
            "**": clone,
            ">>": chain,
            "+": add,
            "|": concatenate
    }):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix) >> Maxout(width, pieces=3),
                pad=depth,
            )
            >> with_pad_and_mask(MultiHeadedAttention(nM=width, nH=4))
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (
            trainer,
            optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)

            gradient = [yh[i] - y[i] for i in range(len(yh))]

            backprop(gradient, optimizer)

            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open("/tmp/model.pickle", "wb") as file_:
            pickle.dump(model, file_)