def main(width=32, vector_length=8):
    train_data, check_data, nr_tag = ancora_pos_tags()
    # Feed-forward tagger: embed each word id, extract a +/-2 word window,
    # then two batch-normalised ReLu layers and a softmax over the tagset.
    model = FeedForward((
        layerize(flatten_sequences),
        BatchNorm(Embed(width, vector_length)),
        ExtractWindow(nW=2),
        BatchNorm(ReLu(width)),
        BatchNorm(ReLu(width)),
        Softmax(nr_tag),
    ))
    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    dev_y = model.ops.flatten(dev_y)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.batch_size = 8
        trainer.nb_epoch = 10
        trainer.dropout = 0.2
        trainer.dropout_decay = 0.0
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            d_loss, loss = categorical_crossentropy(yh, y)
            optimizer.set_loss(loss)
            backprop(d_loss, optimizer)
    # Evaluate with the averaged parameters, which usually generalise better.
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
def main(width=100, depth=4, vector_length=64,
         min_batch_size=1, max_batch_size=32,
         learn_rate=0.001, momentum=0.9, dropout=0.5, dropout_decay=1e-4,
         nb_epoch=20, L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()
    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain,
                                 '+': add, '|': concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        # Concatenate the four feature embeddings, mix them with a Maxout
        # layer, then apply `depth` residual CNN blocks before the softmax.
        model = with_flatten(
            (lower_case | shape | prefix | suffix)
            >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag),
            pad=depth)

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # The gradient of categorical cross-entropy w.r.t. the softmax
            # input simplifies to (prediction - target).
            gradient = [yh[i] - y[i] for i in range(len(yh))]
            backprop(gradient, optimizer)
            # Grow the batch size gradually, up to the maximum.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open('/tmp/model.pickle', 'wb') as file_:
            pickle.dump(model, file_)
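# The taggers in this file call `preprocess` and `track_progress` without
# defining them. The sketches below are assumptions modelled on the upstream
# Thinc example scripts, not code from this file: `preprocess` featurises
# each sentence and one-hot encodes its tags, and `track_progress` builds the
# per-epoch callback the trainer invokes. (One snippet below calls
# `preprocess(ops, data, nr_tag)` without a feature extracter; that is an
# older variant of the same helper.)
epoch_train_acc = 0.0


def preprocess(ops, get_feats, data, nr_tag):
    # Featurise the word sequences and one-hot encode the tag sequences.
    Xs, ys = zip(*data)
    Xs = [ops.asarray(x) for x in get_feats(Xs)]
    ys = [ops.asarray(to_categorical(y, nb_classes=nr_tag)) for y in ys]
    return Xs, ys


def track_progress(**context):
    # Capture the training state; the trainer calls the closure once per epoch.
    model = context["model"]
    dev_X = context["dev_X"]
    dev_y = model.ops.flatten(context["dev_y"])
    n_train = context["n_train"]
    trainer = context["trainer"]

    def each_epoch():
        global epoch_train_acc
        acc = model.evaluate(dev_X, dev_y)
        with model.use_params(trainer.optimizer.averages):
            avg_acc = model.evaluate(dev_X, dev_y)
        stats = (acc, avg_acc, float(epoch_train_acc) / n_train, trainer.dropout)
        print("%.3f (%.3f) dev acc, %.3f train acc, %.4f drop" % stats)
        epoch_train_acc = 0.0

    return each_epoch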
def main(width=128, depth=1, vector_length=128,
         min_batch_size=16, max_batch_size=16,
         learn_rate=0.001, momentum=0.9, dropout=0.5, dropout_decay=1e-4,
         nb_epoch=20, L2=1e-6):
    using_gpu = prefer_gpu()
    if using_gpu:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    cfg = dict(locals())
    print(cfg)
    train_data, check_data, nr_tag = ancora_pos_tags()
    train_data = list(train_data)
    check_data = list(check_data)
    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({'**': clone, '>>': chain,
                                 '+': add, '|': concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        # Embed and mix the features per word, contextualise each sequence
        # with a PyTorch BiLSTM, then predict the tag with a softmax layer.
        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix)
                >> Maxout(width, pieces=3))
            >> PyTorchBiLSTM(width, width, depth)
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:10], train_y[:10], **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            gradient = [yh[i] - y[i] for i in range(len(yh))]
            backprop(gradient, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
    with open('/tmp/model.pickle', 'wb') as file_:
        pickle.dump(model, file_)
def main(
    width=100,
    depth=4,
    vector_length=64,
    min_batch_size=1,
    max_batch_size=32,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    # Variant of the CNN tagger above that selects the GPU via prefer_gpu().
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()
    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = with_flatten(
            (lower_case | shape | prefix | suffix)
            >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag),
            pad=depth,
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (
        trainer,
        optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            gradient = [yh[i] - y[i] for i in range(len(yh))]
            backprop(gradient, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open("/tmp/model.pickle", "wb") as file_:
            pickle.dump(model, file_)
def main(width=64, depth=2, vector_length=64,
         min_batch_size=1, max_batch_size=32,
         dropout=0.9, dropout_decay=1e-3, nb_epoch=20, L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()
    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({'**': clone, '>>': chain,
                                 '+': add, '|': concatenate}):
        lower_case = Embed(width, vector_length, 5000, column=0)
        prefix = Embed(width, vector_length, 5000, column=2)
        suffix = Embed(width, vector_length, 5000, column=3)

        # Sum the three feature embeddings, then apply `depth` residual CNN
        # blocks before the softmax.
        model = (
            layerize(flatten_sequences)
            >> (lower_case + prefix + suffix)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width)) ** depth
            >> Softmax(nr_tag)
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2).sum() / y.shape[0]
            if loss > 0.0:
                optimizer.set_loss(loss)
            backprop(yh - y, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            # Stop early once training accuracy is essentially perfect.
            if epoch_train_acc / n_train >= 0.999:
                break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)
    model = with_flatten(
        chain(
            HashEmbed(width, nr_vector),
            ReLu(width, width),
            ReLu(width, width),
            Softmax(nr_tag, width)))
    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(
            lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
def main(width=128, depth=4, vector_length=64,
         max_batch_size=32, dropout=0.9, drop_decay=1e-4,
         nb_epoch=20, L2=1e-5):
    cfg = dict(locals())
    Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()
    with Model.define_operators({'**': clone, '>>': chain}):
        # Simpler CNN tagger: a single word-id embedding followed by `depth`
        # (non-residual) convolutional blocks and a softmax.
        model = (
            layerize(flatten_sequences)
            >> Embed(width, vector_length)
            >> (ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag)
        )
    train_X, train_y = preprocess(model.ops, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, check_data, nr_tag)
    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.0
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
def ancora():
    train_data, check_data, nr_class = datasets.ancora_pos_tags()
    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    return (train_X[:100], train_y[:100]), (dev_X, dev_y)
def main(
    width=100,
    depth=4,
    vector_length=64,
    min_batch_size=1,
    max_batch_size=32,
    learn_rate=0.001,
    momentum=0.9,
    dropout=0.5,
    dropout_decay=1e-4,
    nb_epoch=20,
    L2=1e-6,
):
    cfg = dict(locals())
    print(cfg)
    prefer_gpu()
    train_data, check_data, nr_tag = ancora_pos_tags()
    extracter = FeatureExtracter("es", attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({"**": clone, ">>": chain, "+": add, "|": concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        # Like the CNN tagger, but contextualise with multi-headed
        # self-attention instead of residual convolutions.
        model = (
            with_flatten(
                (lower_case | shape | prefix | suffix)
                >> Maxout(width, pieces=3),
                pad=depth)
            >> with_pad_and_mask(MultiHeadedAttention(nM=width, nH=4))
            >> with_flatten(Softmax(nr_tag))
        )

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)
    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (
        trainer,
        optimizer,
    ):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            gradient = [yh[i] - y[i] for i in range(len(yh))]
            backprop(gradient, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
        with open("/tmp/model.pickle", "wb") as file_:
            pickle.dump(model, file_)
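# Upstream, each snippet above lives in its own example script; those scripts
# end with the entry point below, which uses `plac` to expose `main`'s keyword
# arguments as command-line options. This is a convention assumed from the
# Thinc examples, as are the imports each snippet needs (Model, chain, clone,
# concatenate, add, HashEmbed, Embed, Maxout, ExtractWindow, Residual, Softmax,
# FeatureExtracter, to_categorical and prefer_gpu come from Thinc 6.x/7.x
# modules; LOWER, SHAPE, PREFIX and SUFFIX from spacy.attrs).
if __name__ == "__main__":
    import plac

    plac.call(main)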