def main(loc, width=64, depth=2, batch_size=128, dropout=0.5,
         dropout_decay=1e-5, nb_epoch=20):
    print("Load spaCy")
    nlp = spacy.load('en', parser=False, entity=False, matcher=False, tagger=False)

    print("Construct model")
    Model.ops = CupyOps()
    with Model.define_operators({'>>': chain, '**': clone, '|': concatenate}):
        mwe_encode = ExtractWindow(nW=1) >> Maxout(width, width * 3)
        # Encode a sentence: map words to ids, embed them, apply the window
        # encoder `depth` times, then pool over the sequence (mean and max).
        sent2vec = (
            get_word_ids
            >> flatten_add_lengths
            >> with_getitem(0, SpacyVectors(nlp, width) >> mwe_encode ** depth)
            >> Pooling(mean_pool, max_pool))
        # Encode both sentences of each pair with the shared encoder, then
        # classify the concatenated vectors as duplicate / not duplicate.
        model = (
            ((Arg(0) >> sent2vec) | (Arg(1) >> sent2vec))
            >> Maxout(width, width * 4)
            >> Maxout(width, width) ** depth
            >> Softmax(2, width))

    print("Read and parse Quora data")
    rows = read_quora_tsv_data(pathlib.Path(loc))
    train, dev = partition(rows, 0.9)
    train_X, train_y = create_data(model.ops, nlp, train)
    dev_X, dev_y = create_data(model.ops, nlp, dev)

    print("Train")
    with model.begin_training(train_X[:20000], train_y[:20000]) as (trainer, optimizer):
        trainer.batch_size = batch_size
        trainer.nb_epoch = nb_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = dropout_decay

        epoch_times = [timer()]
        epoch_loss = [0.]
        n_train_words = sum(len(d0) + len(d1) for d0, d1 in train_X)
        n_dev_words = sum(len(d0) + len(d1) for d0, d1 in dev_X)

        def track_progress():
            stats = get_stats(model, optimizer.averages, dev_X, dev_y,
                              epoch_loss[-1], epoch_times[-1],
                              n_train_words, n_dev_words)
            stats.append(trainer.dropout)
            stats = tuple(stats)
            print(
                len(epoch_loss),
                "%.3f loss, %.3f (%.3f) acc, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f" % stats)
            epoch_times.append(timer())
            epoch_loss.append(0.)

        trainer.each_epoch.append(track_progress)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)

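# `read_quora_tsv_data`, `partition`, `create_data` and `get_stats` are helper
# functions from the example script and are not shown in this file. As an
# illustration only, a hypothetical `partition` helper consistent with the call
# above (shuffle the rows and split them 90/10 into train and dev) could look
# like this; the real helper may split differently.
import random

def partition(examples, split=0.9):
    examples = list(examples)
    random.shuffle(examples)
    n_train = int(len(examples) * split)
    return examples[:n_train], examples[n_train:]
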
def main(width=100, depth=4, vector_length=64, min_batch_size=1,
         max_batch_size=32, learn_rate=0.001, momentum=0.9, dropout=0.5,
         dropout_decay=1e-4, nb_epoch=20, L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain,
                                 '+': add, '|': concatenate}):
        lower_case = HashEmbed(width, 100, column=0)
        shape = HashEmbed(width // 2, 200, column=1)
        prefix = HashEmbed(width // 2, 100, column=2)
        suffix = HashEmbed(width // 2, 100, column=3)

        model = with_flatten(
            (lower_case | shape | prefix | suffix)
            >> Maxout(width, pieces=3)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag),
            pad=depth)

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X[:5000], train_y[:5000], **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            gradient = [yh[i] - y[i] for i in range(len(yh))]
            backprop(gradient, optimizer)
            # Gradually grow the batch size from min_batch_size towards max_batch_size.
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))
    with open('/tmp/model.pickle', 'wb') as file_:
        pickle.dump(model, file_)

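# The `track_progress(**locals())` factory used above (and in several of the
# tagger examples below) is not shown in this file. The following is a minimal,
# hypothetical sketch of such a factory, assuming only the `model`, `trainer`,
# `dev_X`, `dev_y` and `n_train` names that are in scope at the call site; the
# real helper in the example script may report different statistics.
epoch_train_acc = 0.

def track_progress(**context):
    model = context["model"]
    trainer = context["trainer"]
    dev_X = context["dev_X"]
    dev_y = model.ops.flatten(context["dev_y"])
    n_train = context["n_train"]
    epoch_times = [timer()]

    def each_epoch():
        global epoch_train_acc
        # Evaluate with the averaged parameters, as the examples do at the end.
        with model.use_params(trainer.optimizer.averages):
            acc = model.evaluate(dev_X, dev_y)
        end = timer()
        wps = n_train / (end - epoch_times[-1])
        print("dev acc %.3f, train acc %.3f, %d wps, d.o.=%.3f" % (
            acc, epoch_train_acc / n_train, wps, trainer.dropout))
        epoch_train_acc = 0.
        epoch_times.append(end)

    return each_epoch
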
def main(width=32, vector_length=8):
    train_data, check_data, nr_tag = ancora_pos_tags()
    model = FeedForward((
        layerize(flatten_sequences),
        BatchNorm(Embed(width, vector_length)),
        ExtractWindow(nW=2),
        BatchNorm(ReLu(width)),
        BatchNorm(ReLu(width)),
        Softmax(nr_tag)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    dev_y = model.ops.flatten(dev_y)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.batch_size = 8
        trainer.nb_epoch = 10
        trainer.dropout = 0.2
        trainer.dropout_decay = 0.
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            d_loss, loss = categorical_crossentropy(yh, y)
            optimizer.set_loss(loss)
            backprop(d_loss, optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))

def main(nr_epoch=20, nr_sent=0, width=128, depth=3):
    print("Loading spaCy and preprocessing")
    nlp = spacy.load('en', parser=False, tagger=False, entity=False)
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone}):
        model = (
            Orth
            >> SpacyVectors(nlp)
            >> (ExtractWindow(nW=1) >> BatchNorm(Maxout(width))) ** depth
            >> Softmax(nr_class))

    print("Preparing training")
    dev_X, dev_y = zip(*dev_sents)
    dev_y = model.ops.flatten(dev_y)
    train_X, train_y = zip(*train_sents)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = 0.9
        trainer.dropout_decay = 1e-4
        trainer.batch_size = 4
        epoch_times = [timer()]
        epoch_loss = [0.]
        n_train = sum(len(y) for y in train_y)

        def track_progress():
            start = timer()
            acc = model.evaluate(dev_X, dev_y)
            end = timer()
            with model.use_params(optimizer.averages):
                avg_acc = model.evaluate(dev_X, dev_y)
            stats = (
                epoch_loss[-1], acc, avg_acc,
                n_train, (end - epoch_times[-1]),
                n_train / (end - epoch_times[-1]),
                len(dev_y), (end - start),
                float(dev_y.shape[0]) / (end - start),
                trainer.dropout)
            print(
                len(epoch_loss),
                "%.3f loss, %.3f (%.3f) acc, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f" % stats)
            epoch_times.append(end)
            epoch_loss.append(0.)

        trainer.each_epoch.append(track_progress)
        print("Training")
        for examples, truth in trainer.iterate(train_X, train_y):
            truth = model.ops.flatten(truth)
            guess, finish_update = model.begin_update(examples, drop=trainer.dropout)
            gradient, loss = categorical_crossentropy(guess, truth)
            if loss:
                optimizer.set_loss(loss)
            finish_update(gradient, optimizer)
            epoch_loss[-1] += loss / n_train
    with model.use_params(optimizer.averages):
        print("End: %.3f" % model.evaluate(dev_X, dev_y))

def main(width=64, depth=2, vector_length=64, min_batch_size=1,
         max_batch_size=32, dropout=0.9, dropout_decay=1e-3, nb_epoch=20, L2=1e-6):
    cfg = dict(locals())
    print(cfg)
    if cupy is not None:
        print("Using GPU")
        Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    extracter = FeatureExtracter('es', attrs=[LOWER, SHAPE, PREFIX, SUFFIX])
    with Model.define_operators({'**': clone, '>>': chain,
                                 '+': add, '|': concatenate}):
        lower_case = Embed(width, vector_length, 5000, column=0)
        prefix = Embed(width, vector_length, 5000, column=2)
        suffix = Embed(width, vector_length, 5000, column=3)

        model = (
            layerize(flatten_sequences)
            >> (lower_case + prefix + suffix)
            >> Residual(ExtractWindow(nW=1) >> Maxout(width)) ** depth
            >> Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # Squared-error loss, averaged over the batch; the gradient passed
            # to backprop is simply (yh - y).
            loss = ((yh - y) ** 2).sum() / y.shape[0]
            if loss > 0.:
                optimizer.set_loss(loss)
            backprop(yh - y, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            if epoch_train_acc / n_train >= 0.999:
                break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))

def main(width=300, depth=4, vector_length=64, min_batch_size=1,
         max_batch_size=32, dropout=0.9, dropout_decay=1e-3, nb_epoch=20,
         L2=1e-6, device="cpu"):
    cfg = dict(locals())
    print(cfg, file=sys.stderr)
    if cupy is not None and device != 'cpu':
        print("Using GPU", file=sys.stderr)
        Model.ops = CupyOps()
        Model.ops.device = device
    train_data, check_data, tag_map = twitter_ner()
    dev_words, dev_tags = list(zip(*check_data))
    nr_tag = len(tag_map)

    extracter = FeatureExtracter('en', attrs=[ORTH, LOWER, SHAPE, PREFIX, SUFFIX])
    Model.lsuv = True
    with Model.define_operators({'**': clone, '>>': chain,
                                 '+': add, '|': concatenate}):
        # Note: `glove` is constructed here but not wired into the model below.
        glove = StaticVectors('en', width // 2, column=0)
        lower_case = (HashEmbed(width, 500, column=1)
                      + HashEmbed(width, 100, column=1))
        shape = HashEmbed(width // 2, 200, column=2)
        prefix = HashEmbed(width // 2, 100, column=3)
        suffix = HashEmbed(width // 2, 100, column=4)

        model = (
            layerize(flatten_sequences)
            >> (lower_case | shape | prefix | suffix)
            >> BN(Maxout(width, pieces=3), nO=width)
            >> Residual(ExtractWindow(nW=1) >> BN(Maxout(width, pieces=3), nO=width)) ** depth
            >> Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, extracter, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, extracter, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = min_batch_size
        batch_size = float(min_batch_size)
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
            # if epoch_train_acc / n_train >= 0.999:
            #     break
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)), file=sys.stderr)
        print_dev_sentences(model, dev_words, dev_tags, dev_X, tag_map)

def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)

    # Use the nr_vector argument for the hash-embedding table size.
    model = with_flatten(
        chain(
            HashEmbed(width, nr_vector),
            ReLu(width, width),
            ReLu(width, width),
            Softmax(nr_tag, width)))

    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))

def main(width=128, depth=4, vector_length=64, max_batch_size=32,
         dropout=0.9, drop_decay=1e-4, nb_epoch=20, L2=1e-5):
    cfg = dict(locals())
    Model.ops = CupyOps()
    train_data, check_data, nr_tag = ancora_pos_tags()

    with Model.define_operators({'**': clone, '>>': chain}):
        model = (
            layerize(flatten_sequences)
            >> Embed(width, vector_length)
            >> (ExtractWindow(nW=1) >> Maxout(width, pieces=3)) ** depth
            >> Softmax(nr_tag))

    train_X, train_y = preprocess(model.ops, train_data, nr_tag)
    dev_X, dev_y = preprocess(model.ops, check_data, nr_tag)

    n_train = float(sum(len(x) for x in train_X))
    global epoch_train_acc
    with model.begin_training(train_X, train_y, **cfg) as (trainer, optimizer):
        trainer.each_epoch.append(track_progress(**locals()))
        trainer.batch_size = 1
        batch_size = 1.
        for X, y in trainer.iterate(train_X, train_y):
            y = model.ops.flatten(y)
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
            epoch_train_acc += (yh.argmax(axis=1) == y.argmax(axis=1)).sum()
    with model.use_params(trainer.optimizer.averages):
        print(model.evaluate(dev_X, model.ops.flatten(dev_y)))

def main(depth=2, width=512, nb_epoch=20):
    if CupyOps.xp is not None:
        Model.ops = CupyOps()
    # The configuration here isn't especially good, but it's enough for a demo.
    with Model.define_operators({'**': clone, '>>': chain}):
        model = ReLu(width) ** depth >> Softmax()

    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)
    dev_y = to_categorical(dev_y)

    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.75
        trainer.batch_size = 128
        trainer.dropout_decay = 1e-4
        train_X = model.ops.asarray(train_X, dtype='float32')
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # Track squared-error loss per batch; the gradient is just (yh - y).
            loss = ((yh - y) ** 2.).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
    with open('out.pickle', 'wb') as file_:
        pickle.dump(model, file_, -1)

def create_embed_relu_relu_softmax(depth, width, vector_length):
    with Model.define_operators({">>": chain}):
        model = with_flatten(
            Embed(width, vector_length)
            >> ExtractWindow(nW=1)
            >> ReLu(width)
            >> ReLu(width)
            >> Softmax(20))
    return model

def create_elu_maxout_softmax(depth, width):
    with Model.define_operators({">>": chain}):
        model = ELU(width) >> Maxout(5) >> Softmax()
    return model

def create_relu_batchnorm_softmax(depth, width):
    with Model.define_operators({"*": clone, ">>": chain}):
        model = BatchNorm(ReLu(width, 784)) >> Softmax(10, width)
    return model

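# A hypothetical usage sketch for the factory functions above, mirroring the
# MNIST training loop earlier in this file. The factory name and arguments are
# the ones defined above; everything else follows the `main(depth, width,
# nb_epoch)` MNIST example and is only an illustration.
def demo_factory(depth=2, width=128):
    model = create_relu_batchnorm_softmax(depth, width)
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = 5
        train_X = model.ops.asarray(train_X, dtype='float32')
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop(yh - y, optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, to_categorical(dev_y)))
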
def main(nr_epoch=20, nr_sent=0, width=128, depth=3, max_batch_size=32, dropout=0.3):
    print("Loading spaCy and preprocessing")
    nlp = spacy.load("en", parser=False, tagger=False, entity=False)
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        model = (
            Orth
            >> SpacyVectors(nlp, width)
            >> (ExtractWindow(nW=1) >> BatchNorm(Maxout(width))) ** depth
            >> Softmax(nr_class))

    print("Preparing training")
    dev_X, dev_y = zip(*dev_sents)
    dev_y = model.ops.flatten(dev_y)
    dev_y = to_categorical(dev_y, nb_classes=50)
    train_X, train_y = zip(*train_sents)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = 1e-4
        trainer.batch_size = 1
        epoch_times = [timer()]
        epoch_loss = [0.0]
        n_train = sum(len(y) for y in train_y)

        def track_progress():
            start = timer()
            acc = model.evaluate(dev_X, dev_y)
            end = timer()
            with model.use_params(optimizer.averages):
                avg_acc = model.evaluate(dev_X, dev_y)
            stats = (
                epoch_loss[-1], acc, avg_acc,
                n_train, (end - epoch_times[-1]),
                n_train / (end - epoch_times[-1]),
                len(dev_y), (end - start),
                float(dev_y.shape[0]) / (end - start),
                trainer.dropout,
            )
            print(
                len(epoch_loss),
                "%.3f train, %.3f (%.3f) dev, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f" % stats,
            )
            epoch_times.append(end)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(track_progress)
        print("Training")
        batch_size = 1.0
        for examples, truth in trainer.iterate(train_X, train_y):
            truth = to_categorical(model.ops.flatten(truth), nb_classes=50)
            guess, finish_update = model.begin_update(examples, drop=trainer.dropout)
            n_correct = (guess.argmax(axis=1) == truth.argmax(axis=1)).sum()
            finish_update(guess - truth, optimizer)
            epoch_loss[-1] += n_correct / n_train
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(optimizer.averages):
        print("End: %.3f" % model.evaluate(dev_X, dev_y))