def main():
    train, dev = datasets.imdb()
    train_X, train_y = zip(*train)
    dev_X, dev_y = zip(*dev)
    model = LinearModel(2)
    train_y = to_categorical(train_y, nb_classes=2)
    dev_y = to_categorical(dev_y, nb_classes=2)
    nlp = spacy.load("en")
    train_X = [
        model.ops.asarray([tok.orth for tok in doc], dtype="uint64")
        for doc in nlp.pipe(train_X)
    ]
    dev_X = [
        model.ops.asarray([tok.orth for tok in doc], dtype="uint64")
        for doc in nlp.pipe(dev_X)
    ]
    dev_X = preprocess(model.ops, dev_X)
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        trainer.dropout = 0.0
        trainer.batch_size = 512
        trainer.nb_epoch = 3
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            keys_vals_lens = preprocess(model.ops, X)
            scores, backprop = model.begin_update(keys_vals_lens, drop=trainer.dropout)
            backprop(scores - y, optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
def main(depth=2, width=512, nb_epoch=30):
    prefer_gpu()
    # Configuration here isn't especially good. But, for demo..
    with Model.define_operators({"**": clone, ">>": chain}):
        model = ReLu(width) >> ReLu(width) >> Softmax()
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)
    dev_y = to_categorical(dev_y)
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype="float32")
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.0).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print("Avg dev.: %.3f" % model.evaluate(dev_X, dev_y))
    with open("out.pickle", "wb") as file_:
        pickle.dump(model, file_, -1)
def mnist():
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = NumpyOps().unzip(train_data)
    dev_X, dev_y = NumpyOps().unzip(dev_data)
    dev_y = to_categorical(dev_y, nb_classes=10)
    train_y = to_categorical(train_y, nb_classes=10)
    return (train_X[:1000], train_y[:1000]), (dev_X, dev_y)
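# A minimal sketch of what the to_categorical conversion above produces, using plain numpy
# instead of thinc (to_onehot is an illustrative stand-in, not part of the library): each
# integer label becomes one row of an identity matrix.
import numpy

def to_onehot(labels, nb_classes):
    labels = numpy.asarray(labels, dtype="int32")
    return numpy.eye(nb_classes, dtype="float32")[labels]

print(to_onehot([3, 0, 9], nb_classes=10))
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
#  [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]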
def main(use_gpu=False, nb_epoch=100):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb(limit=2000)
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))
    nlp = spacy.load('en_vectors_web_lg')
    nlp.add_pipe(nlp.create_pipe('sentencizer'), first=True)
    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    train_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents)) for doc in tqdm.tqdm(nlp.pipe(test_X))]
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    n_sent = sum([len(list(sents)) for sents in train_X])
    print("%d sentences" % n_sent)
    model = build_model(2, width=128, conv_depth=2, depth=2,
                        train_X=train_X, train_y=train_y)
    with model.begin_training(train_X[:100], train_y[:100]) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], epoch_var[-1],
                      model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.)
            epoch_var.append(0.)

        trainer.each_epoch.append(report_progress)
        batch_sizes = compounding(64, 64, 1.01)
        trainer.dropout = 0.3
        trainer.batch_size = int(next(batch_sizes))
        trainer.dropout_decay = 0.0
        trainer.nb_epoch = nb_epoch
        # optimizer.alpha = 0.1
        # optimizer.max_grad_norm = 10.0
        # optimizer.b1 = 0.0
        # optimizer.b2 = 0.0
        epoch_var = [0.]
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            losses = ((yh - y) ** 2.).sum(axis=1) / y.shape[0]
            epoch_var[-1] += losses.var()
            loss = losses.mean()
            backprop((yh - y) / yh.shape[0], optimizer)
            epoch_loss[-1] += loss
            trainer.batch_size = int(next(batch_sizes))
    with model.use_params(optimizer.averages):
        print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
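# The batch-size schedule above uses compounding(); with compounding(64, 64, 1.01) the value
# never grows, since start == stop. A plain-Python sketch of the assumed behaviour (multiply
# by the factor each step, capped at stop) shows how a growing schedule would look:
def compounding_sketch(start, stop, compound):
    value = start
    while True:
        yield min(value, stop)
        value *= compound

sizes = compounding_sketch(1.0, 32.0, 1.5)
print([int(next(sizes)) for _ in range(8)])   # [1, 1, 2, 3, 5, 7, 11, 17]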
def main(use_gpu=False, nb_epoch=50):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)
    nlp = Language()
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    train_X = [nlp.make_doc(x) for x in train_X]
    dev_X = [nlp.make_doc(x) for x in dev_X]
    model = build_model(2, 1)
    print("Begin training")
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.0
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        for X, y in trainer.iterate(train_X[:1000], train_y[:1000]):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.).sum() / y.shape[0]
            backprop((yh - y) / y.shape[0], optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
    with open('out.pickle', 'wb') as file_:
        pickle.dump(model, file_, -1)
def create_data(ops, nlp, rows):
    Xs = []
    ys = []
    for (text1, text2), label in rows:
        Xs.append((nlp(text1), nlp(text2)))
        ys.append(label)
    return Xs, to_categorical(ops.asarray(ys))
def instances():
    lengths = numpy.asarray([5, 4], dtype="int32")
    keys = numpy.arange(9, dtype="uint64")
    values = numpy.ones(9, dtype="float")
    X = (keys, values, lengths)
    y = numpy.asarray([0, 2], dtype="int32")
    return X, to_categorical(y, nb_classes=3)
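# A small sanity check (plain numpy, no thinc required) of the ragged (keys, values, lengths)
# layout built by instances(): lengths [5, 4] splits the nine keys/values into two examples.
import numpy

keys = numpy.arange(9, dtype="uint64")
values = numpy.ones(9, dtype="float")
lengths = numpy.asarray([5, 4], dtype="int32")

start = 0
for length in lengths:
    print(keys[start:start + length], values[start:start + length])
    start += length
# [0 1 2 3 4] [1. 1. 1. 1. 1.]
# [5 6 7 8] [1. 1. 1. 1.]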
def main(use_gpu=False, nb_epoch=50):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)
    nlp = Language()
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    train_X = [nlp.make_doc(x) for x in train_X]
    dev_X = [nlp.make_doc(x) for x in dev_X]
    model = build_model(2, 1)
    print("Begin training")
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.0
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        for X, y in trainer.iterate(train_X[:1000], train_y[:1000]):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.0).sum() / y.shape[0]
            backprop((yh - y) / y.shape[0], optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print("Avg dev.: %.3f" % model.evaluate(dev_X, dev_y))
    with open("out.pickle", "wb") as file_:
        pickle.dump(model, file_, -1)
def preprocess(ops, nlp, rows, get_ids):
    '''Parse the texts with spaCy. Make one-hot vectors for the labels.'''
    Xs = []
    ys = []
    for (text1, text2), label in rows:
        Xs.append((get_ids([nlp(text1)])[0], get_ids([nlp(text2)])[0]))
        ys.append(label)
    return Xs, to_categorical(ys, nb_classes=2)
def preprocess(ops, nlp, rows, get_ids):
    """Parse the texts with spaCy. Make one-hot vectors for the labels."""
    Xs = []
    ys = []
    for (text1, text2), label in rows:
        Xs.append((get_ids([nlp(text1)])[0], get_ids([nlp(text2)])[0]))
        ys.append(label)
    return Xs, to_categorical(ys, nb_classes=2)
def preprocess(ops, nlp, rows):
    '''Parse the texts with spaCy. Make one-hot vectors for the labels.'''
    Xs = []
    ys = []
    for (text1, text2), label in rows:
        Xs.append((nlp(text1), nlp(text2)))
        ys.append(label)
    return Xs, to_categorical(ops.asarray(ys))
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)
    model = with_flatten(
        chain(
            HashEmbed(width, nr_vector),
            ReLu(width, width),
            ReLu(width, width),
            Softmax(nr_tag, width)))
    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
def get_characters_loss(ops, docs, prediction, nr_char=10):
    target_ids = numpy.vstack([doc.to_utf8_array(nr_char=nr_char) for doc in docs])
    target_ids = target_ids.reshape((-1,))
    target = ops.asarray(to_categorical(target_ids, nb_classes=256), dtype="f")
    target = target.reshape((-1, 256 * nr_char))
    diff = prediction - target
    loss = (diff ** 2).sum()
    d_target = diff / float(prediction.shape[0])
    return loss, d_target
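# Shape walk-through for get_characters_loss, using plain numpy and random ids (assuming
# doc.to_utf8_array yields one row of nr_char byte ids per token, so the vstack gives an
# (N, nr_char) array for N tokens). One-hot encoding the flattened ids gives (N * nr_char, 256),
# and the reshape packs that back into one (N, 256 * nr_char) row per token, matching the
# prediction layout.
import numpy

N, nr_char = 2, 10
target_ids = numpy.random.randint(0, 256, size=(N, nr_char))
onehot = numpy.eye(256, dtype="float32")[target_ids.reshape((-1,))]   # (20, 256)
target = onehot.reshape((-1, 256 * nr_char))                          # (2, 2560)
print(onehot.shape, target.shape)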
def preprocess(ops, nlp, rows):
    '''Parse the texts with spaCy. Make one-hot vectors for the labels.'''
    Xs = []
    ys = []
    print(len(rows[0]))
    for (text1, text2), label in rows:
        Xs.append((ops.asarray([t.orth for t in nlp(text1)]),
                   ops.asarray([t.orth for t in nlp(text2)])))
        ys.append(label)
    return Xs, to_categorical(ops.asarray(ys))
def main(depth=2, width=512, nb_epoch=30):
    prefer_gpu()
    torch.set_num_threads(1)
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = Model.ops.unzip(train_data)
    dev_X, dev_y = Model.ops.unzip(dev_data)
    dev_y = to_categorical(dev_y)
    model = PyTorchWrapper(
        PyTorchFeedForward(
            depth=depth,
            width=width,
            input_size=train_X.shape[1],
            output_size=dev_y.shape[1],
        ))
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            # with model.use_params(optimizer.averages):
            print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype="float32")
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.0).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print("Avg dev.: %.3f" % model.evaluate(dev_X, dev_y))
    with open("out.pickle", "wb") as file_:
        pickle.dump(model, file_, -1)
def main(depth=2, width=512, nb_epoch=30):
    prefer_gpu()
    torch.set_num_threads(1)
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = Model.ops.unzip(train_data)
    dev_X, dev_y = Model.ops.unzip(dev_data)
    dev_y = to_categorical(dev_y)
    model = PyTorchWrapper(
        PyTorchFeedForward(
            depth=depth,
            width=width,
            input_size=train_X.shape[1],
            output_size=dev_y.shape[1],
        )
    )
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.0]

        def report_progress():
            # with model.use_params(optimizer.averages):
            print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype="float32")
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.0).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print("Avg dev.: %.3f" % model.evaluate(dev_X, dev_y))
    with open("out.pickle", "wb") as file_:
        pickle.dump(model, file_, -1)
def main(depth=2, width=512, nb_epoch=30):
    if CupyOps.xp is not None:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    # Configuration here isn't especially good. But, for demo..
    with Model.define_operators({'**': clone, '>>': chain}):
        model = ReLu(width) >> ReLu(width) >> Softmax()
    train_data, dev_data, _ = datasets.mnist()
    train_X, train_y = model.ops.unzip(train_data)
    dev_X, dev_y = model.ops.unzip(dev_data)
    dev_y = to_categorical(dev_y)
    with model.begin_training(train_X, train_y, L2=1e-6) as (trainer, optimizer):
        epoch_loss = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.)

        trainer.each_epoch.append(report_progress)
        trainer.nb_epoch = nb_epoch
        trainer.dropout = 0.3
        trainer.batch_size = 128
        trainer.dropout_decay = 0.0
        train_X = model.ops.asarray(train_X, dtype='float32')
        y_onehot = to_categorical(train_y)
        for X, y in trainer.iterate(train_X, y_onehot):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            loss = ((yh - y) ** 2.).sum() / y.shape[0]
            backprop(yh - y, optimizer)
            epoch_loss[-1] += loss
    with model.use_params(optimizer.averages):
        print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
    with open('out.pickle', 'wb') as file_:
        pickle.dump(model, file_, -1)
def main(width=32, nr_vector=1000):
    train_data, check_data, nr_tag = ancora_pos_tags(encode_words=True)
    model = with_flatten(
        chain(
            HashEmbed(width, nr_vector),
            ReLu(width, width),
            ReLu(width, width),
            Softmax(nr_tag, width)))
    train_X, train_y = zip(*train_data)
    dev_X, dev_y = zip(*check_data)
    train_y = [to_categorical(y, nb_classes=nr_tag) for y in train_y]
    dev_y = [to_categorical(y, nb_classes=nr_tag) for y in dev_y]
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.each_epoch.append(
            lambda: print(model.evaluate(dev_X, dev_y)))
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            backprop([yh[i] - y[i] for i in range(len(yh))], optimizer)
    with model.use_params(optimizer.averages):
        print(model.evaluate(dev_X, dev_y))
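# Why the gradient above is a list: wrapping the network in with_flatten makes it take a list
# of sentences and return a list of per-sentence score arrays, so the gradient handed to
# backprop() mirrors that structure. A minimal plain-numpy illustration (shapes invented):
import numpy

yh = [numpy.zeros((7, 17)), numpy.zeros((3, 17))]   # predicted per-token class scores
y = [numpy.zeros((7, 17)), numpy.zeros((3, 17))]    # one-hot targets, one array per sentence
d_scores = [yh[i] - y[i] for i in range(len(yh))]   # one gradient array per sentence
print([d.shape for d in d_scores])                  # [(7, 17), (3, 17)]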
def get_loss(ops, Yh, Y_docs, Xmask):
    Y_ids = docs2ids(Y_docs)
    guesses = Yh.argmax(axis=-1)
    # print("Tru", Y_ids[0][:5], "Sys", guesses[0, :5])
    nC = Yh.shape[-1]
    Y = [to_categorical(y, nb_classes=nC) for y in Y_ids]
    nL = max(Yh.shape[1], max(y.shape[0] for y in Y))
    Y, _ = pad_sequences(ops, Y, pad_to=nL)
    is_accurate = (Yh.argmax(axis=-1) == Y.argmax(axis=-1))
    d_loss = Yh - Y
    for i, doc in enumerate(Y_docs):
        is_accurate[i, len(doc):] = 0
        d_loss[i, len(doc):] = 0
    return d_loss, is_accurate.sum()
def preprocess(ops, nlp, rows):
    '''Parse the texts with spaCy. Make one-hot vectors for the labels.'''
    Xs = []
    ys = []
    for (text1, text2), label in rows:
        doc1 = nlp(text1.lower())
        doc2 = nlp(text2.lower())
        # tokens1 = [token for token in doc1
        #            if not token.is_punct
        #            and not token.is_space]
        # tokens2 = [token for token in doc2
        #            if not token.is_punct
        #            and not token.is_space]
        Xs.append((doc1, doc2))
        ys.append(label)
    return Xs, to_categorical(ops.asarray(ys, dtype='float32'))
def get_loss(Xh, original_docs, masked_docs, loss_mask):
    '''Calculate loss.'''
    # Convert original docs to (nB, nL, nTGT) with one-hot encoding
    X_ids = docs2ids(original_docs)
    nb_classes = Xh.shape[-1]
    X = [to_categorical(y, nb_classes=nb_classes) for y in X_ids]
    X, _ = pad_sequences(Model.ops, X)
    # Loss calculation only on the masked positions
    dXh = Xh - X
    dXh = dXh * Model.ops.xp.expand_dims(loss_mask, axis=2)
    # Count accurate and inaccurate tokens
    accurate = Xh.argmax(axis=-1) == X.argmax(axis=-1)
    inaccurate = Xh.argmax(axis=-1) != X.argmax(axis=-1)
    accurate = accurate * loss_mask
    inaccurate = inaccurate * loss_mask
    return dXh, accurate.sum(), accurate.sum() + inaccurate.sum()
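# A tiny numpy illustration (shapes invented) of how the loss_mask above restricts the
# gradient to masked positions: broadcasting an (nB, nL) mask over the class axis zeroes
# the gradient at every position that was not masked out in the input.
import numpy

nB, nL, nTGT = 1, 4, 5
dXh = numpy.ones((nB, nL, nTGT))
loss_mask = numpy.asarray([[0, 1, 0, 1]], dtype="float64")   # only positions 1 and 3 count
dXh = dXh * numpy.expand_dims(loss_mask, axis=2)
print(dXh[0, :, 0])   # [0. 1. 0. 1.]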
def get_loss(ops, Yh, Y_docs, Xmask, epoch=False, d={}):
    Y_ids = docs2ids(Y_docs)
    guesses = Yh.argmax(axis=-1)
    nC = Yh.shape[-1]
    Y = [to_categorical(y, nb_classes=nC) for y in Y_ids]
    nL = max(Yh.shape[1], max(y.shape[0] for y in Y))
    Y, _ = pad_sequences(ops, Y, pad_to=nL)
    if epoch:
        print(' '.join(get_model_sentence(Yh, d)[2]))
        print(Y_docs[2])
    is_accurate = (Yh.argmax(axis=-1) == Y.argmax(axis=-1))
    is_not_accurate = (Yh.argmax(axis=-1) != Y.argmax(axis=-1))
    d_loss = Yh - Y
    for i, doc in enumerate(Y_docs):
        is_accurate[i, len(doc):] = 0
        is_not_accurate[i, len(doc):] = 0
        d_loss[i, len(doc):] = 0
    total = is_accurate.sum() + is_not_accurate.sum()
    return d_loss, is_accurate.sum(), total
def preprocess(ops, data, nr_tag):
    Xs, ys = zip(*data)
    Xs = [ops.asarray(remapping(x)) for x in Xs]
    ys = [ops.asarray(to_categorical(y, nb_classes=nr_tag)) for y in ys]
    return Xs, ys
def preprocess(ops, get_feats, data, nr_tag, npad=4):
    Xs, ys = zip(*data)
    Xs = [ops.asarray(x) for x in get_feats(Xs)]
    ys = [ops.asarray(to_categorical(y, nb_classes=nr_tag)) for y in ys]
    return Xs, ys
def main(nr_epoch=20, nr_sent=0, width=128, depth=3, max_batch_size=32, dropout=0.3):
    print("Loading spaCy and preprocessing")
    nlp = spacy.load("en", parser=False, tagger=False, entity=False)
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        model = (
            Orth
            >> SpacyVectors(nlp, width)
            >> (ExtractWindow(nW=1) >> BatchNorm(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )

    print("Preparing training")
    dev_X, dev_y = zip(*dev_sents)
    dev_y = model.ops.flatten(dev_y)
    dev_y = to_categorical(dev_y, nb_classes=50)
    train_X, train_y = zip(*train_sents)
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = 1e-4
        trainer.batch_size = 1
        epoch_times = [timer()]
        epoch_loss = [0.0]
        n_train = sum(len(y) for y in train_y)

        def track_progress():
            start = timer()
            acc = model.evaluate(dev_X, dev_y)
            end = timer()
            with model.use_params(optimizer.averages):
                avg_acc = model.evaluate(dev_X, dev_y)
            stats = (
                epoch_loss[-1],
                acc,
                avg_acc,
                n_train,
                (end - epoch_times[-1]),
                n_train / (end - epoch_times[-1]),
                len(dev_y),
                (end - start),
                float(dev_y.shape[0]) / (end - start),
                trainer.dropout,
            )
            print(
                len(epoch_loss),
                "%.3f train, %.3f (%.3f) dev, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f"
                % stats,
            )
            epoch_times.append(end)
            epoch_loss.append(0.0)

        trainer.each_epoch.append(track_progress)
        print("Training")
        batch_size = 1.0
        for examples, truth in trainer.iterate(train_X, train_y):
            truth = to_categorical(model.ops.flatten(truth), nb_classes=50)
            guess, finish_update = model.begin_update(examples, drop=trainer.dropout)
            n_correct = (guess.argmax(axis=1) == truth.argmax(axis=1)).sum()
            finish_update(guess - truth, optimizer)
            epoch_loss[-1] += n_correct / n_train
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(optimizer.averages):
        print("End: %.3f" % model.evaluate(dev_X, dev_y))
def get_loss(Yh, Y):
    Y = [to_categorical(y, nb_classes=2) for y in Y]
    Y = Model.ops.xp.asarray(Y)[:, 0, :]
    is_accurate = (Yh.argmax(axis=-1) == Y.argmax(axis=-1))
    dYh = Yh - Y
    return dYh, is_accurate.sum()
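# What the [:, 0, :] indexing above does, shown with plain numpy (assuming each y in Y is a
# single integer label, so the one-hot conversion yields a (1, 2) array per example); stacking
# gives (nB, 1, 2) and the indexing drops the singleton axis to leave one row per example.
import numpy

Y = [numpy.eye(2)[[label]] for label in [0, 1, 1]]   # three (1, 2) one-hot rows
Y = numpy.asarray(Y)                                  # (3, 1, 2)
Y = Y[:, 0, :]                                        # (3, 2)
print(Y.shape)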
def main(nr_epoch=20, nr_sent=0, width=128, depth=3, max_batch_size=32, dropout=0.3):
    print("Loading spaCy and preprocessing")
    nlp = spacy.load('en', parser=False, tagger=False, entity=False)
    train_sents, dev_sents, _ = datasets.ewtb_pos_tags()
    train_sents, dev_sents, nr_class = spacy_preprocess(nlp, train_sents, dev_sents)
    if nr_sent >= 1:
        train_sents = train_sents[:nr_sent]

    print("Building the model")
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone}):
        model = (
            Orth
            >> SpacyVectors(nlp, width)
            >> (ExtractWindow(nW=1) >> BatchNorm(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )

    print("Preparing training")
    dev_X, dev_y = list(zip(*dev_sents))
    dev_y = model.ops.flatten(dev_y)
    dev_y = to_categorical(dev_y, nb_classes=50)
    train_X, train_y = list(zip(*train_sents))
    with model.begin_training(train_X, train_y) as (trainer, optimizer):
        trainer.nb_epoch = nr_epoch
        trainer.dropout = dropout
        trainer.dropout_decay = 1e-4
        trainer.batch_size = 1
        epoch_times = [timer()]
        epoch_loss = [0.]
        n_train = sum(len(y) for y in train_y)

        def track_progress():
            start = timer()
            acc = model.evaluate(dev_X, dev_y)
            end = timer()
            with model.use_params(optimizer.averages):
                avg_acc = model.evaluate(dev_X, dev_y)
            stats = (
                epoch_loss[-1], acc, avg_acc,
                n_train, (end - epoch_times[-1]), n_train / (end - epoch_times[-1]),
                len(dev_y), (end - start), float(dev_y.shape[0]) / (end - start),
                trainer.dropout)
            print(
                len(epoch_loss),
                "%.3f train, %.3f (%.3f) dev, %d/%d=%d wps train, %d/%.3f=%d wps run. d.o.=%.3f"
                % stats)
            epoch_times.append(end)
            epoch_loss.append(0.)

        trainer.each_epoch.append(track_progress)
        print("Training")
        batch_size = 1.
        for examples, truth in trainer.iterate(train_X, train_y):
            truth = to_categorical(model.ops.flatten(truth), nb_classes=50)
            guess, finish_update = model.begin_update(examples, drop=trainer.dropout)
            n_correct = (guess.argmax(axis=1) == truth.argmax(axis=1)).sum()
            finish_update(guess - truth, optimizer)
            epoch_loss[-1] += n_correct / n_train
            trainer.batch_size = min(int(batch_size), max_batch_size)
            batch_size *= 1.001
    with model.use_params(optimizer.averages):
        print("End: %.3f" % model.evaluate(dev_X, dev_y))
def main(use_gpu=False):
    if use_gpu:
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb()
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = to_categorical(train_y, nb_classes=2)
    test_y = to_categorical(test_y, nb_classes=2)
    nlp = spacy.load('en')
    nlp.vocab.lex_attr_getters[PREFIX] = lambda string: string[:3]
    for word in nlp.vocab:
        word.prefix_ = word.orth_[:3]
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    # train_X = train_X[:1000]
    # train_y = train_y[:1000]
    print("Parse data")
    train_X = list(nlp.pipe(train_X))
    dev_X = list(nlp.pipe(dev_X))
    n_sent = sum([len(list(doc.sents)) for doc in train_X])
    print("%d sentences" % n_sent)
    hpsearch = BestFirstFinder(nonlin=[SELU], width=[64], depth=[2], conv_depth=[2],
                               batch_size=[128], learn_rate=[0.001], L2=[1e-6],
                               beta1=[0.9], beta2=[0.999], dropout=[0.2])
    for hp in hpsearch.configs:
        for _ in range(3):
            model = build_model(2, train_X=train_X, train_y=train_y, **hp)
            with model.begin_training(train_X[:100], train_y[:100]) as (_, sgd):
                pass
            _, (model_data, train_acc, dev_acc) = train_epoch(
                model, sgd, hp, train_X, train_y, dev_X, dev_y,
                device_id=-1 if not use_gpu else 0)
            print('0', dev_acc * 100, train_acc * 100, hp)
            hpsearch.enqueue(model_data, train_acc, dev_acc)
    hpsearch.temperature = 0.0
    print("Train")
    total = 0
    temperature = 0.0
    while True:
        for model, sgd, hp in hpsearch:
            _, (new_model, train_acc, dev_acc) = train_epoch(
                model, sgd, hp, train_X, train_y, dev_X, dev_y,
                device_id=-1 if not use_gpu else 0,
                temperature=hpsearch.temperature)
            hp = new_model[-1]
            print(
                '%d,%d,%d:\t%.2f\t%.2f\t%.2f\t%d\t%.2f\t%.3f\t%d\t%d\t%.3f\t%.3f\t%.3f' % (
                    total, hp['epochs'], hp['parent'],
                    hpsearch.best_acc * 100, dev_acc * 100, train_acc * 100,
                    int(hp['batch_size']), hp['dropout'], hp['learn_rate'],
                    hp['width'], hp['depth'],
                    hpsearch.temperature,
                    hpsearch.queue[0][0], hpsearch.queue[-1][0]))
            total += 1
            hpsearch.enqueue(new_model, train_acc, dev_acc)