# These snippets use the thinc 6.x / spaCy 2.x-era API; names like Model,
# chain, clone, concatenate, HashEmbed, FeatureExtracter and Pooling come
# from the thinc library of that era (e.g. `thinc.api`, `thinc.i2v`,
# `thinc.t2v`, `thinc.misc`), and the attribute IDs (ORTH, LOWER, ...)
# from `spacy.attrs`.


def build_model(nr_class, width, **kwargs):
    # Simplest model: embed the ORTH feature of each token, mean-pool the
    # token vectors into a document vector, and classify with softmax.
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        model = (
            FeatureExtracter([ORTH])
            >> flatten_add_lengths
            >> with_getitem(0, uniqued(HashEmbed(width, 10000, column=0)))
            >> Pooling(mean_pool)
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
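
# A minimal usage sketch for `build_model` above. Hypothetical, not part of
# the original script: it assumes the thinc 6.x-era convention that calling
# a model object runs its forward pass over a batch of spaCy Doc objects.
def _demo_build_model():
    import spacy
    nlp = spacy.blank('en')
    docs = [nlp(u'This was great.'), nlp(u'Total waste of time.')]
    model = build_model(nr_class=2, width=64)
    scores = model(docs)            # array of shape (n_docs, nr_class)
    return scores.argmax(axis=1)    # predicted class per document
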
def main(use_gpu=False, nb_epoch=100):
    if use_gpu:
        # Route all array operations through CuPy.
        Model.ops = CupyOps()
        Model.Ops = CupyOps
    train, test = datasets.imdb(limit=2000)
    print("Load data")
    train_X, train_y = zip(*train)
    test_X, test_y = zip(*test)
    train_y = Model.ops.asarray(to_categorical(train_y, nb_classes=2))
    test_y = Model.ops.asarray(to_categorical(test_y, nb_classes=2))
    nlp = spacy.load('en_vectors_web_lg')
    nlp.add_pipe(nlp.create_pipe('sentencizer'), first=True)
    preprocessor = FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
    train_X = [preprocessor(list(doc.sents))
               for doc in tqdm.tqdm(nlp.pipe(train_X))]
    test_X = [preprocessor(list(doc.sents))
              for doc in tqdm.tqdm(nlp.pipe(test_X))]
    dev_X = train_X[-1000:]
    dev_y = train_y[-1000:]
    train_X = train_X[:-1000]
    train_y = train_y[:-1000]
    print("Parse data")
    n_sent = sum(len(list(sents)) for sents in train_X)
    print("%d sentences" % n_sent)
    model = build_model(2, width=128, conv_depth=2, depth=2,
                        train_X=train_X, train_y=train_y)
    with model.begin_training(train_X[:100], train_y[:100]) as (trainer, optimizer):
        # Running totals, appended to at the end of each epoch.
        epoch_loss = [0.]
        epoch_var = [0.]

        def report_progress():
            with model.use_params(optimizer.averages):
                print(epoch_loss[-1], epoch_var[-1],
                      model.evaluate(dev_X, dev_y), trainer.dropout)
            epoch_loss.append(0.)
            epoch_var.append(0.)

        trainer.each_epoch.append(report_progress)
        batch_sizes = compounding(64, 64, 1.01)
        trainer.dropout = 0.3
        trainer.batch_size = int(next(batch_sizes))
        trainer.dropout_decay = 0.0
        trainer.nb_epoch = nb_epoch
        # optimizer.alpha = 0.1
        # optimizer.max_grad_norm = 10.0
        # optimizer.b1 = 0.0
        # optimizer.b2 = 0.0
        for X, y in trainer.iterate(train_X, train_y):
            yh, backprop = model.begin_update(X, drop=trainer.dropout)
            # Squared-error loss, tracked per batch for the epoch report.
            losses = ((yh - y) ** 2.).sum(axis=1) / y.shape[0]
            epoch_var[-1] += losses.var()
            loss = losses.mean()
            backprop((yh - y) / yh.shape[0], optimizer)
            epoch_loss[-1] += loss
            trainer.batch_size = int(next(batch_sizes))
    with model.use_params(optimizer.averages):
        print('Avg dev.: %.3f' % model.evaluate(dev_X, dev_y))
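
# The loop above backpropagates (yh - y) / yh.shape[0], i.e. the gradient of
# the batch-averaged squared error up to a constant factor of 2, which only
# rescales the learning rate. A standalone check with plain NumPy
# (hypothetical values, not part of the original script):
def _check_mse_gradient():
    import numpy
    yh = numpy.asarray([[0.9, 0.1], [0.3, 0.7]])  # model outputs
    y = numpy.asarray([[1.0, 0.0], [1.0, 0.0]])   # one-hot targets
    # Same per-example loss as in the loop above.
    losses = ((yh - y) ** 2.).sum(axis=1) / y.shape[0]
    # Exact gradient of losses.sum() w.r.t. yh; the loop drops the 2.
    d_loss = 2. * (yh - y) / y.shape[0]
    return losses.mean(), d_loss
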
def build_text_classifier(nr_class, width=64, **cfg):
    # Ensemble text classifier: a sparse linear model and a CNN run in
    # parallel, mixed by a learned Affine layer. Helpers such as
    # `SpacyVectors`, `zero_init`, `logistic`, `_preprocess_doc` and
    # `concatenate_lists` are defined elsewhere in the surrounding module.
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators({'>>': chain, '+': add, '|': concatenate,
                                 '**': clone}):
        if cfg.get('low_data') and pretrained_dims:
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        trained_vectors = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
            >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape)
                    >> LN(Maxout(width, width + (width // 2) * 3)),
                    column=0))
        )

        if pretrained_dims:
            static_vectors = (
                SpacyVectors
                >> with_flatten(Affine(width, pretrained_dims))
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None

        cnn_model = (
            vectors
            >> with_flatten(
                LN(Maxout(width, vectors_width))
                >> Residual((ExtractWindow(nW=1)
                             >> LN(Maxout(width, width * 3)))) ** 2,
                pad=2)
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        linear_model = (
            _preprocess_doc
            >> LinearModel(nr_class, drop_factor=0.)
        )

        model = (
            (linear_model | cnn_model)
            >> zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
            >> logistic
        )
    model.nO = nr_class
    model.lsuv = False
    return model
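
# A hypothetical configuration sketch for `build_text_classifier` (argument
# values are illustrative, not from the original module):
def _demo_text_classifier():
    # With pretrained vectors available and little training data, the
    # `low_data` branch selects the cheaper vectors-only model.
    small = build_text_classifier(2, width=64, pretrained_dims=300,
                                  low_data=True)
    # Otherwise the full CNN + linear ensemble is built.
    full = build_text_classifier(2, width=64, nr_vector=5000)
    return small, full
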
def Tok2Vec(width, embed_size, **kwargs):
    # Token-to-vector encoder: hash embeddings over several lexical features
    # (optionally concatenated with static pretrained vectors), mixed by a
    # layer-normalised Maxout and refined by four residual CNN blocks.
    pretrained_vectors = kwargs.get('pretrained_vectors', None)
    cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 2)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone,
                                 '+': add, '*': reapply}):
        norm = HashEmbed(width, embed_size, column=cols.index(NORM),
                         name='embed_norm')
        prefix = HashEmbed(width, embed_size // 2, column=cols.index(PREFIX),
                           name='embed_prefix')
        suffix = HashEmbed(width, embed_size // 2, column=cols.index(SUFFIX),
                           name='embed_suffix')
        shape = HashEmbed(width, embed_size // 2, column=cols.index(SHAPE),
                          name='embed_shape')
        if pretrained_vectors is not None:
            glove = StaticVectors(pretrained_vectors, width,
                                  column=cols.index(ID))
            embed = uniqued(
                (glove | norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 5, pieces=3)),
                column=cols.index(ORTH))
        else:
            embed = uniqued(
                (norm | prefix | suffix | shape)
                >> LN(Maxout(width, width * 4, pieces=3)),
                column=cols.index(ORTH))
        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
        )
        tok2vec = (
            FeatureExtracter(cols)
            >> with_flatten(embed >> convolution ** 4, pad=4)
        )
    # Work around thinc API limitations :(. TODO: Revise in Thinc 7
    tok2vec.nO = width
    tok2vec.embed = embed
    return tok2vec
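
# A usage sketch for `Tok2Vec` (hypothetical data, not part of the original
# module): the returned model maps a batch of Docs to one array of per-token
# vectors per Doc, again assuming the thinc 6.x-era calling convention.
def _demo_tok2vec():
    import spacy
    nlp = spacy.blank('en')
    docs = [nlp(u'A short example.'), nlp(u'Another one.')]
    tok2vec = Tok2Vec(width=96, embed_size=2000)
    vectors = tok2vec(docs)   # list of arrays, each (n_tokens, width)
    return vectors
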
def build_model(nr_class, width, depth, conv_depth, **kwargs):
    # Hierarchical classifier: `sent2vec` encodes each sentence with hash
    # embeddings, a convolutional window and attention pooling; then
    # `foreach_sentence` (defined elsewhere in the script) applies it per
    # sentence and a second attention layer pools the sentence vectors into
    # a document vector.
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        embed = (
            (HashEmbed(width, 5000, column=1)
             | HashEmbed(width // 2, 750, column=2)
             | HashEmbed(width // 2, 750, column=3)
             | HashEmbed(width // 2, 750, column=4))
            >> Maxout(width)
        )
        sent2vec = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE])
            >> flatten_add_lengths
            >> with_getitem(
                0,
                uniqued(embed, column=0)
                >> Residual(ExtractWindow(nW=1) >> SELU(width)) ** conv_depth)
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(SELU(width)) ** depth
        )
        model = (
            foreach_sentence(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(SELU(width)) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model
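
# This is the `build_model` that `main` above trains. A direct construction
# sketch with hypothetical sizes, matching the call in `main`:
def _demo_hierarchical_model():
    return build_model(nr_class=2, width=128, depth=2, conv_depth=2)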