def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors("spacy_pretrained_vectors", width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))
        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                embed
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth,
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )
        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            # This block would allow the model to learn some cross-sentence
            # features. It's not useful on this problem. It might make more
            # sense to use a BiLSTM here, following Liang et al (2016).
            # >> with_getitem(0,
            #     Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth
            # )
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model

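# ---------------------------------------------------------------------------
# Hedged sketch (not part of the original source): the operator overloading
# used by the builders in this file comes from Thinc's Model.define_operators.
# Inside the `with` block, ">>" means chain (feed-forward composition), "|"
# means concatenate (run layers on the same input and join their outputs),
# and "**" means clone (stack copies of a layer). A minimal, self-contained
# example, assuming the Thinc v6/v7 API these snippets target:
from thinc.api import chain, clone, concatenate
from thinc.v2v import Model, Affine, ReLu


def _operator_demo():
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        # Equivalent to:
        # chain(concatenate(Affine(32, 16), ReLu(32, 16)),
        #       clone(Affine(64, 64), 2))
        block = (Affine(32, 16) | ReLu(32, 16)) >> Affine(64, 64) ** 2
    return block
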
def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)
    with Model.define_operators(
        {'>>': chain, '+': add, '|': concatenate, '**': clone}
    ):
        if cfg.get('low_data') and pretrained_dims:
            # Low-data setting: rely entirely on the pretrained static vectors.
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        # Hash-embed the lower-case form, prefix, suffix and shape features,
        # mix them with a layer-normalized Maxout, and cache the result per
        # unique ORTH value (column 0) via uniqued.
        trained_vectors = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
            >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape)
                    >> LN(Maxout(width, width + (width // 2) * 3)),
                    column=0,
                )
            )
        )

        if pretrained_dims:
            static_vectors = (
                SpacyVectors >> with_flatten(Affine(width, pretrained_dims))
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None

        # CNN branch: window-based convolutional encoding of the tokens,
        # followed by attention-weighted sum pooling into a document vector.
        cnn_model = (
            vectors
            >> with_flatten(
                LN(Maxout(width, vectors_width))
                >> Residual(
                    (ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))
                ) ** 2,
                pad=2,
            )
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        # Linear bag-of-words branch.
        linear_model = (
            _preprocess_doc >> LinearModel(nr_class, drop_factor=0.)
        )

        # Combine both branches and squash to per-class probabilities.
        model = (
            (linear_model | cnn_model)
            >> zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
            >> logistic
        )
    model.nO = nr_class
    model.lsuv = False
    return model

def build_model(nr_class, width, depth, conv_depth, **kwargs):
    with Model.define_operators({'|': concatenate, '>>': chain, '**': clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors('spacy_pretrained_vectors', width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))
        sent2vec = (
            flatten_add_lengths
            >> with_getitem(
                0,
                embed
                >> Residual(ExtractWindow(nW=1) >> LN(Maxout(width))) ** conv_depth,
            )
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
        )
        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(sum_pool)
            >> Residual(LN(Maxout(width))) ** depth
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model

def build_model(nr_class, width, depth, conv_depth, vectors_name, **kwargs):
    with Model.define_operators({"|": concatenate, ">>": chain, "**": clone}):
        embed = (
            HashEmbed(width, 5000, column=1)
            | StaticVectors(vectors_name, width, column=5)
            | HashEmbed(width // 2, 750, column=2)
            | HashEmbed(width // 2, 750, column=3)
            | HashEmbed(width // 2, 750, column=4)
        ) >> LN(Maxout(width))
        sent2vec = (
            with_flatten(embed)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(Maxout(width, width, pieces=3))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width)))
        )
        model = (
            foreach(sent2vec, drop_factor=2.0)
            >> Residual(
                prepare_self_attention(Affine(width * 3, width), nM=width, nH=4)
                >> MultiHeadedAttention()
                >> with_flatten(LN(Affine(width, width)))
            )
            >> flatten_add_lengths
            >> ParametricAttention(width, hard=False)
            >> Pooling(mean_pool)
            >> Residual(LN(Maxout(width))) ** 2
            >> Softmax(nr_class)
        )
    model.lsuv = False
    return model

def build_text_classifier(nr_class, width=64, **cfg):
    depth = cfg.get("depth", 2)
    nr_vector = cfg.get("nr_vector", 5000)
    pretrained_dims = cfg.get("pretrained_dims", 0)
    with Model.define_operators(
        {">>": chain, "+": add, "|": concatenate, "**": clone}
    ):
        if cfg.get("low_data") and pretrained_dims:
            # Low-data setting: rely entirely on the pretrained static vectors.
            model = (
                SpacyVectors
                >> flatten_add_lengths
                >> with_getitem(0, Affine(width, pretrained_dims))
                >> ParametricAttention(width)
                >> Pooling(sum_pool)
                >> Residual(ReLu(width, width)) ** 2
                >> zero_init(Affine(nr_class, width, drop_factor=0.0))
                >> logistic
            )
            return model

        lower = HashEmbed(width, nr_vector, column=1)
        prefix = HashEmbed(width // 2, nr_vector, column=2)
        suffix = HashEmbed(width // 2, nr_vector, column=3)
        shape = HashEmbed(width // 2, nr_vector, column=4)

        # Hash-embed the lower-case form, prefix, suffix and shape features,
        # mix them with a layer-normalized Maxout, and cache the result per
        # unique ORTH value (column 0) via uniqued.
        trained_vectors = (
            FeatureExtracter([ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID])
            >> with_flatten(
                uniqued(
                    (lower | prefix | suffix | shape)
                    >> LN(Maxout(width, width + (width // 2) * 3)),
                    column=0,
                )
            )
        )

        if pretrained_dims:
            static_vectors = (
                SpacyVectors >> with_flatten(Affine(width, pretrained_dims))
            )
            # TODO Make concatenate support lists
            vectors = concatenate_lists(trained_vectors, static_vectors)
            vectors_width = width * 2
        else:
            vectors = trained_vectors
            vectors_width = width
            static_vectors = None

        # Token-to-vector encoder: window-based convolutions over the
        # embedded tokens, stacked `depth` times.
        tok2vec = vectors >> with_flatten(
            LN(Maxout(width, vectors_width))
            >> Residual(
                (ExtractWindow(nW=1) >> LN(Maxout(width, width * 3)))
            ) ** depth,
            pad=depth,
        )

        # CNN branch: attention-weighted sum pooling over the encoded tokens.
        cnn_model = (
            tok2vec
            >> flatten_add_lengths
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(zero_init(Maxout(width, width)))
            >> zero_init(Affine(nr_class, width, drop_factor=0.0))
        )

        # Linear bag-of-words (n-gram) branch.
        linear_model = build_bow_text_classifier(
            nr_class, ngram_size=cfg.get("ngram_size", 1), exclusive_classes=False
        )

        if cfg.get("exclusive_classes"):
            output_layer = Softmax(nr_class, nr_class * 2)
        else:
            output_layer = (
                zero_init(Affine(nr_class, nr_class * 2, drop_factor=0.0))
                >> logistic
            )

        # Combine both branches and map to the output classes.
        model = (linear_model | cnn_model) >> output_layer
        model.tok2vec = chain(tok2vec, flatten)
    model.nO = nr_class
    model.lsuv = False
    return model
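
# ---------------------------------------------------------------------------
# Hedged usage sketch (not part of the original source): one way a model
# returned by build_text_classifier might be instantiated and run, assuming
# the Thinc v6/v7 Model API (__call__ for prediction, begin_update for
# training) and an installed spaCy pipeline. The pipeline name
# "en_core_web_sm" and the hyperparameters are illustrative assumptions.
def _usage_demo():
    import spacy

    nlp = spacy.load("en_core_web_sm")
    docs = [
        nlp("This movie was great."),
        nlp("Terrible. Would not recommend."),
    ]

    model = build_text_classifier(nr_class=2, width=64, depth=2)

    # Prediction: the FeatureExtracter / SpacyVectors front-ends consume
    # spaCy Doc objects, and the logistic output gives per-class scores.
    scores = model(docs)

    # Training step: begin_update returns the scores plus a callback that
    # backpropagates a gradient of the loss with respect to the scores.
    scores, backprop = model.begin_update(docs, drop=0.2)
    truths = model.ops.asarray([[1.0, 0.0], [0.0, 1.0]], dtype="f")
    backprop(scores - truths)
    return scores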