def test_dropout(data):
    """Run a Dropout(0.2) layer with ``is_train=False`` and compare to the input.

    Both the forward output and the result of the backprop callback are
    checked against ``data`` with ``assert_data_match``.
    """
    layer = Dropout(0.2)
    layer.initialize(data, data)
    output, get_dX = layer(data, is_train=False)
    assert_data_match(output, data)
    grad_input = get_dX(output)
    assert_data_match(grad_input, data)
def build_text_classifier_v2(
    tok2vec: Model[List[Doc], List[Floats2d]],
    linear_model: Model[List[Doc], Floats2d],
    nO: Optional[int] = None,
) -> Model[List[Doc], Floats2d]:
    """Build an ensemble text classifier from a tok2vec branch and a linear branch.

    The tok2vec output is converted to ragged form, passed through a
    parametric attention layer, summed, and fed to a residual
    Maxout -> LayerNorm -> Dropout block. That branch is concatenated with
    ``linear_model`` and followed by an output layer: ``Softmax`` when the
    classes are exclusive, otherwise ``Linear >> Logistic``.

    tok2vec: Token-to-vector model; its "nO" dimension (if known) is used as
        the width of the attention, Maxout and LayerNorm layers.
    linear_model: Linear model; must carry a "multi_label" attribute and an
        "output_layer" reference, both of which are read here.
    nO: Number of output classes, or None if not known yet.
    RETURNS: The combined model, with the refs "tok2vec", "output_layer",
        "attention_layer", "maxout_layer" and "norm_layer" set, the
        "multi_label" attribute set, and ``init_ensemble_textcat`` installed
        as its init function.
    """
    exclusive_classes = not linear_model.attrs["multi_label"]
    with Model.define_operators({">>": chain, "|": concatenate}):
        width = tok2vec.maybe_get_dim("nO")
        # TODO: benchmark performance difference of this layer
        attention_layer = ParametricAttention(width)
        maxout_layer = Maxout(nO=width, nI=width)
        norm_layer = LayerNorm(nI=width)
        cnn_model = (
            tok2vec
            >> list2ragged()
            >> attention_layer
            >> reduce_sum()
            >> residual(maxout_layer >> norm_layer >> Dropout(0.0))
        )

        # The output layer is given twice nO as its input size when nO is known.
        nO_double = nO * 2 if nO else None
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nO_double)
        else:
            output_layer = Linear(nO=nO, nI=nO_double) >> Logistic()
        model = (linear_model | cnn_model) >> output_layer
        model.set_ref("tok2vec", tok2vec)
    # nO defaults to None; only pin the output dimension when a concrete size
    # was supplied, so that None is never passed to set_dim.
    if model.has_dim("nO") is not False and nO is not None:
        model.set_dim("nO", nO)
    model.set_ref("output_layer", linear_model.get_ref("output_layer"))
    model.set_ref("attention_layer", attention_layer)
    model.set_ref("maxout_layer", maxout_layer)
    model.set_ref("norm_layer", norm_layer)
    model.attrs["multi_label"] = not exclusive_classes

    model.init = init_ensemble_textcat
    return model
def test_dropout_gives_zero_gradients(W_b_input):
    """With the dropout rate at 1.0, every input gradient must be zero."""
    network = chain(get_model(W_b_input), Dropout(1.0))
    nr_batch, nr_out, _ = get_shape(W_b_input)
    _, _, input_ = W_b_input
    # Explicitly set the rate on every layer named "dropout" in the tree.
    dropout_layers = (layer for layer in network.walk() if layer.name == "dropout")
    for layer in dropout_layers:
        layer.attrs["dropout_rate"] = 1.0
    _, backprop = network.begin_update(input_)
    d_output = numpy.ones((nr_batch, nr_out), dtype="f")
    d_input = backprop(d_output)
    assert not any(val != 0.0 for val in d_input.flatten())
def test_chain(ops):
    """Exercise ``chain`` construction, initialization and argument checks."""
    batch = numpy.asarray([[1, 2, 3, 4]], dtype="f")

    # Forward and backward pass through a chain running on the given ops.
    trained = chain(Linear(1), Dropout(), Linear(1))
    trained.ops = ops
    trained.initialize(batch, batch)
    outputs, get_grads = trained(batch, is_train=True)
    get_grads(outputs)

    # Layers with and without nO/nI
    mixed_dims = chain(Linear(1), Dropout(), Linear(1, 1))
    mixed_dims.initialize(batch, batch)

    # Setting dim on model
    with_nO = chain(Linear(1), Dropout(), Linear(1))
    with_nO.set_dim("nO", 1)
    with_nO.initialize(batch, None)

    with_nI = chain(Linear(1, 1), Dropout(), Linear(1, 1))
    with_nI.set_dim("nI", 1)
    with_nI.initialize(None, batch)

    # Not enough arguments
    with pytest.raises(TypeError):
        chain(Linear())
    with pytest.raises(TypeError):
        chain()
def build_text_classifier_lowdata(
    width: int, dropout: Optional[float], nO: Optional[int] = None
) -> Model[List[Doc], Floats2d]:
    """Build a text classifier on top of static vectors.

    The pipeline is StaticVectors -> list2ragged -> ParametricAttention ->
    reduce_sum -> a residual Relu block cloned twice -> Linear, followed by a
    Dropout layer when ``dropout`` is truthy, and finally Logistic.

    width: Width used for the vectors, attention and Relu layers, and as the
        input size of the Linear layer.
    dropout: Dropout rate; no Dropout layer is added when it is None or 0.
    nO: Output size of the Linear layer, or None if not known yet.
    RETURNS: The assembled model.
    """
    # Don't document this yet, I'm not sure it's right.
    # Note, before v.3, this was the default if setting "low_data" and "pretrained_dims"
    with Model.define_operators({">>": chain, "**": clone}):
        # Built one step at a time, in the same left-to-right order as a
        # single chained expression would evaluate.
        pipeline = StaticVectors(width) >> list2ragged()
        pipeline = pipeline >> ParametricAttention(width)
        pipeline = pipeline >> reduce_sum()
        pipeline = pipeline >> residual(Relu(width, width)) ** 2
        pipeline = pipeline >> Linear(nO, width)
        if dropout:
            pipeline = pipeline >> Dropout(dropout)
        pipeline = pipeline >> Logistic()
    return pipeline
def test_set_dropout():
    """A default Dropout layer starts at rate 0.0 and set_dropout_rate updates it."""
    layer = Dropout()
    initial_rate = layer.attrs["dropout_rate"]
    assert initial_rate == 0.0
    set_dropout_rate(layer, 0.2)
    updated_rate = layer.attrs["dropout_rate"]
    assert updated_rate == 0.2
def test_dropout_gives_zero_activations(W_b_input):
    """With a dropout rate of 1.0, every forward activation must be zero."""
    network = chain(get_model(W_b_input), Dropout(1.0))
    # The shape is still computed and unpacked, although its values are unused.
    _nr_batch, _nr_out, _nr_in = get_shape(W_b_input)
    _, _, input_ = W_b_input
    activations, _ = network.begin_update(input_)
    assert not any(val != 0.0 for val in activations.flatten())
def TextCatEnsemble_v1(
    width: int,
    embed_size: int,
    pretrained_vectors: Optional[bool],
    exclusive_classes: bool,
    ngram_size: int,
    window_size: int,
    conv_depth: int,
    dropout: Optional[float],
    nO: Optional[int] = None,
) -> Model:
    """Build an ensemble text classifier from a CNN branch and a bag-of-words branch.

    The CNN branch embeds the LOWER, PREFIX, SUFFIX and SHAPE columns with
    hash embeddings, optionally concatenates static vectors, applies
    ``conv_depth`` residual window-Maxout blocks, then attention, sum
    pooling, a residual Maxout and a Linear layer. It is concatenated with
    the model returned by ``build_bow_text_classifier`` and followed by an
    output layer: ``Softmax`` when ``exclusive_classes`` is true, otherwise
    ``Linear >> Dropout >> Logistic``.

    width: Width of the token representations. The PREFIX, SUFFIX and SHAPE
        embeddings use ``width // 2``.
    embed_size: Passed as ``nV`` to every HashEmbed table.
    pretrained_vectors: If truthy, StaticVectors are concatenated with the
        trained embeddings.
    exclusive_classes: Selects the output layer and is forwarded to the
        bag-of-words builder; the model's "multi_label" attribute is set to
        its negation.
    ngram_size: Forwarded to ``build_bow_text_classifier``.
    window_size: Passed to ``expand_window``; the Maxout that follows takes
        ``width * (window_size * 2 + 1)`` inputs.
    conv_depth: Number of cloned residual blocks; also passed as ``pad`` to
        ``with_array``.
    dropout: Dropout rate passed to the HashEmbed layers.
    nO: Number of output classes, or None if not known yet.
    RETURNS: The combined model, with the refs "tok2vec" and "output_layer"
        and the "multi_label" attribute set.
    """
    # Don't document this yet, I'm not sure it's right.
    cols = [ORTH, LOWER, PREFIX, SUFFIX, SHAPE, ID]
    with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
        lower = HashEmbed(
            nO=width, nV=embed_size, column=cols.index(LOWER), dropout=dropout, seed=10
        )
        prefix = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(PREFIX),
            dropout=dropout,
            seed=11,
        )
        suffix = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(SUFFIX),
            dropout=dropout,
            seed=12,
        )
        shape = HashEmbed(
            nO=width // 2,
            nV=embed_size,
            column=cols.index(SHAPE),
            dropout=dropout,
            seed=13,
        )

        # Input width of the mixing Maxout is the sum of the embedding widths.
        width_nI = sum(layer.get_dim("nO") for layer in [lower, prefix, suffix, shape])
        trained_vectors = FeatureExtractor(cols) >> with_array(
            uniqued(
                (lower | prefix | suffix | shape)
                >> Maxout(nO=width, nI=width_nI, normalize=True),
                column=cols.index(ORTH),
            )
        )

        if pretrained_vectors:
            static_vectors = StaticVectors(width)
            vector_layer = trained_vectors | static_vectors
            vectors_width = width * 2
        else:
            vector_layer = trained_vectors
            vectors_width = width
        # Note: "**" binds tighter than ">>", so the residual block is cloned
        # conv_depth times before being chained after the first Maxout.
        tok2vec = vector_layer >> with_array(
            Maxout(width, vectors_width, normalize=True)
            >> residual(
                (
                    expand_window(window_size=window_size)
                    >> Maxout(
                        nO=width, nI=width * ((window_size * 2) + 1), normalize=True
                    )
                )
            )
            ** conv_depth,
            pad=conv_depth,
        )
        cnn_model = (
            tok2vec
            >> list2ragged()
            >> ParametricAttention(width)
            >> reduce_sum()
            >> residual(Maxout(nO=width, nI=width))
            >> Linear(nO=nO, nI=width)
            >> Dropout(0.0)
        )

        linear_model = build_bow_text_classifier(
            nO=nO,
            ngram_size=ngram_size,
            exclusive_classes=exclusive_classes,
            no_output_layer=False,
        )
        # Both branches emit nO values, so the output layer takes 2 * nO inputs.
        nO_double = nO * 2 if nO else None
        if exclusive_classes:
            output_layer = Softmax(nO=nO, nI=nO_double)
        else:
            output_layer = Linear(nO=nO, nI=nO_double) >> Dropout(0.0) >> Logistic()
        model = (linear_model | cnn_model) >> output_layer
        model.set_ref("tok2vec", tok2vec)
    # nO defaults to None; only pin the output dimension when a concrete size
    # was supplied, so that None is never passed to set_dim.
    if model.has_dim("nO") is not False and nO is not None:
        model.set_dim("nO", nO)
    model.set_ref("output_layer", linear_model.get_ref("output_layer"))
    model.attrs["multi_label"] = not exclusive_classes
    return model