def MaxoutWindowEncoder_v1(width: int, window_size: int, maxout_pieces: int, depth: int) -> Model[Floats2d, Floats2d]:
    """Encode context using convolutions with maxout activation, layer
    normalization and residual connections.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token to
        construct the convolution. Recommended value is 1.
    maxout_pieces (int): The number of maxout pieces to use. Recommended values
        are 2 or 3.
    depth (int): The number of convolutional layers. Recommended value is 4.
    """
    # Each convolution sees the token itself plus window_size neighbours on
    # either side, so the concatenated input width is width * (2w + 1).
    concatenated_width = width * ((window_size * 2) + 1)
    convolution = chain(
        expand_window(window_size=window_size),
        Maxout(
            nO=width,
            nI=concatenated_width,
            nP=maxout_pieces,
            dropout=0.0,
            normalize=True,
        ),
    )
    encoder = clone(residual(convolution), depth)
    encoder.set_dim("nO", width)
    # Stacking depth residual convolutions grows the receptive field linearly.
    encoder.attrs["receptive_field"] = window_size * depth
    return encoder
def test_clone_gives_distinct_ids(nH, nI):
    """Cloning a layer 5 times yields 5 separate sub-layers, and every node
    in the resulting tree (5 clones + the enclosing model = 6) has a unique id.
    """
    cloned = clone(Linear(nH), 5)
    assert len(cloned.layers) == 5
    observed_ids = set()
    for node in cloned.walk():
        assert node.id not in observed_ids
        observed_ids.add(node.id)
    assert len(observed_ids) == 6
def test_clone_changes_predictions(nH, nI):
    """A 10-deep clone of a linear layer should not produce the same
    predictions as the single original layer."""
    base = Linear(nH)
    stacked = clone(base, 10)
    data = numpy.ones((10, nI), dtype="f")
    stacked.initialize(X=data)
    stacked_sum = stacked.predict(data).sum()
    base_sum = base.predict(data).sum()
    assert stacked_sum != base_sum
def test_clone(model1, nI):
    """After cloning a layer 10 times and training-initializing the stack,
    the stack's output differs from the original single layer's output."""
    data = numpy.ones((10, nI), dtype="f")
    # Clear the input dim so the cloned stack can infer it during training.
    model1.nI = None
    stacked = clone(model1, 10)
    stacked.begin_training(data)
    cloned_sum = stacked(data).sum()
    original_sum = model1(data).sum()
    assert cloned_sum != original_sum
def MishWindowEncoder(config):
    """Build a context encoder from a config dict: window extraction feeding
    a layer-normalized Mish layer, wrapped in residual connections and
    cloned `depth` times.

    Expected config keys: "width", "window_size", "depth".
    """
    from thinc.v2v import Mish

    nO = config["width"]
    nW = config["window_size"]
    depth = config["depth"]
    # The extracted window concatenates the token with nW neighbours per side.
    concatenated = nO * ((nW * 2) + 1)
    cnn = chain(ExtractWindow(nW=nW), LayerNorm(Mish(nO, concatenated)))
    model = clone(Residual(cnn), depth)
    model.nO = nO
    return model
def MaxoutWindowEncoder(config):
    """Build a context encoder from a config dict: window extraction feeding
    a layer-normalized Maxout layer, wrapped in residual connections and
    cloned `depth` times.

    Expected config keys: "width", "window_size", "pieces", "depth".
    """
    nO = config["width"]
    nW = config["window_size"]
    nP = config["pieces"]
    depth = config["depth"]
    # The extracted window concatenates the token with nW neighbours per side.
    concatenated = nO * ((nW * 2) + 1)
    cnn = chain(ExtractWindow(nW=nW), LayerNorm(Maxout(nO, concatenated, pieces=nP)))
    model = clone(Residual(cnn), depth)
    model.nO = nO
    # Each of the `depth` stacked convolutions widens the receptive field by nW.
    model.receptive_field = nW * depth
    return model
def MishWindowEncoder_v1(
    width: int, window_size: int, depth: int
) -> Model[List[Floats2d], List[Floats2d]]:
    """Encode context using convolutions with mish activation, layer
    normalization and residual connections.

    width (int): The input and output width. These are required to be the same,
        to allow residual connections. This value will be determined by the
        width of the inputs. Recommended values are between 64 and 300.
    window_size (int): The number of words to concatenate around each token to
        construct the convolution. Recommended value is 1.
    depth (int): The number of convolutional layers. Recommended value is 4.
    """
    # Concatenated input covers the token plus window_size neighbours per side.
    concatenated_width = width * ((window_size * 2) + 1)
    convolution = chain(
        expand_window(window_size=window_size),
        Mish(nO=width, nI=concatenated_width, dropout=0.0, normalize=True),
    )
    encoder = clone(residual(convolution), depth)
    encoder.set_dim("nO", width)
    return encoder
def create_relu_softmax(width, dropout, nI, nO):
    """Chain two cloned ReLu layers of the given width into a 10-class Softmax.

    NOTE(review): the `nI` and `nO` parameters are currently unused and the
    output class count is hard-coded to 10 — presumably intentional for the
    fixture this serves; confirm against callers before changing.
    """
    hidden = clone(ReLu(nO=width, dropout=dropout), 2)
    output = Softmax(10, width)
    return chain(hidden, output)
def test_clone_noop():
    """Cloning zero times collapses to an empty no-op model."""
    empty = clone(Linear(), 0)
    assert len(empty.layers) == 0
    assert empty.name == "noop"