Code example #1 (votes: 0)
    def __init__(self,
                 embed_dim,
                 num_heads,
                 ff_dim,
                 prenorm=False,
                 act=lambda x: x.relu()):
        """Allocate the parameters of one transformer block.

        Each projection is stored as a (weight, bias) tensor pair; the two
        layer norms are stored as (scale, shift) pairs. `prenorm` and `act`
        are kept as-is for the forward pass to consume.
        """
        # heads must evenly divide the embedding dimension
        assert embed_dim % num_heads == 0
        self.num_heads = num_heads
        self.head_size = embed_dim // num_heads
        self.prenorm, self.act = prenorm, act

        # Helper: a linear layer as a (weight, bias) pair.
        # NOTE: call order below matches the original so RNG draws line up.
        def _linear(in_dim, out_dim):
            return (Tensor.uniform(in_dim, out_dim), Tensor.zeros(out_dim))

        # attention projections
        self.query = _linear(embed_dim, embed_dim)
        self.key = _linear(embed_dim, embed_dim)
        self.value = _linear(embed_dim, embed_dim)
        self.out = _linear(embed_dim, embed_dim)

        # position-wise feed-forward
        self.ff1 = _linear(embed_dim, ff_dim)
        self.ff2 = _linear(ff_dim, embed_dim)

        # layer-norm (scale, shift) parameters
        self.ln1 = (Tensor.ones(embed_dim), Tensor.zeros(embed_dim))
        self.ln2 = (Tensor.ones(embed_dim), Tensor.zeros(embed_dim))
Code example #2 (votes: 0)
File: nn.py — Project: ufuu/tinygrad
  def __init__(self, sz, eps=1e-5, track_running_stats=False, training=False, momentum=0.1):
    """Set up batch-norm state for `sz` channels: learned affine parameters
    plus non-trainable running statistics."""
    self.eps = eps
    self.track_running_stats = track_running_stats
    self.training = training
    self.momentum = momentum

    # learned per-channel scale and shift
    self.weight = Tensor.ones(sz)
    self.bias = Tensor.zeros(sz)

    # running statistics are bookkeeping only — excluded from gradients
    self.running_mean = Tensor.zeros(sz, requires_grad=False)
    self.running_var = Tensor.ones(sz, requires_grad=False)
    self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
Code example #3 (votes: 0)
File: nn.py — Project: ycechungAI/tinygrad
  def __init__(self, sz, eps=1e-5, affine=True, track_running_stats=True, momentum=0.1):
    """Set up batch-norm state for `sz` channels.

    Only the affine form is implemented, so `affine` must be truthy;
    weight/bias are the learned scale and shift, and the running statistics
    are kept with requires_grad=False so they never receive gradients.
    """
    # Idiom fix: `affine == True` (PEP 8 / E712) replaced with a plain
    # truthiness check; the error message and exception type are unchanged.
    assert affine, "BatchNorm2D is only supported with affine"
    self.eps, self.track_running_stats, self.momentum = eps, track_running_stats, momentum

    # learned per-channel scale and shift
    self.weight, self.bias = Tensor.ones(sz), Tensor.zeros(sz)

    # running statistics — bookkeeping only, excluded from autograd
    self.running_mean, self.running_var = Tensor.zeros(sz, requires_grad=False), Tensor.ones(sz, requires_grad=False)
    self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
Code example #4 (votes: 0)
File: nn.py — Project: xywei/tinygrad
  def __init__(self, sz, eps=0.001):
    """Set up batch-norm state for `sz` channels.

    The constants eps and 2 are wrapped as non-trainable tensors,
    presumably so the forward pass can use them directly in tensor
    arithmetic — confirm against the caller.
    """
    # constants as 1-element tensors, excluded from autograd
    self.eps = Tensor([eps], requires_grad=False)
    self.two = Tensor([2], requires_grad=False)

    # learned per-channel scale and shift
    self.weight, self.bias = Tensor.ones(sz), Tensor.zeros(sz)

    # running statistics — bookkeeping only, never trained
    self.running_mean = Tensor.zeros(sz, requires_grad=False)
    self.running_var = Tensor.ones(sz, requires_grad=False)
    self.num_batches_tracked = Tensor.zeros(1, requires_grad=False)
Code example #5 (votes: 0)
 def __init__(self, layers=12, embed_dim=192, num_heads=3):
     """Build a ViT-style model: patch embedding, class/position tokens,
     a stack of transformer blocks, and a 1000-way classification head.

     NOTE(review): Tensor call order is kept identical to the original so
     that random initialization draws are reproduced exactly.
     """
     self.embed_dim = embed_dim
     # patch embedding over 16x16 3-channel patches, as a (weight, bias) pair
     self.embedding = (Tensor.uniform(embed_dim, 3, 16, 16),
                       Tensor.zeros(embed_dim))
     # learnable class token and positional embedding
     # (197 = 196 patches + 1 cls token, presumably for 224x224 input — verify)
     self.cls = Tensor.ones(1, 1, embed_dim)
     self.pos_embedding = Tensor.ones(1, 197, embed_dim)
     # the transformer encoder stack, GELU activation, pre-norm variant
     self.tbs = [
         TransformerBlock(embed_dim=embed_dim,
                          num_heads=num_heads,
                          ff_dim=embed_dim * 4,
                          prenorm=True,
                          act=lambda x: x.gelu())
         for _ in range(layers)
     ]
     # final norm parameters and classification head
     self.encoder_norm = (Tensor.uniform(embed_dim), Tensor.zeros(embed_dim))
     self.head = (Tensor.uniform(embed_dim, 1000), Tensor.zeros(1000))
Code example #6 (votes: 0)
File: test_tensor.py — Project: skoshx/tinygrad
 def test_dropout(self):
     """Dropout at rate r should zero out ~r of a large tensor in training mode."""
     Tensor.training = True
     count, rate = 1_000_000, 0.1
     dropped = Tensor.ones(count).dropout(rate)
     kept = np.count_nonzero(dropped.cpu().data)
     # with a million elements the survivor count should be within 0.1%
     np.testing.assert_allclose(kept, count * (1 - rate), rtol=1e-3)