Example #1
 def test_rmsprop(self):
     np.random.seed(1337)
     model = TinyBobNet()
     optimizer = optim.RMSprop(model.parameters(), lr=0.0002)
     train(model,
           X_train,
           Y_train,
           optimizer,
           steps=1000,
           device=self.device)
     assert evaluate(model, X_test, Y_test, device=self.device) > 0.95
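The test assumes a TinyBobNet model defined elsewhere in the suite. A minimal two-layer MLP consistent with that usage might look like this sketch (the exact layer sizes are assumptions):

class TinyBobNet:
  def __init__(self):
    # two fully connected layers, 784 -> 128 -> 10 for MNIST (assumed sizes)
    self.l1 = Tensor.uniform(784, 128)
    self.l2 = Tensor.uniform(128, 10)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    return x.dot(self.l1).relu().dot(self.l2).logsoftmax()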
Example #2
 def test_conv(self):
     np.random.seed(1337)
     model = TinyConvNet()
     optimizer = optim.Adam(model.parameters(), lr=0.001)
     train(model,
           X_train,
           Y_train,
           optimizer,
           steps=200,
           device=self.device)
     assert evaluate(model, X_test, Y_test, device=self.device) > 0.95
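TinyConvNet is likewise defined in the test suite; note that it clears the same 0.95 accuracy bar in 200 steps where the MLP above needs 1000. A plausible shape for it is a small two-conv network (a sketch, with assumed channel counts):

class TinyConvNet:
  def __init__(self):
    # two 3x3 conv layers followed by a linear classifier (assumed sizes)
    self.c1 = Tensor.uniform(8, 1, 3, 3)
    self.c2 = Tensor.uniform(16, 8, 3, 3)
    self.l1 = Tensor.uniform(16 * 5 * 5, 10)

  def parameters(self):
    return get_parameters(self)

  def forward(self, x):
    x = x.reshape(shape=(-1, 1, 28, 28))
    x = x.conv2d(self.c1).relu().max_pool2d()   # 28x28 -> 26x26 -> 13x13
    x = x.conv2d(self.c2).relu().max_pool2d()   # 13x13 -> 11x11 -> 5x5
    return x.reshape(shape=(x.shape[0], -1)).dot(self.l1).logsoftmax()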
Example #3
def train_one_step(model, X, Y):
  params = get_parameters(model)
  # total scalar parameter count, for the log line below
  pcount = 0
  for p in params:
    pcount += np.prod(p.shape)
  optimizer = optim.Adam(params, lr=0.001)
  print("stepping %r with %.1fM params bs %d" % (type(model), pcount/1e6, BS))
  st = time.time()
  train(model, X, Y, optimizer, steps=1, BS=BS)
  et = time.time()-st
  print("done in %.2f ms" % (et*1000.))
Example #4
 def test_sgd(self):
   np.random.seed(1337)
   model = TinyBobNet()
   optimizer = optim.SGD(model.parameters(), lr=0.001)
   train(model, X_train, Y_train, optimizer, steps=1000)
   assert evaluate(model, X_test, Y_test) > 0.95
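evaluate, the counterpart used in every example, plausibly runs the test set in batches and returns accuracy, optionally with the raw predictions. A sketch under those assumptions:

def evaluate(model, X_test, Y_test, BS=128, return_predict=False):
  preds = []
  for i in range(0, len(X_test), BS):
    out = model.forward(Tensor(X_test[i:i + BS].astype(np.float32)))
    # logsoftmax outputs -> predicted class ids
    preds.append(np.argmax(out.cpu().data, axis=-1))
  Y_pred = np.concatenate(preds)
  acc = (Y_pred == Y_test).mean()
  return (acc, Y_pred) if return_predict else acc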
Example #5
  # NLL term plus an L1 penalty on both weight matrices
  lossfn = lambda out, y: out.mul(y).mean() + lmbd * (model.weight1.abs() + model.weight2.abs()).sum()
  X_train, Y_train, X_test, Y_test = fetch_mnist()
  steps = len(X_train)//BS
  np.random.seed(1337)
  if QUICK:
    steps = 1
    X_test, Y_test = X_test[:BS], Y_test[:BS]
  
  model = BigConvNet()
 
  if len(sys.argv) > 1:
    try:
      model.load(sys.argv[1])
      print('Loaded weights "'+sys.argv[1]+'", evaluating...')
      evaluate(model, X_test, Y_test, BS=BS)
    except Exception:
      print('could not load weights "'+sys.argv[1]+'".')
 
  if GPU:
    params = get_parameters(model)
    for p in params:
      p.cuda_()  # move every parameter to the GPU in place

  for lr, epochs in zip(lrs, epochss):
    optimizer = optim.Adam(model.parameters(), lr=lr)
    for epoch in range(1, epochs + 1):
      # run the first epoch without augmentation, later ones on augment_img output
      X_aug = X_train if epoch == 1 else augment_img(X_train)
      train(model, X_aug, Y_train, optimizer, steps=steps, lossfn=lossfn, gpu=GPU, BS=BS)
      accuracy = evaluate(model, X_test, Y_test, BS=BS)
      model.save('examples/checkpoint%.0f' % (accuracy * 1e6))
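Example #5 is an excerpt, so several names (lmbd, BS, QUICK, GPU, lrs, epochss) are defined above the shown region. Hypothetical values consistent with how they are used:

lmbd = 0.00025                          # L1 penalty weight in lossfn (assumed value)
BS = 32                                 # batch size (assumed)
QUICK = os.getenv("QUICK") is not None  # smoke-test mode: 1 step, tiny test set
GPU = os.getenv("GPU") is not None
lrs = [1e-4, 1e-5]                      # one Adam instance per (lr, epochs) pair (assumed schedule)
epochss = [2, 1]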
Example #6
    ds_Y = np.copy(ds[:, 1:])
    ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
    ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]

    return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test


from tinygrad.optim import Adam
if __name__ == "__main__":
    model = Transformer(10, 6, 2, 128, 4)

    X_train, Y_train, X_test, Y_test = make_dataset()
    lr = 0.003
    for i in range(10):
        optim = Adam(get_parameters(model), lr=lr)
        train(model, X_train, Y_train, optim, 50, BS=64)
        acc, Y_test_preds = evaluate(model,
                                     X_test,
                                     Y_test,
                                     num_classes=10,
                                     return_predict=True)
        lr /= 1.2
        print(f'reducing lr to {lr:.4f}')
        if acc > 0.998:
            wrong = 0
            for k in range(len(Y_test_preds)):
                if (Y_test_preds[k] != Y_test[k]).any():
                    wrong += 1
                    a, b, c, x = (X_test[k, :2], X_test[k, 2:4],
                                  Y_test[k, -3:], Y_test_preds[k, -3:])
                    # operand digits, correct sum digits, predicted sum digits
                    print(a, b, c, x)
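The loop in Example #6 rebuilds the Adam optimizer each round with a learning rate divided by 1.2, so over ten rounds the rate anneals from 0.003 down to roughly 0.0006:

lr = 0.003
print([round(lr / 1.2 ** k, 5) for k in range(10)])
# [0.003, 0.0025, 0.00208, 0.00174, 0.00145, 0.00121, 0.001, 0.00084, 0.0007, 0.00058]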
Example #7
    def forward(self, x):
        bs = x.shape[0]
        xnp = x.cpu().data
        onehot = np.zeros((bs, x.shape[1], self.maxlen + self.syms),
                          dtype=np.float32)
        for i in range(x.shape[1]):
            onehot[range(bs), i, i] = 1
            onehot[range(bs), i, self.maxlen + xnp[:, i]] = 1
        onehot = onehot.reshape(bs * x.shape[1], self.maxlen + self.syms)

        x = Tensor(onehot, device=x.device).dot(
            self.embed).reshape(shape=(bs, x.shape[1], -1))
        for t in self.tbs:
            x = t(x)
        x = x.reshape(shape=(-1, x.shape[-1])).dot(self.final).logsoftmax()
        return x.reshape(shape=(bs, -1, x.shape[-1]))


from tinygrad.optim import Adam
if __name__ == "__main__":
    model = Transformer(10, 6, 2, 128, 4)

    #in1 = Tensor.zeros(20, 6, 128)
    #ret = model.forward(in1)
    #print(ret.shape)

    X_train, Y_train, X_test, Y_test = make_dataset()
    optim = Adam(get_parameters(model), lr=0.001)
    train(model, X_train, Y_train, optim, 100)
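forward() encodes each input position as a concatenated positional one-hot (the first maxlen slots) plus a symbol one-hot (the next syms slots). A standalone demo of that encoding with maxlen=6 and syms=10, matching Transformer(10, 6, ...):

import numpy as np

bs, T, maxlen, syms = 1, 6, 6, 10
xnp = np.array([[3, 1, 4, 1, 5, 9]])            # one sequence of 6 digit symbols
onehot = np.zeros((bs, T, maxlen + syms), dtype=np.float32)
for i in range(T):
  onehot[range(bs), i, i] = 1                   # which position this is
  onehot[range(bs), i, maxlen + xnp[:, i]] = 1  # which symbol sits there
print(onehot[0, 2])  # ones at index 2 (position) and 6 + 4 = 10 (symbol '4')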
Example #8
def make_dataset():
    ds = []
    # every 2-digit addition problem: digits of i and j, then the 3-digit sum
    for i in range(100):
        for j in range(100):
            s = i + j
            ds.append([
                i // 10, i % 10, j // 10, j % 10, s // 100, (s // 10) % 10,
                s % 10
            ])
    random.shuffle(ds)
    ds = np.array(ds)
    ds_X = ds[:, 0:6]
    ds_Y = np.copy(ds[:, 1:])
    ds_X_train, ds_X_test = ds_X[0:8000], ds_X[8000:]
    ds_Y_train, ds_Y_test = ds_Y[0:8000], ds_Y[8000:]

    return ds_X_train, ds_Y_train, ds_X_test, ds_Y_test


from tinygrad.optim import Adam
if __name__ == "__main__":
    model = Transformer(10, 6, 2, 128, 4)

    X_train, Y_train, X_test, Y_test = make_dataset()
    optim = Adam(get_parameters(model), lr=0.001)

    for i in range(5):
        train(model,
              X_train,
              Y_train,
              optim,
              500,
              BS=32,
              device=Device.GPU if os.getenv("GPU") else Device.CPU)
        evaluate(model, X_test, Y_test, num_classes=10)
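Note how the dataset frames addition as next-symbol prediction: ds_X is columns 0..5 of each row and ds_Y is columns 1..6, i.e. the same row shifted left by one, so the target at every position is the next digit:

row = [1, 2, 3, 4, 0, 4, 6]   # 12 + 34 = 046
ds_X = row[0:6]               # [1, 2, 3, 4, 0, 4]
ds_Y = row[1:]                # [2, 3, 4, 0, 4, 6], the next symbol at each step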
Example #9
from tinygrad.optim import Adam

class ComposeTransforms:
  def __init__(self, trans):
    self.trans = trans

  def __call__(self, x):
    for t in self.trans:
      x = t(x)
    return x

if __name__ == "__main__":
  model = ResNet18(num_classes=10, pretrained=True)

  X_train, Y_train, X_test, Y_test = fetch_mnist()
  X_train = X_train.reshape(-1, 28, 28).astype(np.uint8)
  X_test = X_test.reshape(-1, 28, 28).astype(np.uint8)
  lr = 5e-5
  transform = ComposeTransforms([
    lambda x: [Image.fromarray(xx, mode='L').resize((64, 64)) for xx in x],
    lambda x: np.stack([np.asarray(xx) for xx in x], 0),
    lambda x: x / 255.0,
    lambda x: np.tile(np.expand_dims(x, 1), (1, 3, 1, 1)).astype(np.float32),
  ])
  for i in range(10):
    optim = Adam(get_parameters(model), lr=lr)
    train(model, X_train, Y_train, optim, 50, BS=32, transform=transform)
    acc, Y_test_preds = evaluate(model, X_test, Y_test, num_classes=10, return_predict=True, transform=transform)
    lr /= 1.2
    print(f'reducing lr to {lr:.4f}')
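The transform pipeline upsamples MNIST to the input shape ResNet18 expects; tracing the shapes on a dummy batch:

import numpy as np
from PIL import Image

x = np.zeros((4, 28, 28), dtype=np.uint8)                      # raw batch
imgs = [Image.fromarray(xx, mode='L').resize((64, 64)) for xx in x]
x = np.stack([np.asarray(xx) for xx in imgs], 0) / 255.0       # (4, 64, 64) in [0, 1]
x = np.tile(np.expand_dims(x, 1), (1, 3, 1, 1)).astype(np.float32)
print(x.shape)  # (4, 3, 64, 64): grayscale tiled to the 3 channels ResNet18 expects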
Example #10
        self.final = Tensor.uniform(embed_dim, syms)

    def forward(self, x):
        bs = x.shape[0]
        xnp = x.cpu().data
        onehot = np.zeros((bs, x.shape[1], self.maxlen + self.syms),
                          dtype=np.float32)
        for i in range(x.shape[1]):
            onehot[range(bs), i, i] = 1
            onehot[range(bs), i, self.maxlen + xnp[:, i]] = 1
        onehot = onehot.reshape(bs * x.shape[1], self.maxlen + self.syms)

        x = Tensor(onehot, device=x.device).dot(
            self.embed).reshape(shape=(bs, x.shape[1], -1))
        for t in self.tbs:
            x = t(x)
        x = x.reshape(shape=(-1, x.shape[-1])).dot(self.final).logsoftmax()
        return x.reshape(shape=(bs, -1, x.shape[-1]))


from tinygrad.optim import Adam
if __name__ == "__main__":
    model = Transformer(10, 6, 2, 128, 4)

    X_train, Y_train, X_test, Y_test = make_dataset()
    optim = Adam(get_parameters(model), lr=0.001)

    for i in range(5):
        train(model, X_train, Y_train, optim, 500, BS=32)
        evaluate(model, X_test, Y_test, num_classes=10)
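Example #10 opens with the last line of the model's __init__. A plausible reconstruction of the rest of the constructor, inferred from the attributes forward() reads (self.maxlen, self.syms, self.embed, self.tbs, self.final) and from the call Transformer(10, 6, 2, 128, 4); the argument order and the TransformerBlock class are assumptions:

class Transformer:
    # assumed signature: Transformer(syms, maxlen, layers, embed_dim, num_heads)
    def __init__(self, syms, maxlen, layers, embed_dim, num_heads):
        self.maxlen, self.syms = maxlen, syms
        # input one-hots are maxlen positional slots + syms symbol slots wide
        self.embed = Tensor.uniform(maxlen + syms, embed_dim)
        # hypothetical attention-block class, one per layer
        self.tbs = [TransformerBlock(embed_dim, num_heads) for _ in range(layers)]
        self.final = Tensor.uniform(embed_dim, syms)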