def __init__(self, nin, nonlin=True):
    self.w = [
        Value(random.uniform(-1, 1), requires_grad=True) for _ in range(nin)
    ]
    self.b = Value(0, requires_grad=True)
    self.nonlin = nonlin
def test_Tensor_sanity_check_scalar():
    x = Tensor([[Value(-4.0)]])
    two_1 = Tensor([[Value(2)]])
    two_2 = Tensor([[Value(2)]])
    z = two_1 * x + two_2 + x
    q = z.relu() + z * x
    h = (z * z).relu()
    y = h + q + q * x
    y.data[0][0].backward()
    xmg, ymg = x.data[0][0], y.data[0][0]

    x = torch.Tensor([-4.0]).double()
    x.requires_grad = True
    z = 2 * x + 2 + x
    q = z.relu() + z * x
    h = (z * z).relu()
    y = h + q + q * x
    y.backward()
    xpt, ypt = x, y

    # forward pass went well
    assert ymg.data == ypt.data.item()
    # backward pass went well
    assert xmg.grad == xpt.grad.item()
def __pow__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data**other.data, (self, other), 'pow')

    def _backward():
        # d(x**y)/dx = y * x**(y - 1)
        self.grad += (other.data * self.data**(other.data - 1)) * out.grad
        # d(x**y)/dy = x**y * ln|x|, undefined when the base is 0
        if self.data != 0:
            other.grad += (self.data**other.data) * math.log(abs(
                self.data)) * out.grad
    out._backward = _backward

    return out
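# A standalone sanity sketch (not part of the engine) of the two partial
# derivatives implemented in the _backward above, compared against central
# finite differences with plain floats; the point (1.7, 2.5) is arbitrary.
import math

x, y, eps = 1.7, 2.5, 1e-6
d_dx_analytic = y * x**(y - 1)
d_dy_analytic = x**y * math.log(x)
d_dx_numeric = ((x + eps)**y - (x - eps)**y) / (2 * eps)
d_dy_numeric = (x**(y + eps) - x**(y - eps)) / (2 * eps)
assert abs(d_dx_analytic - d_dx_numeric) < 1e-4
assert abs(d_dy_analytic - d_dy_numeric) < 1e-4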
def compare(func, *inputs):
    """Compare the torch and micrograd implementations of `func` applied to `inputs`.

    `func` takes any number of arguments, each of which is a Value or a
    size-1 Tensor, and returns a single output.

    :param func: a mathematical function using Value or Tensor primitive functions.
    :param inputs: scalars, which are the arguments to func.
    """
    assert len(inputs) > 0
    vs = [Value(x) for x in inputs]
    ts = [torch.Tensor([x]) for x in inputs]
    for t in ts:
        t.requires_grad = True

    # micrograd, pytorch output
    ov = func(*vs)
    ot = func(*ts)
    assert ov.data == ot.data.item(), f'values: {ov} != {ot}'

    ov.backward()
    ot.backward()
    for v, t in zip(vs, ts):
        assert v.grad == t.grad.item(), f'gradients: {v} != {t}'
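# A minimal usage sketch for `compare`, written as a test; the expressions are
# arbitrary examples and only use ops both backends support exactly (+, *,
# integer powers, relu), so the exact-equality asserts inside compare hold.
def test_compare_usage():
    compare(lambda a, b: (a * b + b**3).relu(), -4.0, 2.0)
    compare(lambda x: x * x + 2 * x + 1, 3.0)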
def test_sanity_check():
    x = Value(-4.0, _op="x")
    z = 2 * x + 2 + x
    q = z.relu() + z * x
    h = (z * z).relu()
    y = h + q + q * x
    y.backward()
    xmg, ymg = x, y

    x = torch.Tensor([-4.0]).double()
    x.requires_grad = True
    z = 2 * x + 2 + x
    q = z.relu() + z * x
    h = (z * z).relu()
    y = h + q + q * x
    y.backward()
    xpt, ypt = x, y

    # forward pass went well
    assert ymg.data == ypt.data.item()
    print(f"{ymg.data = }\n{ypt.data.item() = }\n")
    # backward pass went well
    print(f"{xmg.grad = }\n{xpt.grad.item() = }\n")
    assert xmg.grad == xpt.grad.item()
def test_sanity_check():
    x = Value(-4.0)
    z = 2 * x + 2 + x
    z.backward()

    assert z.data == -10.0
    assert x.grad == 3.0
def test_more_ops():
    a = Value(-4.0)
    b = Value(2.0)
    c = a + b
    d = a * b + b**3
    c += c + 1
    c += 1 + c + (-a)
    d += d * 2 + (b + a).relu()
    d += 3 * d + (b - a).relu()
    e = c - d
    f = e**2
    g = f / 2.0
    g += 10.0 / f
    g.backward()
    amg, bmg, gmg = a, b, g

    a = torch.Tensor([-4.0]).double()
    b = torch.Tensor([2.0]).double()
    a.requires_grad = True
    b.requires_grad = True
    c = a + b
    d = a * b + b**3
    c = c + c + 1
    c = c + 1 + c + (-a)
    d = d + d * 2 + (b + a).relu()
    d = d + 3 * d + (b - a).relu()
    e = c - d
    f = e**2
    g = f / 2.0
    g = g + 10.0 / f
    g.backward()
    apt, bpt, gpt = a, b, g

    tol = 1e-6
    print(f"{gmg.data = }\n{gpt.data.item() = }\n")
    print(f"{amg.grad = }\n{apt.grad.item() = }\n")
    print(f"{bmg.grad = }\n{bpt.grad.item() = }\n")
    # forward pass went well
    assert abs(gmg.data - gpt.data.item()) < tol
    # backward pass went well
    assert abs(amg.grad - apt.grad.item()) < tol
    assert abs(bmg.grad - bpt.grad.item()) < tol
def log(self, **kwargs):
    out = Value(math.log(self.data), (self, ), 'log')

    def _backward():
        # d(ln(x))/dx = 1 / x
        self.grad += 1 / self.data * out.grad
    out._backward = _backward

    return out
def sigmoid(self):
    # sigmoid(x) = e**x / (e**x + 1), computed on the raw float, not the Value
    s = math.e**self.data / (math.e**self.data + 1)
    out = Value(s, (self, ), 'sigmoid')

    def _backward():
        # d(sigmoid(x))/dx = sigmoid(x) * (1 - sigmoid(x)), chained with out.grad
        self.grad += s * (1 - s) * out.grad
    out._backward = _backward

    return out
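# A standalone scratch check (not part of the class) of the sigmoid value and
# the s * (1 - s) derivative used in _backward, compared against PyTorch,
# which is already used as a test dependency elsewhere in this project.
import math
import torch

x = 0.3
s = math.e**x / (math.e**x + 1)
t = torch.tensor(x, requires_grad=True)
st = torch.sigmoid(t)
st.backward()
assert abs(s - st.item()) < 1e-5
assert abs(s * (1 - s) - t.grad.item()) < 1e-5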
def __init__(self, in_features, out_features, bias: bool = True):
    """Linear layer: W has shape (in_features, out_features), b has shape (out_features,)."""
    stddev = 1 / np.sqrt(in_features)
    # Draw every weight independently; rows must not share Value objects,
    # otherwise gradients from different inputs accumulate into the same parameters.
    self.W = Tensor([[
        Value(item)
        for item in np.random.uniform(-stddev, stddev, size=out_features)
    ] for _ in range(in_features)])
    self.b = [
        Value(item)
        for item in np.random.uniform(-stddev, stddev, size=out_features)
    ] if bias else 0
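# A standalone sketch of the initialization scheme used above: every entry is
# drawn uniformly from [-1/sqrt(in_features), 1/sqrt(in_features)] (fan-in
# scaling), shown here with a plain NumPy array; the shapes are arbitrary.
import numpy as np

in_features, out_features = 64, 16
stddev = 1 / np.sqrt(in_features)  # bound = 0.125 here
W = np.random.uniform(-stddev, stddev, size=(in_features, out_features))
assert W.shape == (64, 16)
assert W.min() >= -stddev and W.max() <= stddev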
def test_higher_order():
    x = Value(3)
    y = Value(2)
    f = 2 * x * x * y + y * x

    x.grad = y.grad = 0
    f.backward()
    dfx = x.grad
    dfy = y.grad
    assert dfx.data == 4 * x.data * y.data + y.data
    assert dfy.data == 2 * x.data * x.data + x.data

    x.grad = y.grad = 0
    dfx.backward()
    dfxx = x.grad
    dfxy = y.grad

    x.grad = y.grad = 0
    dfy.backward()
    dfyx = x.grad
    dfyy = y.grad

    print(dfxx, dfyx, dfyy, dfxy)
    assert dfyx.data == dfxy.data == (4 * x.data + 1)
    assert dfyy == 0
    assert dfxx.data == 4 * y.data
def test_higher_order():
    x = Value(3)
    y = x**3

    # This might seem redundant, but better we do this to ensure the gradient
    # is 0 before the backward pass.
    x.grad = 0
    y.backward()
    dy = x.grad
    assert dy.data == (x.data**2) * 3

    x.grad = 0
    dy.backward()
    d2y = x.grad
    assert d2y.data == x.data * 6

    x.grad = 0
    d2y.backward()
    d3y = x.grad
    assert d3y.data == 6

    x.grad = 0
    d3y.backward()
    d4y = x.grad
    assert d4y.data == 0
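# An independent cross-check (sketch) of the expected derivatives asserted in
# test_higher_order above, using SymPy -- assuming SymPy is acceptable as an
# extra test dependency, which it may not be in this repo.
def test_higher_order_expected_values_sympy():
    import sympy as sp

    xs = sp.Symbol('x')
    assert sp.diff(xs**3, xs, 1).subs(xs, 3) == 27  # dy at x = 3
    assert sp.diff(xs**3, xs, 2).subs(xs, 3) == 18  # d2y at x = 3
    assert sp.diff(xs**3, xs, 3) == 6               # d3y (constant)
    assert sp.diff(xs**3, xs, 4) == 0               # d4y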
def test_sanity_check():
    x = Value(-4.0)
    z = 2 * x + 2 + x
    q = z.relu() + z * x
    h = (z * z).relu()
    y = h + q + q * x
    y.backward()
    xmg, ymg = x, y

    x = torch.Tensor([-4.0])
    x.requires_grad = True
    z = 2 * x + 2 + x
    q = z.relu() + z * x
    h = (z * z).relu()
    y = h + q + q * x
    y.backward()
    xpt, ypt = x, y

    # forward pass went well
    assert ymg.data == ypt.data.item()
    # backward pass went well
    assert xmg.grad == xpt.grad.item()
def __repr__(self):
    return f"Layer of [{', '.join(str(n) for n in self.neurons)}]"


class MLP(Module):

    def __init__(self, nin, nouts):
        sz = [nin] + nouts
        self.layers = [
            Layer(sz[i], sz[i + 1], nonlin=i != len(nouts) - 1)
            for i in range(len(nouts))
        ]

    def __call__(self, x):
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        return [p for layer in self.layers for p in layer.parameters()]

    def __repr__(self):
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"


if __name__ == '__main__':
    n = Neuron(2, nonlin=False)
    x = [Value(1.0), Value(-2.0)]
    y = n(x)
    print(y)
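# A minimal usage sketch for the MLP class above (e.g. under the __main__
# guard), assuming MLP and Value are in scope and that, as in upstream
# micrograd, a single-neuron layer returns one Value; the architecture and
# inputs here are arbitrary examples.
model = MLP(2, [4, 4, 1])               # 2 inputs, two hidden layers of 4, 1 output
out = model([Value(1.0), Value(-2.0)])  # forward pass through all layers
out.backward()                          # populates .grad on every parameter
print(out, len(model.parameters()))     # (2*4+4) + (4*4+4) + (4*1+1) = 37 parameters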
# w = Value(3.0, name='w')
# x = Value(-4.0, name='x')
# y = Value(2.0, name='y')
# l = (w*x - y)**2
# l.name = 'MSEloss'
# print("l:", l)
# l.backward()
# draw_dot(l).render('test0')

# %%
# LOGloss
import math

from micrograd.engine import Value
from micrograd.trace_graph import draw_dot

w = Value(3.0, name='w')
x = Value(-4.0, name='x')
y = Value(1.0, name='y')
# Assume True
y = 1

dot = Value(w.data * x.data, name='dotprod')
print("dot:", dot)
print("dot.sigmoid():", dot.sigmoid())
l = dot.cross_entropy(1)
# l.name = 'LOGloss'
print("loss:", l)
l.backward()
draw_dot(l).render('test0')

# %%
import numpy as np
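# %%
# A standalone sketch of what the numbers above should look like, assuming
# dot.cross_entropy(1) computes the binary log loss -log(sigmoid(dot)) for a
# positive label (that is an assumption about this repo's API, not a fact).
dot_value = 3.0 * -4.0                      # w.data * x.data = -12.0
p = 1 / (1 + math.exp(-dot_value))          # sigmoid(-12) ~= 6.14e-06
loss = -math.log(p)                         # ~= 12.000006: confidently wrong prediction
print(p, loss)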
def __rpow__(self, other):
    if not isinstance(other, (int, float, Value)):
        return NotImplemented
    other = other if isinstance(other, Value) else Value(other)
    return other**self
def __init__(self, nin, nonlin=True):
    self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
    self.b = Value(random.uniform(-1, 1))
    self.nonlin = nonlin
from micrograd.engine import Value

a = Value(-4.0)
b = Value(2.0)
c = a + b
d = a * b + b**3
c += c + 1
c += 1 + c + (-a)
d += d * 2 + (b + a).relu()
d += 3 * d + (b - a).relu()
e = c - d
f = e**2
g = f / 2.0
g += 10.0 / f
print(f'{g.data:.4f}')  # prints 24.7041, the outcome of this forward pass
g.backward()
print(f'{a.grad:.4f}')  # prints 138.8338, i.e. the numerical value of dg/da
print(f'{b.grad:.4f}')  # prints 645.5773, i.e. the numerical value of dg/db
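# A standalone finite-difference cross-check of the gradients printed above,
# re-expressing the same computation with plain floats (no autograd involved).
def g_of(a, b):
    c = a + b
    d = a * b + b**3
    c = c + c + 1
    c = c + 1 + c + (-a)
    d = d + d * 2 + max(b + a, 0.0)
    d = d + 3 * d + max(b - a, 0.0)
    e = c - d
    f = e**2
    return f / 2.0 + 10.0 / f

eps = 1e-6
print((g_of(-4.0 + eps, 2.0) - g_of(-4.0 - eps, 2.0)) / (2 * eps))  # ~= 138.8338, dg/da
print((g_of(-4.0, 2.0 + eps) - g_of(-4.0, 2.0 - eps)) / (2 * eps))  # ~= 645.5773, dg/db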
def __init__(self, nin, nonlin=True, seed=None):
    if seed is not None:  # a seed of 0 should also seed the RNG
        random.seed(seed)
    self.w = [Value(random.uniform(-1, 1)) for _ in range(nin)]
    self.b = Value(0)
    self.nonlin = nonlin