def test_tanh():
    print("gradient check: Tanh")
    x = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')
    y = np.random.rand(5 * 8).reshape((5, 8)).astype('float32')

    # --- my forward ---
    act = activation.Tanh()
    square_loss_func = loss.SquareLoss()
    y_ = act(x)
    square_loss = square_loss_func(y_, y)

    # --- torch forward ---
    torch_x = torch.Tensor(x)
    torch_x.requires_grad = True
    act_torch = nn.Tanh()
    square_loss_func_torch = nn.MSELoss()
    y_torch = act_torch(torch_x)
    square_loss_torch = square_loss_func_torch(y_torch, torch.Tensor(y))

    print("Value:\ntorch: {}, mine: {}, delta: {}".format(
        square_loss_torch.item(), square_loss,
        (square_loss_torch.item() - square_loss)))

    # --- my grad ---
    grad_tanh = square_loss_func.backward()
    grad_x = act.backward(grad_tanh)

    # --- torch grad ---
    square_loss_torch.backward()
    grad_x_torch = torch_x.grad.data.numpy()
    print(grad_x_torch - grad_x)
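# The check above relies on the analytic derivative tanh'(z) = 1 - tanh(z)^2.
# Below is a minimal sketch of the Tanh/SquareLoss pair it exercises, assuming
# the activation caches its output for the backward pass; these bodies are
# illustrative stand-ins, not the repo's actual implementation.
import numpy as np

class TanhSketch:
    """Illustrative stand-in for activation.Tanh (assumed interface)."""

    def __call__(self, x):
        self.out = np.tanh(x)  # cache forward output for the backward pass
        return self.out

    def backward(self, grad_out):
        return grad_out * (1.0 - self.out ** 2)  # chain rule with tanh'

class SquareLossSketch:
    """Illustrative stand-in for loss.SquareLoss (mean squared error)."""

    def __call__(self, y_pred, y):
        self.diff = y_pred - y
        return np.mean(self.diff ** 2)

    def backward(self):
        # d(mean((p - y)^2)) / dp, averaged over all elements as nn.MSELoss does
        return 2.0 * self.diff / self.diff.size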
def test_tanh_forward():
    data = saved_data[9]
    t0 = data[0]
    gt = data[1]
    student = activation.Tanh()
    student(t0)
    closeness_test(student.state, gt, "tanh.state")
def test_tanh_derivative():
    data = saved_data[10]
    t0 = data[0]
    gt = data[1]
    student = activation.Tanh()
    student(t0)
    closeness_test(student.derivative(), gt, "tanh.derivative()")
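# Both tests above call closeness_test, which is defined elsewhere in the
# repo. A plausible minimal version, assuming it is a thin wrapper over
# np.allclose that reports the failing name; the tolerances are assumptions.
import numpy as np

def closeness_test(value, reference, name, rtol=1e-5, atol=1e-8):
    if not np.allclose(value, reference, rtol=rtol, atol=atol):
        raise ValueError("Wrong value for {}: max abs diff {}".format(
            name, np.max(np.abs(value - reference))))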
else:
    epochs = int(input("Please input how many epochs you want to train over "
                       "[DEFAULT = 4000] : ") or "4000")
    print("")

criterion = loss.LossMSE()

model = sequential.Sequential(
    layer.Linear(2, 25), activation.ReLU(),
    layer.Linear(25, 50), activation.ReLU(),
    layer.Linear(50, 50), activation.ReLU(),
    layer.Linear(50, 25), activation.ReLU(),
    layer.Linear(25, 1), activation.Tanh()
)

# Label points by the disk of radius 1/sqrt(2*pi) centered at (0.5, 0.5):
# inside -> -1, outside -> 1 (the disk has area 1/2, so labels are balanced).
train_target[((train_input - 0.5)**2).sum(1) < 1 / (2 * math.pi)] = -1
train_target[((train_input - 0.5)**2).sum(1) >= 1 / (2 * math.pi)] = 1
test_target[((test_input - 0.5)**2).sum(1) < 1 / (2 * math.pi)] = -1
test_target[((test_input - 0.5)**2).sum(1) >= 1 / (2 * math.pi)] = 1

ps = model.parameters()
optim = optimizer.SGD(model.parameters())

# for plotting later
levels = [-1, -0.5, 0, 0.5, 1]

# for accuracy computation
sigmoid = False

# Normalization
mu, std = train_input.mean(0), train_input.std(0)
train_input.sub_(mu).div_(std)
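# The snippet above stops short of the training loop itself. A minimal sketch
# of how these pieces are typically driven, assuming the custom framework
# follows the usual forward/backward/step conventions; the method names
# (model.forward, model.backward, model.zero_grad, criterion.forward,
# criterion.backward, optim.step) are assumptions about this repo's API.
for e in range(epochs):
    output = model.forward(train_input)                        # forward pass
    train_loss = criterion.forward(output, train_target)       # MSE loss

    model.zero_grad()                                          # clear grads
    model.backward(criterion.backward(output, train_target))   # backprop

    optim.step()                                               # SGD update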
    # Body of __register_unary_math_op__(op_name, act): build a wrapper layer
    # around the activation and publish it under op_name.
    def op(input, name=None):
        return layer.mixed(
            input=[layer.identity_projection(input=input)], name=name, act=act)

    op = wrap_name_default(op_name)(op)
    op.__doc__ = type(act).__doc__
    globals()[op_name] = op
    __all__.append(op_name)


__register_unary_math_op__('exp', act.Exp())
__register_unary_math_op__('log', act.Log())
__register_unary_math_op__('abs', act.Abs())
__register_unary_math_op__('sigmoid', act.Sigmoid())
__register_unary_math_op__('tanh', act.Tanh())
__register_unary_math_op__('square', act.Square())
__register_unary_math_op__('relu', act.Relu())
__register_unary_math_op__('sqrt', act.Sqrt())
__register_unary_math_op__('reciprocal', act.Reciprocal())
__register_unary_math_op__('softmax', act.Softmax())


def __add__(layeroutput, other):
    if is_compatible_with(other, float):
        return layer.slope_intercept(input=layeroutput, intercept=other)
    if not isinstance(other, Layer):
        raise TypeError("Layer can only be added with"
                        " another Layer or a number")
    if layeroutput.size == other.size:
        return layer.mixed(input=[
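# The registration above works by injecting freshly built wrappers into the
# module's global namespace. A self-contained illustration of the same
# pattern outside PaddlePaddle; every name below is invented for the demo.
import numpy as np

__all__ = []

def _register_unary(op_name, fn):
    def op(x, name=None):
        # The real code builds a layer.mixed around an activation;
        # here we just apply a numpy function elementwise.
        return fn(x)

    op.__name__ = op_name
    op.__doc__ = "Elementwise {} (demo)".format(op_name)
    globals()[op_name] = op  # expose the wrapper at module level
    __all__.append(op_name)

_register_unary('tanh', np.tanh)
_register_unary('sigmoid', lambda x: 1.0 / (1.0 + np.exp(-x)))

print(tanh(np.array([0.0, 1.0])))  # calls the dynamically created op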
reportString += "output (training result) suffix: " + result_ext + nl reportString += "noise multiplier: " + str(noise_mult) + nl reportString += "Initial Error: " + str(cost.MSE(data_scaled, input)) + nl reportString += "epoch: " + str(n_iteration) + nl reportString += "learning rate: " + str(alpha) + nl for i in range(0, n_iteration): n = np.random.randint(0, input.shape[0]) sample = np.copy(input[n]).reshape(1, input[n].shape[0]) clean = data_scaled[n].reshape(sample.shape) print(i, "iteration") # --- FORWARD PASS --- # - HIDDEN LAYER - code = a.Tanh(np.dot(sample, wh)) im_code = util.Scale(code, -1, 1, 0, 255).astype(np.uint8) # - OUTPUT LAYER - output = a.Tanh(np.dot(code, wo)) # --- BACKPROPAGATION --- w = np.copy(wh) e2 = clean - output g2 = (1 - np.power(output, 2)) * e2 w_delta_2 = alpha * np.dot(code.transpose(), g2) e1 = np.dot(wo, g2.transpose()).transpose() g1 = (1 - np.power(code, 2)) * e1