# ===== Example 1 =====
def rmsprop_2d(x1, x2, s1, s2):
    """One RMSProp step on f(x1, x2) = 0.1*x1**2 + 2*x2**2.

    Reads the module-level hyperparameters ``eta`` (learning rate) and
    ``gamma`` (EMA decay).  Returns the updated ``(x1, x2, s1, s2)``.
    """
    eps = 1e-6
    # Analytic gradients of the objective.
    g1 = 0.2 * x1
    g2 = 4 * x2
    # Exponential moving average of the squared gradients.
    s1 = gamma * s1 + (1 - gamma) * g1**2
    s2 = gamma * s2 + (1 - gamma) * g2**2
    # Per-coordinate step scaled by the RMS of past gradients.
    x1 = x1 - eta / math.sqrt(s1 + eps) * g1
    x2 = x2 - eta / math.sqrt(s2 + eps) * g2
    return x1, x2, s1, s2


def f_2d(x1, x2):
    """Quadratic test objective 0.1*x1**2 + 2*x2**2 (shallow in x1, steep in x2)."""
    shallow_term = 0.1 * x1**2
    steep_term = 2 * x2**2
    return shallow_term + steep_term


# Hyperparameters read by rmsprop_2d via module globals.
eta, gamma = 0.4, 0.9
d2l.show_trace_2d(f_2d, d2l.train_2d(rmsprop_2d))

# 7.6.2 Implementation from scratch
features, labels = d2l.get_data_ch7()


def init_rmsprop_states():
    """Allocate zero-initialized RMSProp state for the linear model.

    Shapes follow the module-level ``features``: one accumulator per
    weight plus one for the bias.
    """
    state_w = nd.zeros((features.shape[1], 1))
    state_b = nd.zeros(1)
    return (state_w, state_b)


def rmsprop(params, states, hyperparams):
    """In-place RMSProp update for each (param, state) pair.

    Maintains an exponential moving average of squared gradients in
    ``states`` and scales each gradient step by its inverse square root.
    Reads ``hyperparams['gamma']`` (EMA decay) and ``hyperparams['lr']``.
    """
    gamma, eps = hyperparams['gamma'], 1e-6
    for p, s in zip(params, states):
        s[:] = gamma * s + (1 - gamma) * p.grad.square()
        # Bug fix: the original snippet stopped after the state update, so
        # the parameters were never changed.  Mirrors the adagrad() step
        # elsewhere in this file, with the EMA state in place of the sum.
        p[:] -= hyperparams['lr'] * p.grad / (s + eps).sqrt()
# ===== Example 2 =====
# 7.4 Momentum method
# 7.4.1 Problems with gradient descent
#%matplotlib inline
import d2lzh as d2l
from mxnet import nd

# Learning rate shared with gd_2d / momentum_2d below via the module global.
eta = 0.4

def f_2d(x1, x2):
    """Elongated quadratic bowl: shallow along x1, steep along x2."""
    shallow_term = 0.1 * x1 ** 2
    steep_term = 2 * x2 ** 2
    return shallow_term + steep_term

def gd_2d(x1, x2, s1, s2):
    """One plain gradient-descent step on f_2d (global ``eta``).

    The two trailing state slots are unused and returned as 0 so the
    signature matches d2l.train_2d's expectations.
    """
    new_x1 = x1 - eta * 0.2 * x1
    new_x2 = x2 - eta * 4 * x2
    return (new_x1, new_x2, 0, 0)

d2l.show_trace_2d(f_2d, d2l.train_2d(gd_2d))

# With a larger learning rate the iterate repeatedly overshoots the optimum
# in the vertical direction and gradually diverges.
eta = 0.6
d2l.show_trace_2d(f_2d, d2l.train_2d(gd_2d))

#7.4.2 Momentum method
def momentum_2d(x1, x2, v1, v2):
    """One heavy-ball momentum step on f_2d (reads globals ``eta``, ``gamma``)."""
    # Velocity: decayed accumulation of learning-rate-scaled gradients.
    v1 = gamma * v1 + eta * 0.2 * x1
    v2 = gamma * v2 + eta * 4 * x2
    # Step against the accumulated velocity.
    new_x1 = x1 - v1
    new_x2 = x2 - v2
    return new_x1, new_x2, v1, v2

# Momentum hyperparameters read by momentum_2d via module globals.
eta = 0.4
gamma = 0.5
d2l.show_trace_2d(f_2d, d2l.train_2d(momentum_2d))

# Repeat with the larger learning rate that made plain GD diverge above.
eta = 0.6
d2l.show_trace_2d(f_2d, d2l.train_2d(momentum_2d))
# ===== Example 3 =====
# Test on y = 0.1*x_1^2 + 2*x2^2.  (The original comment said 0.2*x_1^2,
# but the gradient 0.2*x1 used below corresponds to 0.1*x1^2, cf. fx().)
eta = 2         # Larger than before, yet AdaGrad still converges nicely.
def adagrad_2d(x1, x2, s1, s2):
    """One AdaGrad step on f(x1, x2) = 0.1*x1**2 + 2*x2**2.

    Accumulates squared gradients in (s1, s2) and divides the learning
    rate (global ``eta``) by their square root, per coordinate.
    """
    eps = 1e-6
    # Analytic gradients of the objective.
    g1 = 0.2 * x1
    g2 = 4 * x2
    # Running sum of squared gradients (never decays).
    s1 = s1 + g1 ** 2
    s2 = s2 + g2 ** 2
    # Adaptive per-coordinate step.
    x1 = x1 - eta / math.sqrt(s1 + eps) * g1
    x2 = x2 - eta / math.sqrt(s2 + eps) * g2
    return x1, x2, s1, s2

def fx(x1, x2):
    """Objective being traced: 0.1*x1**2 + 2*x2**2."""
    flat_axis = 0.1 * x1 ** 2
    steep_axis = 2 * x2 ** 2
    return flat_axis + steep_axis

d2l.plt.figure(figsize=(15,5))                 # Set the figure size.
d2l.show_trace_2d(fx, d2l.train_2d(adagrad_2d))



# Hand-written (from-scratch) implementation.
features, labels = d2l.get_data_ch7()
def inin_adagrad_states():
    """Return zero-initialized AdaGrad accumulators for (weights, bias).

    NOTE(review): the name looks like a typo for ``init_adagrad_states``;
    kept unchanged so any external callers keep working.
    """
    state_w = nd.zeros((features.shape[1], 1))
    state_b = nd.zeros(1)
    return (state_w, state_b)

def adagrad(params, states, hyperparams):
    """In-place AdaGrad update for every (param, state) pair.

    Each state accumulates the squared gradient; the step divides the
    learning rate ``hyperparams['lr']`` by the state's square root.
    """
    eps = 1e-6
    for param, state in zip(params, states):
        state[:] += param.grad.square()
        param[:] -= hyperparams['lr'] * param.grad / (state + eps).sqrt()
# ===== Example 4 =====
def adagrad_2d(x1, x2, s1, s2):
    """Single AdaGrad iteration on the 2-D quadratic test objective.

    Uses the module-level learning rate ``eta``; returns the new point
    together with the accumulated squared gradients.
    """
    eps = 1e-6
    grad1, grad2 = 0.2 * x1, 4 * x2
    # Monotone accumulation of squared gradients.
    s1 = s1 + grad1**2
    s2 = s2 + grad2**2
    # Per-coordinate adaptive step size.
    x1 = x1 - eta / math.sqrt(s1 + eps) * grad1
    x2 = x2 - eta / math.sqrt(s2 + eps) * grad2
    return x1, x2, s1, s2


def f_2d(x1, x2):
    """Quadratic objective 0.1*x1**2 + 2*x2**2 used for the 2-D traces."""
    gentle = 0.1 * x1**2
    steep = 2 * x2**2
    return gentle + steep


# Trace AdaGrad on f_2d with a moderate learning rate.
eta = 0.4
d2l.show_trace_2d(f_2d, d2l.train_2d(adagrad_2d))

# In[4]:

# Retry with a much larger learning rate.
eta = 2
d2l.show_trace_2d(f_2d, d2l.train_2d(adagrad_2d))

# In[6]:

features, labels = d2l.get_data_ch7()


def init_adagrad_states():
    """Return zero-initialized AdaGrad accumulators for (weights, bias).

    Shapes follow the module-level ``features``.
    """
    state_w = nd.zeros((features.shape[1], 1))
    state_b = nd.zeros(1)
    return (state_w, state_b)