def neural_net_train(features, labels, num_iter=2000, opt_method='forward_backward'):
    layer_sizes = [2, 5, 5, 1]
    l2_reg = 2.0
    param_scale = 0.5

    init_params = neural_net_init(param_scale, layer_sizes)

    plain_objective = gen_objective(features, labels, l2_reg)
    objective_grad = auto_grad(plain_objective)

    print("          Iteration|      Train accuracy")

    optimized_params = init_params
    gd_step = 0.2
    for i in range(num_iter):
        if opt_method == 'forward_backward':
            # Gradient step with a grow/shrink (forward-backward) step-size rule.
            optimized_params_ori = optimized_params
            value_old = plain_objective(optimized_params)
            flattened_grad, unflatten, x = flatten_func(objective_grad, optimized_params)
            x -= flattened_grad(x) * gd_step
            optimized_params = unflatten(x)
            value_new = plain_objective(optimized_params)
            if value_new < value_old:
                gd_step *= 1.618  # accepted: grow the step
            else:
                gd_step *= 0.618  # rejected: shrink the step and revert
                optimized_params = optimized_params_ori
        elif opt_method == 'steepest':
            # Crude line search along the steepest-descent direction.
            value_old = plain_objective(optimized_params)
            flattened_grad, unflatten, x = flatten_func(objective_grad, optimized_params)
            local_gd_step = gd_step
            best_gd_step = 0.0
            for j in range(10):
                x_test = x - flattened_grad(x) * local_gd_step
                value_new = plain_objective(unflatten(x_test))
                if value_new < value_old:
                    best_gd_step = local_gd_step
                    local_gd_step *= 1.618
                else:
                    local_gd_step *= 0.618
            if auto_np.abs(best_gd_step) < 1e-11:  # no improving step found
                gd_step *= 0.618
            x -= flattened_grad(x) * best_gd_step
            optimized_params = unflatten(x)
        print_perf(optimized_params, i, features, labels)

    return optimized_params
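The forward-backward branch above grows the step by the golden ratio whenever the objective decreases and shrinks it (reverting the step) otherwise. A self-contained sketch of just that rule on a toy quadratic, assuming autograd is available; the function and names below are illustrative and not part of the original training code:

import autograd.numpy as np
from autograd import grad

def forward_backward_gd(objective, x0, step=0.2, num_iter=100):
    # Plain gradient descent with the grow/shrink step-size rule used above.
    g = grad(objective)
    x = x0
    for _ in range(num_iter):
        candidate = x - step * g(x)
        if objective(candidate) < objective(x):
            x = candidate     # accepted: keep the step and grow it
            step *= 1.618
        else:
            step *= 0.618     # rejected: shrink the step, keep the old x
    return x

x_min = forward_backward_gd(lambda x: np.sum((x - 3.0) ** 2), np.zeros(2))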
Example 2
def adam(grad,
         init_params,
         callback=None,
         num_iters=100,
         step_size=0.001,
         b1=0.9,
         b2=0.999,
         eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    m = np.zeros(len(x))
    v = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        stop_early = False
        if callback:
            # A truthy return value from the callback requests early stopping.
            stop_early = bool(callback(unflatten(x), i, unflatten(g)))
        m = (1 - b1) * g + b1 * m  # First  moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))  # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x = x - step_size * mhat / (np.sqrt(vhat) + eps)
        if stop_early:
            break

    return unflatten(x)
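Because flatten_func hides the parameter structure, the adam above can be pointed at any nested parameter container. A minimal usage sketch, assuming autograd is installed and referring to the adam defined just above; the toy objective and parameter dict are made up for illustration:

import autograd.numpy as np
from autograd import grad

init_params = {'w': np.zeros(3), 'b': 0.0}

def loss(params, i):
    # i is the iteration index required by the grad(x, i) convention; unused here.
    return np.sum((params['w'] - 1.0) ** 2) + params['b'] ** 2

trained = adam(grad(loss), init_params, num_iters=200, step_size=0.05)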
Example 3
def sgd(grad,
        init_params,
        subopt=None,
        callback=None,
        break_cond=None,
        num_iters=200,
        step_size=0.1,
        mass=0.9):
    """Stochastic gradient descent with momentum.
    grad() must have signature grad(x, i), where i is the iteration number."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    # dynamic step sizes
    if np.isscalar(step_size):
        step_size = np.ones(num_iters) * step_size
    assert len(step_size) == num_iters, "step schedule needs to match num iter"

    velocity = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        velocity = mass * velocity - (1.0 - mass) * g
        x = x + step_size[i] * velocity

        if subopt is not None:
            x = subopt(x, g, i)

        if break_cond is not None:
            if break_cond(x, i, g):
                break
    return unflatten(x)
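The break_cond hook above receives the flattened parameters, iteration index, and flattened gradient, so a convergence test can be written directly on the flat vectors. A hedged usage sketch (toy objective; assumes autograd and the sgd defined above):

import autograd.numpy as np
from autograd import grad

def loss(params, i):
    return np.sum((params['w'] + 2.0) ** 2)

# Stop as soon as the flattened gradient is essentially zero.
small_grad = lambda x, i, g: np.linalg.norm(g) < 1e-6

fitted = sgd(grad(loss), {'w': np.zeros(4)},
             break_cond=small_grad, num_iters=500, step_size=0.1)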
Example 4
File: recnn.py Project: wz1070/jets
def adam(grad,
         init_params,
         callback=None,
         num_iters=100,
         step_size=0.001,
         b1=0.9,
         b2=0.999,
         eps=10**-8):
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    m = np.zeros(len(x))
    v = np.zeros(len(x))

    for i in range(num_iters):
        g = flattened_grad(x, i)

        if callback:
            callback(unflatten(x), i, unflatten(g))

        m = (1 - b1) * g + b1 * m  # First  moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))  # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x = x - step_size * mhat / (np.sqrt(vhat) + eps)

    return unflatten(x)
Example 5
def sgd(grad, init_params, callback=None, num_iters=200, step_size=0.1, mass=0.9):
    """Stochastic gradient descent with momentum.
    grad() must have signature grad(x, i), where i is the iteration number."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    velocity = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        velocity = mass * velocity - (1.0 - mass) * g
        x = x + step_size * velocity
    return unflatten(x)
Example 6
def rmsprop(grad, init_params, callback=None, num_iters=100,
            step_size=0.1, gamma=0.9, eps=10**-8):
    """Root mean squared prop: See Adagrad paper for details."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    avg_sq_grad = np.ones(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
        x = x - step_size * g/(np.sqrt(avg_sq_grad) + eps)
    return unflatten(x)
Example 7
def rmsprop(grad, init_params, callback=None, num_iters=100,
            step_size=0.1, gamma=0.9, eps=10**-8):
    """Root mean squared prop: See Adagrad paper for details."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    avg_sq_grad = np.ones(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
        x = x - step_size * g/(np.sqrt(avg_sq_grad) + eps)
    return unflatten(x)
Example 8
def sgd(grad, init_params, callback=None, num_iters=200, step_size=0.1, mass=0.9):
    """Stochastic gradient descent with momentum.
    grad() must have signature grad(x, i), where i is the iteration number."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    velocity = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        velocity = mass * velocity - (1.0 - mass) * g
        x = x + step_size * velocity
    return unflatten(x)
Example 9
def batch_adam(
        grad, init_params, callback=None, max_iters=1e5,
        step_size=0.001, b1=0.9, b2=0.999, eps=10**-8,
        validation_grad=None, stop_criterion=1e-3, patience=50,
        early_stop_freq=1):
    """
    Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms.

    """
    flattened_grad, unflatten, x = flatten_func(grad, init_params)
    # initial settings for variables
    m, v = np.zeros(len(x)), np.zeros(len(x))
    cur_iter = 0
    reset_patience = patience
    oldg, g = 0, 1  # dummy values so the first convergence test passes
    # Stop when patience runs out or when successive gradients barely change.
    while (cur_iter < max_iters) and (l2(oldg - g) > stop_criterion) and (patience > 0):
        oldg = copy(g)  # save last iter grad
        g = flattened_grad(x, cur_iter)  # pass iter for batch training
        if callback:
            callback(unflatten(x), cur_iter, unflatten(g))
        m = (1 - b1) * g + b1 * m  # First  moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(cur_iter + 1))    # Bias correction.
        vhat = v / (1 - b2**(cur_iter + 1))
        x = x - step_size*mhat/(np.sqrt(vhat) + eps)
        # check the validation error
        if (validation_grad is not None) and \
                (((cur_iter % early_stop_freq) == 0) or (cur_iter + 1 == max_iters)):

            valoss, _ = validation_grad(x)
            # we want to save the best one (in case of bad regions)
            if cur_iter == 0:
                best_loss = valoss
                best_x = x
            else:
                if valoss < best_loss:
                    best_loss = valoss
                    best_x = x

            # update patience
            patience = patience - 1 if valoss > best_loss else reset_patience

        else:  # if no validation_grad, always save
            best_x = x

        cur_iter += 1
    return unflatten(best_x)
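batch_adam relies on two helpers that this excerpt does not define: copy (presumably copy.copy) and an l2 norm. Plausible stand-ins, consistent with how they are used above and offered only as assumptions:

from copy import copy          # `copy` as used above: copies the previous flattened gradient
import autograd.numpy as np    # matches the np used throughout these examples

def l2(v):
    # Euclidean norm; used above to measure how much the gradient changed.
    return np.sqrt(np.sum(v ** 2))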
Example 10
    def prepare_updates(self, cost, params, epsilon,
                        grad=None, diag_hess=None, fd_hess=False, A=1,
                        callbacks=[], callback_every=1000, **kwargs):
        self.theta = params
        if grad is not None:
            if diag_hess is None:
                if self.precondition and not fd_hess:
                    raise ValueError("If precondition=True you must also prepare a function for"
                                     " computing the diagonal of the Hessian! Alternatively, specify fd_hess=True,"
                                     " in which case a noisy finite-difference approximation will be used;"
                                     " note that this can bias the MCMC sampler!")
                else:
                    self.flattened_hess = None
            else:
                self.flattened_hess = diag_hess
            self.flattened_grad = grad            
            self.unflatten = lambda x: x
        else:
            gradient = autograd.grad(cost)
            self.flattened_grad, self.unflatten, self.theta = flatten_func(gradient, params)
            self.hess = autograd.grad(self.flattened_grad)
            self.flattened_hess = lambda x, *inputs: np.diag(self.hess(x, *inputs)).reshape((-1,))

        self.epsilon = epsilon
        self.A = A

        self.g  = np.ones_like(params)
        self.g2  = np.ones_like(params)

        # note that xi here is not the same as in the thermostat!
        self.xi  = np.ones_like(params) * self.A
        self.xi_acc  = np.ones_like(params) * self.A

        self.updates = np.zeros_like(params)
        self.count = 1
        self.callback_every = callback_every
        self.callbacks = callbacks
            
        def Ggrad(*args, **kwargs):
            # `G` is assumed to be defined in the enclosing scope; it is not shown in this excerpt.
            saved = lambda: None
            def return_val_save_aux(*args, **kwargs):
                val, saved.aux = G(*args, **kwargs)
                return val
            gradval = elementwise_grad(return_val_save_aux, 0)(*args, **kwargs)
            return gradval, saved.aux
        self.Ggrad = Ggrad

        return self.updates
Example 11
def adam(grad, init_params, callback=None, num_iters=100,
         step_size=0.001, b1=0.9, b2=0.999, eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    m = np.zeros(len(x))
    v = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        m = (1 - b1) * g      + b1 * m  # First  moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))    # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x = x - step_size*mhat/(np.sqrt(vhat) + eps)
    return unflatten(x)
Example 12
    def __init__(self,
                 grad,
                 init_params,
                 callback=None,
                 step_size=0.01,
                 b1=0.9,
                 b2=0.999,
                 eps=10**-8):
        self.grad = grad
        self.init_params = copy.copy(init_params)
        self.callback = callback
        self.step_size = step_size
        self.b1 = b1
        self.b2 = b2
        self.eps = eps

        self.flattened_grad, self.unflatten, self.x = flatten_func(
            self.grad, self.init_params)
        self.reset()
Example 13
File: recnn.py Project: wz1070/jets
def sgd(grad,
        init_params,
        callback=None,
        num_iters=200,
        step_size=0.1,
        mass=0.9):
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    velocity = np.zeros(len(x))

    for i in range(num_iters):
        g = flattened_grad(x, i)

        if callback:
            callback(unflatten(x), i, unflatten(g))

        velocity = mass * velocity - (1.0 - mass) * g
        x = x + step_size * velocity

    return unflatten(x)
Example 14
def adam(grad,
         init_params,
         subopt=None,
         callback=None,
         break_cond=None,
         num_iters=100,
         step_size=0.001,
         b1=0.9,
         b2=0.999,
         eps=10**-8):
    """Adam as described in http://arxiv.org/pdf/1412.6980.pdf.
    It's basically RMSprop with momentum and some correction terms."""
    flattened_grad, unflatten, x = flatten_func(grad, init_params)

    # dynamic step sizes
    if np.isscalar(step_size):
        step_size = np.ones(num_iters) * step_size
    assert len(step_size) == num_iters, "step schedule needs to match num iter"

    m = np.zeros(len(x))
    v = np.zeros(len(x))
    for i in range(num_iters):
        g = flattened_grad(x, i)
        if callback: callback(unflatten(x), i, unflatten(g))
        m = (1 - b1) * g + b1 * m  # First  moment estimate.
        v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
        mhat = m / (1 - b1**(i + 1))  # Bias correction.
        vhat = v / (1 - b2**(i + 1))
        x = x - step_size[i] * mhat / (np.sqrt(vhat) + eps)

        # Optionally refine the updated parameters with a sub-optimizer (e.g. a line search).
        if subopt is not None:
            x = subopt(x, g, i)

        if break_cond is not None:
            if break_cond(x, i, g):
                break

    return unflatten(x)
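Since this adam variant accepts a full step-size schedule, a decaying schedule can be passed as an array whose length matches num_iters. A sketch with an illustrative 1/t decay and toy objective (assumes autograd and the adam defined directly above):

import autograd.numpy as np
from autograd import grad

num_iters = 300
schedule = 0.01 / (1.0 + 0.01 * np.arange(num_iters))  # simple 1/t decay

def loss(params, i):
    return np.sum((params['w'] - 0.5) ** 2)

fitted = adam(grad(loss), {'w': np.zeros(5)},
              num_iters=num_iters, step_size=schedule)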
def gradient_descent(g, w, alpha, max_its, beta, version):
    """Momentum gradient descent; version='normalized' steps along the unit-length gradient."""
    g_flat, unflatten, w = flatten_func(g, w)
    grad = compute_grad(g_flat)
    w_hist = [unflatten(w)]
    z = np.zeros(np.shape(w))

    for k in range(max_its):
        grad_eval = grad(w)
        grad_eval.shape = np.shape(w)

        if version == 'normalized':
            grad_norm = np.linalg.norm(grad_eval)
            if grad_norm == 0:
                # Avoid division by zero with a tiny random-signed norm.
                grad_norm += 10**-6 * np.sign(2 * np.random.rand(1) - 1)
            grad_eval /= grad_norm

        z = beta * z + grad_eval
        w = w - alpha * z
        w_hist.append(unflatten(w))

    return w_hist
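gradient_descent above assumes a compute_grad helper is in scope; in these listings it is plausibly autograd's grad. A hedged call sketch under that assumption, with an illustrative objective and starting point:

import autograd.numpy as np
from autograd import grad as compute_grad  # assumption: compute_grad is autograd's grad

w0 = {'w': np.random.randn(3), 'b': 0.0}
history = gradient_descent(lambda p: np.sum(p['w'] ** 2) + p['b'] ** 2,
                           w0, alpha=0.1, max_its=50, beta=0.9,
                           version='normalized')
final_w = history[-1]  # parameters after the last update, in the original nested form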
Example 16
    def prepare_updates(self, cost, params, epsilon,
                        grad=None, A=1, callbacks=[],
                        callback_every=1000, **kwargs):
        self.theta = params
        if grad is not None:            
            self.flattened_grad = grad            
            self.unflatten = lambda x: x
        else:
            gradient = autograd.grad(cost)
            self.flattened_grad, self.unflatten, self.theta = flatten_func(gradient, params)
            self.hess = autograd.grad(self.flattened_grad)
            self.flattened_hess = lambda x, *inputs: np.diag(self.hess(x, *inputs)).reshape((-1,))
            
        self.epsilon = epsilon
        self.A = A

        self.p   = self._srng.normal(size=params.shape)
        self.xi  = np.ones_like(params) * self.A
        self.xi_acc  = np.ones_like(params) * self.A
        self.updates = np.zeros_like(params)
        self.count = 1
        self.callback_every = callback_every
        self.callbacks = callbacks
        return self.updates
    dim = len(params['weights'])
    cov = alpha * np.eye(dim)
    log_alpha = -np.log(np.sqrt(2 * np.pi)) - np.log(
        np.sqrt(alpha)) - 0.5 * (params['bias']**2) / (alpha)
    log_prior = -np.log(np.sqrt(2 * np.pi)) - 0.5 * np.log(np.linalg.det(
        cov)) - 0.5 * np.dot(np.dot(params['weights'].T, (np.linalg.inv(cov))),
                             (params['weights']))
    log_likelihood = y_train * np.log(y_pred) + (1 - y_train) * np.log(1 -
                                                                       y_pred)
    return np.sum(log_likelihood) + log_prior + log_alpha


# Build a function that returns gradients of training loss using autograd.

init_params = {'weights': np.array(np.ones(x_train.shape[1])), 'bias': 1}
flattened_obj, unflatten, flattened_init_params = flatten_func(
    training_loss, init_params)
# Check the gradients numerically, just to be safe.

training_gradient_fun = grad(flattened_obj)

n_iter = 10000
warmup = 1000
delta = 0.01
path_length = 1.0
n_steps = int(path_length / delta)

import hamiltonian1 as hmc1
import hamiltonian2 as hmc2

print('Descriptors: ' + str(x_n))
print('Params: n_iter: ' + str(n_iter) + ', warmup: ' + str(
Example 18
def harmonic_synthesis(
        source_features,
        target_features,
        basis_size=8,
        gain_penalty=10.0,
        rate_penalty=0.0,
        rms_weight=1.0,
        dissonance_weight=1e-8,  #dissonance is large
        debug=False,
        max_iters=100,
        **kwargs):
    """
    Reconstruct audio from descriptors based on approximate matching
    """
    if debug:
        from librosa.display import specshow
        import matplotlib.pyplot as plt

    n_fft = source_features['metadata']['n_fft']
    hop_length = source_features['metadata']['hop_length']
    sr = source_features['metadata']['sr']

    gain = np.ones((1, basis_size)) / basis_size
    rate = np.ones((1, basis_size))
    source_length = source_features['peak_f'].shape[1]

    target_peak_f = target_features['peak_f']
    target_peak_power = target_features['peak_power']

    start_frame = np.random.randint(source_length, size=basis_size)

    source_peak_power = source_features['peak_power'][:, start_frame]
    source_peak_f = source_features['peak_f'][:, start_frame]

    source_power = source_features['rms'][:, start_frame]
    target_power = target_features['rms']

    def reconstruct_peaks(gain, rate):
        reconstruction_peak_power = np.abs(source_peak_power * gain *
                                           rate).ravel()
        reconstruction_peak_f = np.abs(source_peak_f * rate).ravel()
        return reconstruction_peak_power, reconstruction_peak_f

    def reconstruct_power(gain, rate):
        return np.abs(gain * rate * source_power).sum()

    def dissonance_loss(gain, rate):
        # reconstruct_peaks returns (power, frequency), so unpack in that order.
        reconstruction_peak_power, reconstruction_peak_f = reconstruct_peaks(
            gain, rate)
        return v_x_dissonance_sethares(reconstruction_peak_f, target_peak_f,
                                       reconstruction_peak_power,
                                       target_peak_power)

    def power_loss(gain, rate):
        return np.abs(reconstruct_power(gain, rate) - target_power)**2

    def reconstruct_loss(gain, rate, rms_weight, dissonance_weight):
        diss_loss = dissonance_loss(gain, rate)
        pow_loss = power_loss(gain, rate)
        total_loss = dissonance_weight * diss_loss + rms_weight * pow_loss
        if debug:
            print('gain', gain, 'rate', rate)
            print('loss', total_loss, 'diss loss', diss_loss, 'power loss',
                  pow_loss)

        return total_loss

    def reconstruct_penalty(gain, rate, gain_penalty, rate_penalty):
        return gain_penalty * np.abs(gain).mean() + rate_penalty * np.abs(
            np.log2(rate)).mean()

    def objective(gain, rate, rms_weight, dissonance_weight, gain_penalty,
                  rate_penalty):
        return reconstruct_loss(gain, rate, rms_weight,
                                dissonance_weight) + reconstruct_penalty(
                                    gain, rate, gain_penalty, rate_penalty)

    def local_objective(params):
        gain, rate = params
        return objective(gain, rate, rms_weight, dissonance_weight,
                         gain_penalty, rate_penalty)

    result = local_objective([gain, rate])
    fun, unflatten, flat_params = flatten_func(local_objective, [gain, rate])
    jac = grad(fun)

    result = minimize(
        fun,
        flat_params,
        method='L-BFGS-B',
        jac=jac,
        bounds=[[0, None]] * (basis_size * 2),
        # callback=callback_fun,
        options=dict(maxiter=max_iters, disp=True, gtol=1e-3))

    gain, rate = unflatten(result.x)

    return dict(
        start_frame=start_frame.ravel(),
        start_time=librosa.core.frames_to_time(start_frame, sr, hop_length,
                                               n_fft).ravel(),
        start_sample=librosa.core.frames_to_samples(start_frame, hop_length,
                                                    n_fft).ravel(),
        gain=np.sqrt(gain).ravel(),
        rate=rate.ravel(),
    )
Example 19
            print('{:15}|{:20}|{:20}|'.format(e, te, ve))
        if i % 10 == 0:
            print('[%03d][%03d/%03d]' % (e, i % opt['num_batches'],
                                          opt['num_batches']))
        gc.collect()

    p = sgd(objective_grad,
            p,
            step_size=lr,
            num_iters=opt['num_batches'],
            callback=stats)
print('[opt] ', time.time() - s)
params = p

print('[flat params] ...')
flat_f, unflatten, flat_params = flatten_func(objective, params)

print('[flat hess] ...')
flat_hess = hessian(flat_f)

h = None
print '[compute hess] ...'
for i in np.random.permutation(np.arange(
        opt['num_batches']))[:opt['hessian_num_batches']]:
    if h is None:
        h = flat_hess(flat_params, i)
    else:
        np.add(h, flat_hess(flat_params, i), h)
    print('[progress] ', i, ' dt: ', time.time() - s)
    gc.collect()
h = h.squeeze() / float(opt['hessian_num_batches'] * opt['batch_size'])
Example 20
    for log_proportion, mean, cov_sqrt in zip(*unpack_gmm_params(params)):
        alpha = np.minimum(1.0, np.exp(log_proportion) * 10)
        plot_ellipse(ax, mean, cov_sqrt, alpha)

if __name__ == '__main__':

    init_params = init_gmm_params(num_components=10, D=2, scale=0.1)

    data = make_pinwheel(radial_std=0.3, tangential_std=0.05, num_classes=3,
                         num_per_class=100, rate=0.4)

    def objective(params):
        return -gmm_log_likelihood(params, data)

    flattened_obj, unflatten, flattened_init_params =\
        flatten_func(objective, init_params)

    fig = plt.figure(figsize=(12,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    def callback(flattened_params):
        params = unflatten(flattened_params)
        print("Log likelihood {}".format(-objective(params)))
        ax.cla()
        ax.plot(data[:, 0], data[:, 1], 'k.')
        ax.set_xticks([])
        ax.set_yticks([])
        plot_gaussian_mixture(params, ax)
        plt.draw()
        plt.pause(1.0/60.0)
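This last fragment stops after setting up the figure and the callback; the flattened objective, its gradient, and the callback are exactly what a flat-vector optimizer needs. One plausible continuation (an assumption, not necessarily what the original script did) hands them to scipy's L-BFGS-B:

    from autograd import grad
    from scipy.optimize import minimize

    # scipy's callback receives only the flat parameter vector, matching callback() above.
    minimize(flattened_obj, flattened_init_params,
             jac=grad(flattened_obj), method='L-BFGS-B',
             callback=callback, options={'maxiter': 200, 'disp': True})
    plt.show(block=True)  # keep the final mixture plot on screen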