Example #1
def Pre_zero(ZERO_DET=ZERO_DET, size=(2, 8192), fs=8192, fmin=20, fmax=4000):  # default binds the module-level ZERO_DET (frequency, amplitude) table
    (D, *N) = size  # N is a list
    low_f_max = fmin
    high_f_min = fmax
    # Interpolation
    freqs = fftfreq(N[-1], 1./fs)
    asd_zero = np.interp(freqs[(freqs>=ZERO_DET[:,0].min())&(freqs<=high_f_min)], ZERO_DET[:,0], ZERO_DET[:,1]) 

    shiftsize = int(low_f_max - ZERO_DET[:,0].min())
    xf = fftfreq(N[-1], 1./fs)
    xf_noise = xf[xf>=0]
    slc, slc_, slc__ = (xf_noise >= low_f_max)&(xf_noise<=high_f_min), (xf_noise < low_f_max), (xf_noise > high_f_min)

    if ctx == mx.gpu():
        asd_zero = nd.array(asd_zero, ctx = ctx, dtype='float64')
        asd_pos = nd.square(asd_zero)[shiftsize * N[-1]//8192:]
        asd_neg = nd.square(asd_zero)[shiftsize * N[-1]//8192:][::-1]    
    elif ctx == mx.cpu():
        asd_pos = np.square(asd_zero)[shiftsize:]
        asd_neg = np.square(asd_zero)[shiftsize:][::-1]            
    else:
        raise ValueError("ctx must be mx.cpu() or mx.gpu()")

    assert slc_.argmin() == slc.argmax()
    low_f = slc_.argmin()
    high_f = slc[slc.argmax():].argmin()+slc.argmax()
    high_f_ = N[-1]//2 - slc__.argmax()
    assert asd_pos.shape[0] == high_f - low_f
#    print(asd_neg)
    return (asd_pos, asd_neg, low_f, high_f, high_f_, size, fs, fmin, fmax)
Example #2
    def update(self, index, weight, grad, state):
        assert (isinstance(weight, NDArray))
        assert (isinstance(grad, NDArray))
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)

        is_sparse = grad.stype == 'row_sparse'
        history = state[0]
        cache_history = state[1]

        # if self._full_sync:
        #     print("full sync")
        # else:
        #     print("local sgd")

        if is_sparse:
            kwargs = {
                'epsilon': self.float_stable_eps,
                'rescale_grad': self.rescale_grad
            }
            if self.clip_gradient:
                kwargs['clip_gradient'] = self.clip_gradient
            if self._full_sync:
                sparse.adaalter_update(weight,
                                       grad,
                                       history,
                                       out=weight,
                                       lr=lr,
                                       wd=wd,
                                       **kwargs)
            else:
                sparse.local_adaalter_update(weight,
                                             grad,
                                             history,
                                             cache_history,
                                             out=weight,
                                             lr=lr,
                                             wd=wd,
                                             **kwargs)
            # raise NotImplementedError('AdaAlter has not been implemented for sparse nd')
        else:
            grad[:] = grad * self.rescale_grad
            if self.clip_gradient is not None:
                grad[:] = clip(grad, -self.clip_gradient, self.clip_gradient)
            div = grad / sqrt(history + self.float_stable_eps)
            weight[:] += (div + weight * wd) * -lr

            if self._full_sync:
                history[:] += square(grad)
            else:
                cache_history[:] += square(grad)
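For reference, the dense branch above performs an AdaGrad-style step in which the weight is updated before the squared gradient is accumulated: with gradient $g$, accumulator $h$, learning rate $\mathrm{lr}$ and weight decay $\mathrm{wd}$,

$w \leftarrow w - \mathrm{lr}\,\bigl(g / \sqrt{h + \epsilon} + \mathrm{wd}\, w\bigr), \qquad h \leftarrow h + g^2,$

where the accumulator refresh lands in history under full sync and in cache_history under local SGD.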
Example #3
def step_HMC(exe, exe_params, exe_grads, label_key, noise_precision, prior_precision, L=10,
             eps=1E-6):
    init_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    end_params = {k: v.copyto(v.context) for k, v in exe_params.items()}
    init_momentums = {k: mx.random.normal(0, 1, v.shape) for k, v in init_params.items()}
    end_momentums = {k: v.copyto(v.context) for k, v in init_momentums.items()}
    init_potential = calc_potential(exe, init_params, label_key, noise_precision, prior_precision)

    # 0. Calculate Initial Energy and Kinetic
    init_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0
                        for momentum in init_momentums.values()]).asscalar()
    # 1. Make a half step for momentum at the beginning
    exe.copy_params_from(end_params)
    exe.forward(is_train=True)
    exe.backward()
    for k, v in exe_grads.items():
        v.wait_to_read()
    for k, momentum in end_momentums.items():
        momentum[:] = momentum - (eps / 2) * exe_grads[k]
    # 2. Alternate full steps for position and momentum
    for i in range(L):
        # 2.1 Full step for position
        for k, param in exe_params.items():
            param[:] = param + eps * end_momentums[k]
        # 2.2 Full step for the momentum, except at the end of trajectory we perform a half step
        exe.forward(is_train=True)
        exe.backward()
        for v in exe_grads.values():
            v.wait_to_read()
        if i != L - 1:
            for k, momentum in end_momentums.items():
                momentum[:] = momentum - eps * exe_grads[k]
        else:
            for k, momentum in end_momentums.items():
                # We should reverse the sign of the momentum at the end
                momentum[:] = -(momentum - eps / 2.0 * exe_grads[k])
    copy_param(exe, end_params)
    # 3. Calculate acceptance ratio and accept/reject the move
    end_potential = calc_potential(exe, end_params, label_key, noise_precision, prior_precision)
    end_kinetic = sum([nd.sum(nd.square(momentum)) / 2.0
                       for momentum in end_momentums.values()]).asscalar()
    # print init_potential, init_kinetic, end_potential, end_kinetic
    r = numpy.random.rand(1)
    if r < numpy.exp(-(end_potential + end_kinetic) + (init_potential + init_kinetic)):
        exe.copy_params_from(end_params)
        return end_params, 1
    else:
        exe.copy_params_from(init_params)
        return init_params, 0
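The final accept/reject step is the standard Metropolis correction for HMC: with potential energy $U$ and kinetic energy $K = \tfrac{1}{2} \sum \lVert p \rVert^2$, the proposal is accepted with probability

$\min\bigl(1, \exp\bigl[(U_{\mathrm{init}} + K_{\mathrm{init}}) - (U_{\mathrm{end}} + K_{\mathrm{end}})\bigr]\bigr),$

which is exactly the comparison against numpy.random.rand(1) in the code above.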
Example #5
    def inference_g(self, observed_arr):
        '''
        Inference with generator.

        Args:
            observed_arr:       `mxnet.ndarray` of observed data points.
        
        Returns:
            Tuple data.
            - re-parametric data.
            - encoded data points.
            - re-encoded data points.
        '''
        encoded_arr = self.model.encoder(observed_arr)
        decoded_arr = self.model.decoder(encoded_arr)
        re_encoded_arr = self.re_encoder_model(decoded_arr)

        anomaly_arr = nd.square(encoded_arr - re_encoded_arr)
        anomaly_arr = nd.expand_dims(nd.exp(anomaly_arr.mean(axis=1)), axis=1)
        mean_arr = nd.expand_dims(decoded_arr.mean(axis=1), axis=1)
        gauss_arr = nd.random.normal_like(data=observed_arr, loc=0, scale=3.0)

        re_param_arr = mean_arr + (gauss_arr * anomaly_arr)

        kl_arr = -0.5 * (1 + nd.log(anomaly_arr) - mean_arr + anomaly_arr)
        re_param_arr = re_param_arr + kl_arr

        return re_param_arr, encoded_arr, re_encoded_arr
Example #6
    def train(self, inputs, action, sampled_q):
        inputs = copy.deepcopy(inputs)
        action = copy.deepcopy(action)
        sampled_q = copy.deepcopy(sampled_q)

        inputs = nd.array(inputs, ctx=CTX)
        action = nd.array(action, ctx=CTX)
        sampled_q = nd.array(sampled_q, ctx=CTX)
        sampled_q = sampled_q.reshape(shape=(sampled_q.shape[0],))

        with mx.autograd.record():
            loss_vec = []
            outputs = self.qnet(inputs, loss_vec)
            loss = 0.
            for element in loss_vec:
                loss = loss + element
            # print 'loss_dropout:', loss
            td_error = nd.sum(data=outputs * action, axis=1) - sampled_q
            for i in range(self.minibatch_size):
                if nd.abs(td_error[i]) < 1.0:
                    loss = loss + 0.5 * nd.square(td_error[i])
                else:
                    loss = loss + nd.abs(td_error[i]) - 0.5
            # print loss
        loss.backward()
        self.trainer.step(batch_size=self.minibatch_size, ignore_stale_grad=True)
Example #7
    def goodness_of_function_optimizer_function(self):
        for param, sqr in zip(self.__params, self.__sqrs):
            g = param.grad / self.__batch_size
            # Note: an exponential moving average here, not "+=" as in AdaGrad
            sqr[:] = self.__gamma * sqr + (1. - self.__gamma) * nd.square(g)
            div = self.__learning_rate * g / nd.sqrt(sqr + self.__eps_stable)
            param[:] -= div
Example #8
def adagrad(params, sqrs, lr, batch_size):
    eps_stable = 1e-7
    for param, sqr in zip(params, sqrs):
        g = param.grad / batch_size
        sqr[:] = sqr + nd.square(g)
        div = lr * g / (nd.sqrt(eps_stable + sqr))
        param[:] -= div
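A minimal usage sketch for the (params, sqrs, lr, batch_size) helpers above; the toy linear model, shapes, and hyperparameters are illustrative assumptions, not part of the original source:

from mxnet import autograd, nd

batch_size, lr = 4, 0.1
X = nd.random.normal(shape=(batch_size, 3))
y = nd.random.normal(shape=(batch_size, 1))

w = nd.random.normal(shape=(3, 1))
b = nd.zeros((1,))
params = [w, b]
sqrs = [nd.zeros_like(p) for p in params]  # one squared-gradient accumulator per parameter
for p in params:
    p.attach_grad()

with autograd.record():
    loss = nd.sum(nd.square(nd.dot(X, w) + b - y))
loss.backward()
adagrad(params, sqrs, lr, batch_size)  # one in-place AdaGrad step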
Example #9
    def forward(self, feat):
        square_sum = nd.sum(nd.square(feat), axis=self.axis, keepdims=True)
        inv_norm = nd.rsqrt(nd.maximum(square_sum, self.epsilon))
        l2_res = nd.multiply(feat, inv_norm)
        # print(l2_res.shape)
        return nd.multiply(l2_res.transpose([0, 2, 3, 1]),
                           self.scale.data()).transpose([0, 3, 1, 2])
Example #10
def rmsprop(params, sqrs, lr, gamma, batch_size):
    eps_stable = 1e-8
    for param, sqr in zip(params, sqrs):
        g = param.grad / batch_size
        sqr[:] = gamma * sqr + (1. - gamma) * nd.square(g)
        div = lr * g / nd.sqrt(sqr + eps_stable)
        param[:] -= div
Example #11
def adagrad(params, sqrs, lr, batch_size):
    eps_stable = 1e-7
    for param, sqr in zip(params, sqrs):
        g = param.grad / batch_size
        sqr[:] += nd.square(g)
        div = lr * g / nd.sqrt(sqr + eps_stable)
        param[:] -= div
Example #12
    def observe_reward_value(
        self, 
        state_arr, 
        action_arr,
        meta_data_arr=None,
    ):
        '''
        Compute the reward value.
        
        Args:
            state_arr:              Tensor of state.
            action_arr:             Tensor of action.
            meta_data_arr:          Meta data of actions.

        Returns:
            Reward value.
        '''
        if state_arr is not None:
            mse_arr = nd.mean(
                nd.square(
                    nd.flatten(state_arr) - nd.flatten(action_arr)
                ),
                axis=0, 
                exclude=True
            )
            reward_value_arr = 1 / mse_arr
            reward_value_arr = nd.expand_dims(reward_value_arr, axis=1)
        else:
            reward_value_arr = nd.zeros((
                action_arr.shape[0],
                1
            ), ctx=action_arr.context)

        return reward_value_arr
Example #13
def loss_fe_fn(data, label):
    y_data_list = loss_fe_forward(data)
    y_label_list = loss_fe_forward(label)
    loss = nd.zeros(shape=data.shape[0], ctx=data.context, dtype=data.dtype)
    for i in range(len(y_data_list)):
        loss = loss + nd.sum(nd.square(y_data_list[i] - y_label_list[i]),
                             axis=[1, 2, 3])
    return loss
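loss_fe_fn is a feature-reconstruction loss in the style of perceptual losses: with $\phi_l$ denoting the feature maps returned by loss_fe_forward, the per-sample value is

$L(x, y) = \sum_l \lVert \phi_l(x) - \phi_l(y) \rVert_2^2,$

summed over the channel and spatial axes (axis=[1, 2, 3]) so that one scalar remains per batch element.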
Example #14
def adadelta(params, sqrs, deltas, rho, batch_size):
    eps_stable = 1e-5
    for param, sqr, delta in zip(params, sqrs, deltas):
        g = param.grad / batch_size
        sqr[:] = rho * sqr + (1. - rho) * nd.square(g)
        g_next = nd.sqrt(delta + eps_stable) / nd.sqrt(sqr + eps_stable) * g
        delta[:] = rho * delta + (1. - rho) * g_next * g_next
        param[:] -= g_next
Example #15
    def hybrid_forward(self, F, pred, label, sample_weight=None):

        #label = _reshape_like(F, label, pred)
        #loss = F.square(pred-label)
        #loss = _apply_weighting(F, loss, self._weight/2, sample_weight)
        # sqrt(square(x)) is simply |x|, i.e. an elementwise absolute (L1) error
        loss = F.sqrt(F.square(pred - label))
        #return F.mean(loss, axis=self._batch_axis, exclude=True)
        return loss
Example #16
def adadelta(params, sqrs, deltas, batch_size, rho):
    eps_stable = 1e-5
    for param, sqr, delta in zip(params, sqrs, deltas):
        g = param.grad / batch_size
        sqr[:] = rho * sqr + (1. - rho) * nd.square(g)
        cur_delta = nd.sqrt(delta + eps_stable) / nd.sqrt(sqr + eps_stable) * g
        delta[:] = rho * delta + (1. - rho) * cur_delta * cur_delta
        param[:] -= cur_delta
Example #17
def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision):
    student_mean = student_outputs[0]
    student_var = student_outputs[1]
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) * (nd.square(student_mean - teacher_pred)
                                            + 1.0 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]
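These expressions are consistent with student_var holding the log-variance $s$ of a Gaussian predictive distribution. Writing the per-sample loss as in Example #34 below, with $\mu$ = student_mean, $y$ = teacher_pred and $\tau$ = teacher_noise_precision,

$L = \tfrac{1}{2}\bigl(s + e^{-s}\bigl((\mu - y)^2 + 1/\tau\bigr)\bigr),$

differentiating gives exactly the returned gradients:

$\partial L / \partial \mu = e^{-s}(\mu - y), \qquad \partial L / \partial s = \tfrac{1}{2}\bigl(1 - e^{-s}\bigl((\mu - y)^2 + 1/\tau\bigr)\bigr).$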
Example #18
    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        mean = in_data[0]
        var = in_data[1]
        if self.implicit_backward:
            action = out_data[0]
        else:
            action = in_data[3]
        score = in_data[2]
        grad_mu = in_grad[0]
        grad_var = in_grad[1]
        self.assign(
            grad_mu, req[0], -(action - mean) * score.reshape(
                (score.shape[0], 1)) * self.grad_scale / var)
        self.assign(
            grad_var, req[1],
            self.grad_scale *
            ((-nd.square(action - mean) / (2.0 * nd.square(var)) + 1.0 /
              (2.0 * var)) * score.reshape((score.shape[0], 1)) -
             numpy.float32(self.entropy_regularization) / (2.0 * var)))
Example #19
    def goodness_of_function_optimizer_function(self):
        for param, v, sqr in zip(self.__params, self.__vs, self.__sqrs):
            g = param.grad / self.__batch_size
            v[:] = self.__beta1 * v + (1 - self.__beta1) * g
            sqr[:] = self.__beta2 * sqr + (1 - self.__beta2) * nd.square(g)
            v_hat = v / (1 - self.__beta1**self.__t)
            sqr_hat = sqr / (1 - self.__beta2**self.__t)
            div = self.__learning_rate * v_hat / nd.sqrt(sqr_hat +
                                                         self.__eps_stable)
            param[:] -= div
Example #20
def regression_student_grad(student_outputs, teacher_pred,
                            teacher_noise_precision):
    student_mean = student_outputs[0]
    student_var = student_outputs[1]
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) *
                (nd.square(student_mean - teacher_pred) +
                 1.0 / teacher_noise_precision)) / 2
    return [grad_mean, grad_var]
Example #21
    def hybrid_forward(self,
                       F,
                       input_logits,
                       target_logits,
                       sample_weight=None):
        input_softmax = F.softmax(input_logits, axis=1)
        target_softmax = F.softmax(target_logits, axis=1)

        loss = F.square(input_softmax - target_softmax)

        return F.mean(loss, axis=self._batch_axis, exclude=True)
Example #22
def nd_global_norm(t_list):
    """Computes the global norm of multiple tensors.

    Given a tuple or list of tensors t_list, this operation returns the global norm of the elements
     in all tensors in t_list. The global norm is computed as:

    ``global_norm = sqrt(sum([l2norm(t)**2 for t in t_list]))``

    Any entries in t_list that are of type None are ignored.

    Parameters
    ----------
    t_list: list or tuple
        The NDArray list

    Returns
    -------
    ret: NDArray
        The global norm. The shape of the NDArray will be (1,)

    Examples
    --------
    >>> x = mx.nd.ones((2, 3))
    >>> y = mx.nd.ones((5, 6))
    >>> z = mx.nd.ones((4, 2, 3))
    >>> print(nd_global_norm([x, y, z]).asscalar())
    7.74597
    >>> xnone = None
    >>> ret = nd_global_norm([x, y, z, xnone])
    >>> print(ret.asscalar())
    7.74597
    """
    ret = None
    for arr in t_list:
        if arr is not None:
            if ret is None:
                ret = nd.square(nd.norm(arr))
            else:
                ret += nd.square(nd.norm(arr))
    ret = nd.sqrt(ret)
    return ret
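A common use of a global norm is clipping all gradients by one shared factor; a minimal sketch (the threshold and the gradient list are illustrative assumptions):

from mxnet import nd

max_norm = 5.0
grads = [nd.random.normal(shape=(2, 3)), nd.random.normal(shape=(4,))]
total_norm = nd_global_norm(grads).asscalar()
if total_norm > max_norm:
    scale = max_norm / total_norm
    for g in grads:
        g *= scale  # rescale every gradient by the same factor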
Example #23
    def train(self, s_batch, a_batch_one_hot, V_trace, advantage):
        batch_size = s_batch.shape[0]
        action_indx = np.argmax(a_batch_one_hot,axis=1).tolist()
        action_stats = [action_indx.count(action_indx[i]) for i in range(batch_size)]
        action_bp_rate = (1 - np.array(action_stats)/float(batch_size))**2

        s_batch = copy.deepcopy(s_batch)
        a_batch_one_hot = copy.deepcopy(a_batch_one_hot)
        V_trace_batch = copy.deepcopy(V_trace)
        advantage_batch = copy.deepcopy(advantage)

        s_batch = nd.array(s_batch, ctx=CTX)
        a_batch_one_hot = nd.array(a_batch_one_hot, ctx=CTX)
        V_trace_batch = nd.array(V_trace_batch, ctx=CTX)
        advantage_batch = nd.array(advantage_batch, ctx=CTX)
        action_bp_rate = nd.softmax(nd.array(action_bp_rate, ctx=CTX))

        self.actorcritic.collect_params().zero_grad()
        self.reset_noise()
        with mx.autograd.record():
            loss_vec = []
            probs, values, top_decisions = self.actorcritic.forward(s_batch, loss_vec)
            loss = 0.
            for element in loss_vec:
                loss = loss + element
            # print 'loss_dropout:', loss
            logprob = nd.log(nd.sum(data=probs * a_batch_one_hot, axis=1)+1e-5)
            entropy = -nd.sum(nd.sum(data=probs*nd.log(probs+1e-5), axis=1), axis=0)
            top_decision_entropy = -nd.sum(nd.sum(data=top_decisions*nd.log(top_decisions+1e-5), axis=1), axis=0)
            entropy_loss = - entropy
            top_decision_entropy_loss = - top_decision_entropy
            actorloss = -nd.sum(action_bp_rate*(logprob*advantage_batch), axis=0) 
            criticloss = nd.sum(action_bp_rate*nd.square(values-V_trace_batch), axis=0)
            # actorloss = -nd.sum(logprob*advantage_batch, axis=0) 
            # criticloss = nd.sum(nd.square(values-V_trace_batch), axis=0)
            loss = actorloss + 0.3*criticloss + 0.001*entropy_loss
            
            # loss = actorloss + 0.3*criticloss + 0.0001*top_decision_entropy_loss
        loss.backward()

        # CTname = threading.currentThread().getName()

        # print(CTname + ' actorloss : '+str(actorloss))
        # print(CTname + ' criticloss : '+str(criticloss))
        # print(CTname + ' entropy_loss : '+str(entropy_loss))

        grads_list = []
        for name, value in self.actorcritic.collect_params().items():
            if name.find('batchnorm') < 0:
                # grads_list.append(mx.nd.array(value.grad().asnumpy()))
                grads_list.append(value.grad())

        return grads_list, batch_size
Example #24
def adam(params, vs, sqrs, batch_size, lr, t):
    eps_stable = 1e-5
    beta1 = 0.9
    beta2 = 0.999
    for param, v, sqr in zip(params, vs, sqrs):
        g = param.grad / batch_size
        v[:] = beta1 * v + (1 - beta1) * g
        sqr[:] = beta2 * sqr + (1 - beta2) * nd.square(g)
        v_bias_corr = v / (1 - beta1**t)
        sqr_bias_corr = sqr / (1 - beta2**t)
        div = lr * v_bias_corr / nd.sqrt(sqr_bias_corr + eps_stable)
        param[:] -= div
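For reference, this is the standard Adam update with bias correction:

$v \leftarrow \beta_1 v + (1 - \beta_1) g, \qquad s \leftarrow \beta_2 s + (1 - \beta_2) g^2,$
$\hat{v} = v / (1 - \beta_1^t), \qquad \hat{s} = s / (1 - \beta_2^t), \qquad \theta \leftarrow \theta - \mathrm{lr}\, \hat{v} / \sqrt{\hat{s} + \epsilon}.$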
Example #25
    def update(self, index, weight, grad, state):
        assert (isinstance(weight, NDArray))
        assert (isinstance(grad, NDArray))
        self._update_count(index)
        lr = self._get_lr(index)
        wd = self._get_wd(index)
        t = self._index_update_count[index]

        with bulk(self._bulk):
            # preprocess grad
            grad *= self.rescale_grad
            if self.clip_gradient is not None:
                grad = clip(grad, -self.clip_gradient, self.clip_gradient)

            mean, var = state
            mean *= self.beta1
            mean += (1. - self.beta1) * grad
            var *= self.beta2
            var += (1. - self.beta2) * square(grad)

            r1 = weight.norm()
            if not self.bias_correction:
                r1 = minimum(maximum(r1, self.lower_bound), self.upper_bound)
                sqrt_var = sqrt(var)
                sqrt_var += self.epsilon
                g = mean / sqrt_var
                g += wd * weight
            else:
                # apply bias correction
                mean_hat = mean / (1. - power(self.beta1, t))
                var_hat = var / (1. - power(self.beta2, t))
                if self._eps_after_sqrt:
                    sqrt(var_hat, out=var_hat)
                    var_hat += self.epsilon
                else:
                    var_hat += self.epsilon
                    sqrt(var_hat, out=var_hat)
                mean_hat /= var_hat
                mean_hat += wd * weight
                g = mean_hat

            r2 = g.norm()

            # calculate lamb_trust_ratio
            ratio = r1 / r2
            # becomes NaN if ratio == NaN or 0, otherwise 0
            nan_or_zero = 1 - ratio / ratio
            r = where(nan_or_zero, ones_like(ratio), ratio)
            lr *= r

            # update weight
            g *= lr
            weight[:] -= g
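The scaling applied to lr is a LAMB-style trust ratio: with $r_1 = \lVert w \rVert$ and $r_2 = \lVert \tilde{g} \rVert$, where $\tilde{g}$ is the preconditioned, weight-decayed gradient computed above, the effective step size is

$\mathrm{lr}_{\mathrm{eff}} = \mathrm{lr} \cdot r_1 / r_2,$

with the ratio replaced by 1 whenever it is 0 or NaN (the nan_or_zero trick).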
Example #26
def regression_student_grad(student_outputs, teacher_pred, teacher_noise_precision):
    student_mean = student_outputs[0]
    student_var = student_outputs[1]
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) * (nd.square(student_mean - teacher_pred)
                                            + 1.0 / teacher_noise_precision)) / 2
    # print student_mean
    # print teacher_pred
    # print grad_mean.asnumpy(), grad_var.asnumpy()
    # ch = raw_input()
    return [grad_mean, grad_var]
Example #27
def adam(params, lr, vals, sqrs, iter, batch_size, beta1=0.9, beta2=0.999):
    eps_stable = 1e-8
    for param, val, sqr in zip(params, vals, sqrs):
        g = param.grad / batch_size
        val[:] = beta1 * val + (1 - beta1) * g
        sqr[:] = beta2 * sqr + (1 - beta2) * nd.square(g)
        #val_next = val / (1 - nd.power(beta1, iter))
        val_next = val / (1. - beta1**iter)
        #sqr_next = sqr / (1. - nd.power(beta2, iter))
        sqr_next = sqr / (1. - beta2**iter)
        g_next = lr * val_next / (nd.sqrt(sqr_next) + eps_stable)
        param[:] -= g_next
Example #28
    def BN(X,gamma,beta,momentum=0.9,eps=1e-5,scope_name="",is_training=True):

        if len(X.shape)==2 :
            mean = nd.mean(X,axis=0)
            variance = nd.mean(nd.square(X-mean),axis=0)

            if is_training:
                Normalized_X=(X-mean)/nd.sqrt(variance+eps)
            elif is_training==False and not os.path.exists(path1) and epoch==0: #not param
                Normalized_X = (X - mean) / nd.sqrt(variance + eps)
            else:
                Normalized_X = (X - MOVING_MEANS[scope_name]) / nd.sqrt(MOVING_VARS[scope_name] + eps)

            out=gamma*Normalized_X+beta

        # Note: for a (2D) CNN we normalize over batch_size * height * width for each channel,
        # so gamma and beta have length equal to the channel count.
        # Reference: http://gluon.mxnet.io/chapter04_convolutional-neural-networks/cnn-batch-norm-scratch.html
        elif len(X.shape)==4:
            N , C , H , W = X.shape

            mean = nd.mean(X , axis=(0,2,3)) #normalize batch_size * height * width over each channel
            variance = nd.mean(nd.square(X-mean.reshape((1,C,1,1))),axis=(0,2,3))

            if is_training:
                Normalized_X = (X-mean.reshape((1,C,1,1)))/nd.sqrt(variance.reshape((1,C,1,1))+eps)
            elif is_training == False and not os.path.exists(path1) and epoch==0:  # load param , when epoch=0
                Normalized_X = (X-mean.reshape((1,C,1,1)))/nd.sqrt(variance.reshape((1,C,1,1))+eps)
            else:
                Normalized_X = (X - MOVING_MEANS[scope_name].reshape((1, C, 1, 1))) / nd.sqrt(MOVING_VARS[scope_name].reshape((1, C, 1, 1)) + eps)

            out=gamma.reshape((1,C,1,1))*Normalized_X+beta.reshape((1,C,1,1))

        if scope_name not in MOVING_MEANS and scope_name not in MOVING_VARS:
            MOVING_MEANS[scope_name] = mean
            MOVING_VARS[scope_name] = variance
        else:
            MOVING_MEANS[scope_name] = MOVING_MEANS[scope_name] * momentum + mean * (1.0 - momentum)
            MOVING_VARS[scope_name] = MOVING_VARS[scope_name] * momentum + variance * (1.0 - momentum)

        return out
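Both branches implement the usual batch-normalization transform

$\hat{x} = (x - \mu_B) / \sqrt{\sigma_B^2 + \epsilon}, \qquad y = \gamma \hat{x} + \beta,$

with running statistics tracked as an exponential moving average, e.g. $\mu \leftarrow m\,\mu + (1 - m)\,\mu_B$ for momentum $m$.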
Example #29
    def forward(self, is_train, req, in_data, out_data, aux):
        data_input = in_data[0]
        batch_size = data_input.shape[0]
        label_input = in_data[1]
        center_input = in_data[2]

        label_index = self.class_index[label_input]
        batch_center = center_input[label_index]
        batch_diff = data_input - batch_center

        loss = nd.sum(nd.square(batch_diff)) / batch_size / 2
        self.assign(out_data[0], req[0], loss)
        self.assign(out_data[1], req[0], batch_diff)
Example #30
def tinyimagenet200_iterator(args, logger):
    assert (args.name == 'tinyimagenet200')
    os.environ['MXNET_CPU_WORKER_NTHREADS'] = '%d' % args.num_threads

    train_size = 100000
    classes = 200

    data_shape = (3, 64,
                  64) if len(args.data_shape) == 0 else tuple(args.data_shape)
    batch_size = args.batch_size

    # compute mean and std based on 10% of the training data
    stats_iter = mx.image.ImageIter(train_size // 10,
                                    data_shape,
                                    path_imgrec=args.train_rec,
                                    path_imgidx=args.train_idx,
                                    shuffle=True)
    sample = stats_iter.next().data[0].transpose(axes=(1, 0, 2, 3)).reshape(
        (3, -1))
    mean_rgb = sample.mean(axis=-1, keepdims=True)
    std_rgb = nd.sqrt(
        nd.mean(nd.square(sample - mean_rgb), axis=-1, keepdims=True))
    mean_rgb, std_rgb = mean_rgb.reshape((-1, )).asnumpy(), std_rgb.reshape(
        (-1, )).asnumpy()

    train_aug = get_train_aug(data_shape, mean_rgb, std_rgb, args)
    train_iter = MultiSequenceImageIter(args.train_rec,
                                        args.train_idx,
                                        data_shape=data_shape,
                                        batch_size=batch_size,
                                        shuffle=True,
                                        aug_list=train_aug,
                                        logger=logger)

    val_aug = mx.image.CreateAugmenter(data_shape=data_shape,
                                       mean=mean_rgb,
                                       std=std_rgb)
    # val_aug = mx.image.CreateAugmenter(data_shape=data_shape, mean=True, std=True)
    val_iter = mx.image.ImageIter(batch_size,
                                  data_shape,
                                  path_imgrec=args.test_rec,
                                  path_imgidx=args.test_idx,
                                  shuffle=False,
                                  aug_list=val_aug)
    # train_aug = get_train_aug(data_shape, args)
    # train_iter = MultiSequenceImageIter(args.train_rec, args.train_idx, data_shape, batch_size, shuffle=True, aug_list=train_aug, logger=logger)

    # val_aug = mx.image.CreateAugmenter(data_shape=data_shape, mean=True, std=True)
    # val_iter = MultiSequenceImageIter(args.test_rec, args.test_idx, data_shape, batch_size, shuffle=False, aug_list=val_aug, logger=logger)

    return train_iter, val_iter, classes, train_size
Example #31
    def var(array, W=_W, B=None, square=0, sqrt=0, V=False, order='NCHW', sizz=0):
        arrs = array.shape
        ashp = W.shape
        xi = (-2, -1)
        x2 = (-2, -1, -3)
        sb = (ashp[1], 1, 1)
        WV = ashp[-2:]
        print(sb)

        mnc = mnd.tile(mnd.reshape(mnd.array([WV[0] * WV[1]]), shape=(1, 1, 1)), ashp[1])
        print(mnc)

        if V:
            print(W.eval())
        print(arrs, ashp)
        mul = mnd.broadcast_mul(array, W)
        if V:
            print('Wsamp', W[-1, -1])
            print('array*w', mul[0, -1])
        size = mnd.sum(W, axis=xi, keepdims=True)  # shape = (outputs, channel)
        if V:
            print("sizesamp", size.shape, size)
        if B is None:
            B = mnd.zeros(W.shape[0:2], dtype=np.float32)  # one bias per channel
        B = mnd.reshape(B, (*B.shape, *[1 for _ in range(len(ashp) - len(B.shape))]))
        if sizz == 1:
            mean = mnd.sum(mul, axis=xi, keepdims=True) / size
        else:
            mean = mnd.sum(mul, axis=xi, keepdims=True) / mnc
        if V:
            print("meansamp", mean[0, -1])
        if square:
            i = mnd.square(mnd.broadcast_add(mnd.broadcast_minus(mul, mean), B))
        else:
            i = mnd.broadcast_add(mnd.broadcast_minus(mul, mean), B)
        di = i / size
        if V == 2:
            print("i", i, "i")
            print("di", di, "di")
        if V:
            print('isamp', i.shape, i[-1, -1])
        out = mnd.sum(mnd.broadcast_add(i, B), axis=x2)
        # out = np.rollaxis(np.sum(i + B, axis=x2), -1, 1)
        # print(out.shape)
        if sqrt:
            out = mnd.sqrt(out)
        out = mnd.swapaxes(out, 3, 1)
        # print(out.shape, (arrs[0], ashp[0], arrs[1], arrs[2]))
        assert out.shape == (arrs[0], ashp[0], arrs[1], arrs[2])
        return out
Example #32
    def forward(self):

        # 2-step
        diff = nd.subtract(nd.expand_dims(self.dataset,axis=0),nd.expand_dims(self.centroid,axis=1))
        sqr = nd.square(diff)
        distance = nd.sum(sqr,axis=2)
        clustering = nd.argmin(distance,axis=0)
        # 3-step
        # MXNet's nd.where does not return indices, so np.where is used instead.
        for j in range(self.centroid_numbers):
            idx = np.reshape(np.where(np.equal(clustering.asnumpy(), j)), (-1,))
            self.centroid[j][:] = nd.mean(
                nd.take(self.dataset, nd.array(idx, ctx=self.ctx), axis=0), axis=0)
        return clustering , self.centroid
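forward therefore performs one Lloyd iteration of k-means: assign each point to its nearest centroid, then recompute each centroid as the mean of its assigned points,

$c_i = \arg\min_j \lVert x_i - \mu_j \rVert^2, \qquad \mu_j \leftarrow \mathrm{mean}\{x_i : c_i = j\}.$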
Example #33
    def hybrid_forward(self, F, fts, ys, ftt, yt):
        """
        Semantic Alignment Loss
        :param F: Function
        :param fts: features for the source domain [M, K]
        :param ys: label for the source domain [M]
        :param ftt: features for the target domain [N, K]
        :param yt: label for the target domain [N]
        :return:
        """
        if self._fn:
            # Normalize ft
            fts = F.L2Normalization(fts, mode='instance')
            ftt = F.L2Normalization(ftt, mode='instance')

        fts_rpt = F.broadcast_to(fts.expand_dims(axis=0),
                                 shape=(self._bs_tgt, self._bs_src,
                                        self._embed_size))
        ftt_rpt = F.broadcast_to(ftt.expand_dims(axis=1),
                                 shape=(self._bs_tgt, self._bs_src,
                                        self._embed_size))

        dists = F.sum(F.square(ftt_rpt - fts_rpt), axis=2)

        yt_rpt = F.broadcast_to(yt.expand_dims(axis=1),
                                shape=(self._bs_tgt,
                                       self._bs_src)).astype('int32')
        ys_rpt = F.broadcast_to(ys.expand_dims(axis=0),
                                shape=(self._bs_tgt,
                                       self._bs_src)).astype('int32')

        y_same = F.equal(yt_rpt, ys_rpt).astype('float32')
        y_diff = F.not_equal(yt_rpt, ys_rpt).astype('float32')

        intra_cls_dists = dists * y_same
        inter_cls_dists = dists * y_diff

        max_dists = F.max(dists, axis=1, keepdims=True)
        max_dists = F.broadcast_to(max_dists,
                                   shape=(self._bs_tgt, self._bs_src))
        revised_inter_cls_dists = F.where(y_same, max_dists, inter_cls_dists)

        max_intra_cls_dist = F.max(intra_cls_dists, axis=1)
        min_inter_cls_dist = F.min(revised_inter_cls_dists, axis=1)

        loss = F.relu(max_intra_cls_dist - min_inter_cls_dist + self._margin)

        return loss
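Per target sample $t$, the returned value is a margin loss between the hardest same-class and the easiest different-class source sample:

$L_t = \max\bigl(0, \max_{s:\, y_s = y_t} d_{ts} - \min_{s:\, y_s \neq y_t} d_{ts} + m\bigr),$

where $d_{ts}$ is the squared Euclidean distance between the (optionally L2-normalized) feature vectors.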
Example #34
def student_loss(student_mean, student_var, teacher_pred, teacher_noise_precision):
    loss = 0.5 * (student_var + nd.exp(-student_var) *
                  (nd.square(teacher_pred - student_mean) + 1 / teacher_noise_precision))
    return loss.asnumpy()[0]
Example #35
def main():
    parser = argparse.ArgumentParser(description='Script to test the trained network on a game.')
    parser.add_argument('-r', '--rom', required=False, type=str,
                        default=os.path.join('roms', 'breakout.bin'),
                        help='Path of the ROM File.')
    parser.add_argument('-v', '--visualization', action='store_true',
                        help='Visualize the runs.')
    parser.add_argument('--lr', required=False, type=float, default=0.01,
                        help='Learning rate of the AdaGrad optimizer')
    parser.add_argument('--eps', required=False, type=float, default=0.01,
                        help='Eps of the AdaGrad optimizer')
    parser.add_argument('--clip-gradient', required=False, type=float, default=None,
                        help='Clip threshold of the AdaGrad optimizer')
    parser.add_argument('--double-q', action='store_true',
                        help='Use Double DQN only if specified')
    parser.add_argument('--wd', required=False, type=float, default=0.0,
                        help='Weight of the L2 Regularizer')
    parser.add_argument('-c', '--ctx', required=False, type=str, default='gpu',
                        help='Running Context. E.g `-c gpu` or `-c gpu1` or `-c cpu`')
    parser.add_argument('-d', '--dir-path', required=False, type=str, default='',
                        help='Saving directory of model files.')
    parser.add_argument('--start-eps', required=False, type=float, default=1.0,
                        help='Eps of the epsilon-greedy policy at the beginning')
    parser.add_argument('--replay-start-size', required=False, type=int, default=50000,
                        help='The step that the training starts')
    parser.add_argument('--kvstore-update-period', required=False, type=int, default=1,
                        help='The period that the worker updates the parameters from the sever')
    parser.add_argument('--kv-type', required=False, type=str, default=None,
                        help='type of kvstore, default will not use kvstore, could also be dist_async')
    parser.add_argument('--optimizer', required=False, type=str, default="adagrad",
                        help='type of optimizer')
    args = parser.parse_args()

    if args.dir_path == '':
        rom_name = os.path.splitext(os.path.basename(args.rom))[0]
        args.dir_path = 'dqn-%s-lr%g' % (rom_name, args.lr)
    replay_start_size = args.replay_start_size
    max_start_nullops = 30
    replay_memory_size = 1000000
    history_length = 4
    rows = 84
    cols = 84

    ctx = parse_ctx(args.ctx)
    q_ctx = mx.Context(*ctx[0])

    game = AtariGame(rom_path=args.rom, resize_mode='scale', replay_start_size=replay_start_size,
                     resized_rows=rows, resized_cols=cols, max_null_op=max_start_nullops,
                     replay_memory_size=replay_memory_size, display_screen=args.visualization,
                     history_length=history_length)

    ##RUN NATURE
    freeze_interval = 10000
    epoch_num = 200
    steps_per_epoch = 250000
    update_interval = 4
    discount = 0.99

    eps_start = args.start_eps
    eps_min = 0.1
    eps_decay = (eps_start - eps_min) / 1000000
    eps_curr = eps_start
    freeze_interval //= update_interval
    minibatch_size = 32
    action_num = len(game.action_set)

    data_shapes = {'data': (minibatch_size, history_length) + (rows, cols),
                   'dqn_action': (minibatch_size,), 'dqn_reward': (minibatch_size,)}
    dqn_sym = dqn_sym_nature(action_num)
    qnet = Base(data_shapes=data_shapes, sym_gen=dqn_sym, name='QNet',
                initializer=DQNInitializer(factor_type="in"),
                ctx=q_ctx)
    target_qnet = qnet.copy(name="TargetQNet", ctx=q_ctx)

    use_easgd = False
    optimizer = mx.optimizer.create(name=args.optimizer, learning_rate=args.lr, eps=args.eps,
                    clip_gradient=args.clip_gradient,
                    rescale_grad=1.0, wd=args.wd)
    updater = mx.optimizer.get_updater(optimizer)

    qnet.print_stat()
    target_qnet.print_stat()

    # Begin Playing Game
    training_steps = 0
    total_steps = 0
    for epoch in range(epoch_num):
        # Run Epoch
        steps_left = steps_per_epoch
        episode = 0
        epoch_reward = 0
        start = time.time()
        game.start()
        while steps_left > 0:
            # Running New Episode
            episode += 1
            episode_loss = 0.0
            episode_q_value = 0.0
            episode_update_step = 0
            episode_action_step = 0
            time_episode_start = time.time()
            game.begin_episode(steps_left)
            while not game.episode_terminate:
                # 1. We need to choose a new action based on the current game status
                if game.state_enabled and game.replay_memory.sample_enabled:
                    do_exploration = (npy_rng.rand() < eps_curr)
                    eps_curr = max(eps_curr - eps_decay, eps_min)
                    if do_exploration:
                        action = npy_rng.randint(action_num)
                    else:
                        # TODO Here we can in fact play multiple gaming instances simultaneously and make actions for each
                        # We can simply stack the current_state() of gaming instances and give prediction for all of them
                        # We need to wait after calling calc_score(.), which makes the program slow
                        # TODO Profiling the speed of this part!
                        current_state = game.current_state()
                        state = nd.array(current_state.reshape((1,) + current_state.shape),
                                         ctx=q_ctx) / float(255.0)
                        qval_npy = qnet.forward(is_train=False, data=state)[0].asnumpy()
                        action = numpy.argmax(qval_npy)
                        episode_q_value += qval_npy[0, action]
                        episode_action_step += 1
                else:
                    action = npy_rng.randint(action_num)

                # 2. Play the game for a single mega-step (Inside the game, the action may be repeated for several times)
                game.play(action)
                total_steps += 1

                # 3. Update our Q network if we can start sampling from the replay memory
                #    Also, we update every `update_interval`
                if total_steps % update_interval == 0 and game.replay_memory.sample_enabled:
                    # 3.1 Draw sample from the replay_memory
                    training_steps += 1
                    episode_update_step += 1
                    states, actions, rewards, next_states, terminate_flags \
                        = game.replay_memory.sample(batch_size=minibatch_size)
                    states = nd.array(states, ctx=q_ctx) / float(255.0)
                    next_states = nd.array(next_states, ctx=q_ctx) / float(255.0)
                    actions = nd.array(actions, ctx=q_ctx)
                    rewards = nd.array(rewards, ctx=q_ctx)
                    terminate_flags = nd.array(terminate_flags, ctx=q_ctx)

                    # 3.2 Use the target network to compute the scores and
                    #     get the corresponding target rewards
                    if not args.double_q:
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(target_qval))\
                                           * (1.0 - terminate_flags) * discount
                    else:
                        target_qval = target_qnet.forward(is_train=False, data=next_states)[0]
                        qval = qnet.forward(is_train=False, data=next_states)[0]

                        target_rewards = rewards + nd.choose_element_0index(target_qval,
                                                                nd.argmax_channel(qval))\
                                           * (1.0 - terminate_flags) * discount
                    outputs = qnet.forward(is_train=True,
                                           data=states,
                                           dqn_action=actions,
                                           dqn_reward=target_rewards)
                    qnet.backward()
                    qnet.update(updater=updater)

                    # 3.3 Calculate the Huber (smooth-L1) loss, for logging only
                    diff = nd.abs(nd.choose_element_0index(outputs[0], actions) - target_rewards)
                    quadratic_part = nd.clip(diff, -1, 1)
                    loss = 0.5 * nd.sum(nd.square(quadratic_part)).asnumpy()[0] +\
                           nd.sum(diff - quadratic_part).asnumpy()[0]
                    episode_loss += loss

                    # 3.4 Update the target network every freeze_interval
                    if training_steps % freeze_interval == 0:
                        qnet.copy_params_to(target_qnet)
            steps_left -= game.episode_step
            time_episode_end = time.time()
            # Update the statistics
            epoch_reward += game.episode_reward
            info_str = "Epoch:%d, Episode:%d, Steps Left:%d/%d, Reward:%f, fps:%f, Exploration:%f" \
                        % (epoch, episode, steps_left, steps_per_epoch, game.episode_reward,
                           game.episode_step / (time_episode_end - time_episode_start), eps_curr)
            if episode_update_step > 0:
                info_str += ", Avg Loss:%f/%d" % (episode_loss / episode_update_step,
                                                  episode_update_step)
            if episode_action_step > 0:
                info_str += ", Avg Q Value:%f/%d" % (episode_q_value / episode_action_step,
                                                  episode_action_step)
            if episode % 100 == 0:
                logging.info(info_str)
        end = time.time()
        fps = steps_per_epoch / (end - start)
        qnet.save_params(dir_path=args.dir_path, epoch=epoch)
        logging.info("Epoch:%d, FPS:%f, Avg Reward: %f/%d"
                 % (epoch, fps, epoch_reward / float(episode), episode))
Example #36
def student_grad(student_mean, student_var, teacher_pred, teacher_noise_precision):
    grad_mean = nd.exp(-student_var) * (student_mean - teacher_pred)

    grad_var = (1 - nd.exp(-student_var) * (nd.square(student_mean - teacher_pred)
                                                  + 1 / teacher_noise_precision))/2
    return [grad_mean, grad_var]