Example no. 1
def discount_reward(r):
    # Discounted return at each step; gamma is the (externally defined) discount factor
    discount_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(r.size)):
        running_add = running_add * gamma + r[t]
        discount_r[t] = running_add
    return discount_r
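For reference, a minimal usage sketch of discount_reward (assuming `import numpy as np` and a module-level `gamma`, which the snippet reads from enclosing scope):

import numpy as np

gamma = 0.99  # assumed discount factor

r = np.array([0.0, 0.0, 1.0])   # a sparse reward arriving at the last step
print(discount_reward(r))       # -> [0.9801 0.99   1.    ]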
Example no. 2
	def learn(self):
		self.policy.optimizer.zero_grad()
		G = np.zeros_like(self.reward_memory, dtype=np.float64)  # torch demands float64 here
		for t in range(len(self.reward_memory)):
			G_sum = 0
			discount = 1
			for k in range(t, len(self.reward_memory)):
				G_sum += self.reward_memory[k] * discount
				discount *= self.gamma
			G[t] = G_sum

		# Standardize the returns; guard against a zero standard deviation
		mean = np.mean(G)
		std = np.std(G) if np.std(G) > 0 else 1
		G = (G - mean) / std

		G = T.tensor(G, dtype=T.float).to(self.policy.device)

		loss = 0
		for g, log_prob in zip(G, self.action_memory):
			loss += -g * log_prob

		loss.backward()
		self.policy.optimizer.step()
		self.action_memory = []
		self.reward_memory = []
Example no. 3
    def update(self, params, grads):
        if self.h is None:
            self.h = {}
            for key, val in params.items():
                self.h[key] = np.zeros_like(val)

        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            # 1e-7 prevents division by zero even when self.h holds a 0
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)
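A minimal sketch of how this AdaGrad-style update might be driven; the `AdaGrad` wrapper class and the toy params/grads dicts are illustrative assumptions, only update() comes from the original:

import numpy as np

class AdaGrad:
    def __init__(self, lr=0.01):
        self.lr = lr
        self.h = None  # per-parameter running sum of squared gradients

    def update(self, params, grads):
        if self.h is None:
            self.h = {key: np.zeros_like(val) for key, val in params.items()}
        for key in params.keys():
            self.h[key] += grads[key] * grads[key]
            params[key] -= self.lr * grads[key] / (np.sqrt(self.h[key]) + 1e-7)

params = {'W': np.array([1.0, 2.0])}
grads = {'W': np.array([0.5, -0.5])}
opt = AdaGrad(lr=0.1)
opt.update(params, grads)
print(params['W'])  # -> [0.9 2.1]; each component steps ~lr against its gradient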
Example no. 4
    def _discount_and_norm_rewards(self):
        discounted_ep_rs = np.zeros_like(self.ep_rs)
        running_add = 0
        for t in reversed(range(0, len(self.ep_rs))):
            running_add = running_add * self.gamma + self.ep_rs[t]
            discounted_ep_rs[t] = running_add

        # Normalize: subtract the mean, divide by the standard deviation
        discounted_ep_rs -= np.mean(discounted_ep_rs)
        discounted_ep_rs /= np.std(discounted_ep_rs)
        return discounted_ep_rs
Example no. 5
    def callback(self, data):
        try:
            cv_image = self.bridge.imgmsg_to_cv2(data, "bgr8")
        except CvBridgeError as e:
            print(e)
            return  # cv_image is undefined if the conversion failed

        # Actual content code
        #  code finding circles and publishing the closest circle

        # initialize key variables
        self.feature_point = list()
        self.tracked_point = list()
        self.mask = None

        # Mask image
        self.mask = np.zeros_like(cv_image)
        x, y, w, h = (self.frame_width // 4, self.frame_height // 4,
                      self.frame_width // 2, self.frame_height // 2)
        self.mask[y:y + h, x:x + w] = 255  # open up the central region of the mask
        masked_image = cv2.bitwise_and(cv_image, self.mask)

        # create grey scale version of the image (the bridge delivered bgr8)
        grey = cv2.cvtColor(masked_image, cv2.COLOR_BGR2GRAY)

        # Equalize the histogram to reduce lighting effect
        grey = cv2.equalizeHist(grey)

        # Remove salt-and-pepper and white noise with a combination of a median and a Gaussian filter
        grey = cv2.medianBlur(grey, 5)
        grey = cv2.GaussianBlur(grey, (5, 5), 0)

        # Get the HoughCircles feature closest to the image center
        feature_point = self.get_feature_point(grey)

        if feature_point is not None and len(feature_point) > 0:
            # draw the center of the circle
            cv2.circle(self.marker_image,
                       (int(feature_point[0]), int(feature_point[1])),
                       self.feature_size, (0, 0, 255, 0), cv2.FILLED, 8, 0)
            # draw the outer circle
            cv2.circle(self.marker_image,
                       (int(feature_point[0]), int(feature_point[1])),
                       int(feature_point[2]), (0, 255, 0, 0), self.feature_size,
                       8, 0)

            # convert feature_point from image coordinates to world coordinates
            feature_point = np.dot(
                np.linalg.pinv(self.projectionMatrix),
                np.array([feature_point[0], feature_point[1], 1]))
            feature_point = feature_point / feature_point[2]
            feature_point = np.array((feature_point[0], feature_point[1]))

            # provide self.tracked point to publish_poi to be published on the /poi topic
            self.tracked_point = feature_point

        return cv_image
Example no. 6
    def n_step_td_target(self, rewards, next_v_value, done):
        td_targets = np.zeros_like(rewards)
        cumulative = 0
        if not done:
            cumulative = next_v_value

        # walk backwards over the trajectory, t = T-1, ..., 0
        for k in reversed(range(0, len(rewards))):
            cumulative = self.GAMMA * cumulative + rewards[k]
            td_targets[k] = cumulative
        return td_targets  # one TD target per time step (a vector)
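A minimal standalone sketch of the same n-step TD target computation; the free function, GAMMA value, and inputs are assumptions for illustration:

import numpy as np

GAMMA = 0.9

def n_step_td_target(rewards, next_v_value, done):
    td_targets = np.zeros_like(rewards)
    cumulative = 0 if done else next_v_value
    for k in reversed(range(len(rewards))):
        cumulative = GAMMA * cumulative + rewards[k]
        td_targets[k] = cumulative
    return td_targets

rewards = np.array([1.0, 0.0, 1.0])
print(n_step_td_target(rewards, next_v_value=0.5, done=False))
# -> [2.1745 1.305  1.45  ]  (bootstrapped from next_v_value)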
Example no. 7
>>> def numerical_gradient(f,x):
	h = 1e-4
	grad = np.zeros_like(x)
	for idx in range(x.size):
		tmp_val = x[idx]
		x[idx] = tmp_val + h
		fxh1 = f(x)
		x[idx] = tmp_val - h
		fxh2 = f(x)
		grad[idx] = (fxh1 - fxh2)/(2*h)
		x[idx] = tmp_val
	return grad
Example no. 8
def region_of_interest(img, vertices):
	"""
	Apply mask to image. Only keeps the region of img defined by vertices; the rest of the image is set to black.
	"""
	mask = np.zeros_like(img)
	img_shape = img.shape
	if len(img_shape) > 2:
		mask_color = (255,) * img_shape[2]  # one fill value per colour channel
	else:
		mask_color = 255
	cv2.fillPoly(mask, vertices, mask_color)
	masked_image = cv2.bitwise_and(img, mask)
	return masked_image
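A minimal usage sketch; the synthetic image and triangle vertices are illustrative assumptions:

import numpy as np
import cv2

img = np.full((100, 200, 3), 255, dtype=np.uint8)  # white test image
vertices = np.array([[(0, 99), (100, 0), (199, 99)]], dtype=np.int32)
roi = region_of_interest(img, vertices)
# roi is white inside the triangle and black everywhere else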
Example no. 9
        def _l(x, *args, **kwargs):
            x = np.array([x])
            if np.any(np.isnan(x)):
                #raise Exception("oO")
                if derivative:
                    return np.inf, np.zeros_like(x)
                else:
                    return np.inf
            a = acq_f(x, derivative=derivative, *args, **kwargs)
            if derivative:
                #print -a[0][0], -a[1][0][0, :]
                return -a[0][0], -a[1][0][0, :]

            else:
                return -a[0]
Example no. 10
 def _l(x, *args, **kwargs):
     x = np.array([x])
     if np.any(np.isnan(x)):
         #raise Exception("oO")
         
         if derivative:
             return np.inf, np.zeros_like(x)
         else:
             return np.inf
     a = acq_f(x, derivative=derivative, *args, **kwargs)
     
     if derivative:
         #print -a[0][0], -a[1][0][0, :]
         return -a[0][0], -a[1][0][0, :]
        
     else:
         return -a[0]
Example no. 11
 def grad(self, _cur_x, _batch_tuples):
     with tf.device(self.config.device):
         # get action data (one hot)
         action_data = self.getActionData(
             self.p_func.get_shape().as_list()[1], _batch_tuples)
         # get value data
         value_data = self.getNStepVTargetData(None, _batch_tuples)
         if value_data.std() == 0:
             value_data = np.zeros_like(value_data)
         else:
             value_data = (value_data - value_data.mean()) / \
                 value_data.std()
         self.grads_data = self.sess.run(
             self.grads_op,
             feed_dict={
                 self.x_place: _cur_x,
                 self.action_place: action_data,
                 self.value_place: value_data,
             }
         )
Example no. 12
import math
import numpy as np
import pandas as pd

def simulate_gbm():
    # model parameters
    S0 = 100.0   # initial index level
    T = 10       # horizon in years
    r = 0.05     # riskless rate
    vol = 0.20   # volatility

    # simulation parameters
    np.random.seed(250000)
    gbm_dates = pd.date_range(start='30-09-2004',
                              end='30-09-2014',
                              freq='B')  # 10 years of business days, matching T
    M = len(gbm_dates)  # number of time steps
    I = 1               # number of index level paths
    dt = 1 / 252        # fixed for simplicity
    df = math.exp(-r * dt)  # discount factor

    # price paths: exact GBM step, driven by the standard normal draws
    rand = np.random.standard_normal((M, I))
    S = np.zeros_like(rand)
    S[0] = S0
    for t in range(1, M):
        S[t] = S[t - 1] * np.exp((r - vol ** 2 / 2) * dt
                                 + vol * math.sqrt(dt) * rand[t])
Example no. 13
    def loss(self, X_batch, y_batch, reg):
        """
        Compute the loss function and its derivative.
        Subclasses will override this.

        Inputs:
        - X_batch: A numpy array of shape (N, D) containing a minibatch of N
          data points; each point has dimension D.
        - y_batch: A numpy array of shape (N,) containing labels for the minibatch.
        - reg: (float) regularization strength.

        Returns: A tuple containing:
        - loss as a single float
        - gradient with respect to self.W; an array of the same shape as W
        """
        loss = 0.0
        W = self.W                # bind W before using it
        grad = np.zeros_like(W)
        N = X_batch.shape[0]
        C = W.shape[1]
        print("W:", W.shape)
        print("y_batch (first rows):", y_batch[:5])

        pass
Example no. 14
 def __init__(self, W):
     self.params = [W]
     self.grads = [np.zeros_like(W)]
     self.idx = None
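For context, a minimal sketch of how a layer with this params/grads list convention might be consumed by an optimizer; the `Layer` name and the SGD loop are assumptions, only __init__ comes from the original:

import numpy as np

class Layer:
    def __init__(self, W):
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        self.idx = None

layer = Layer(np.random.randn(4, 3))
lr = 0.1
# a generic SGD step over the paired parameter/gradient lists
for p, g in zip(layer.params, layer.grads):
    p -= lr * g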
Example no. 15
def solver_diffusion_FE(
    I, a, f, L, Nx, F, T, U_0, U_L, h=None, user_action=None):
    """
    Forward Euler scheme for the diffusion equation
    u_t = a*u_xx + f, u(x,0)=I(x).
    If U_0 is a function of t: u(0,t)=U_0(t)
    If U_L is a function of t: u(L,t)=U_L(t)
    If U_0 is None: du/dx(0,t)=0
    If U_L is None: du/dx(L,t)=0
    If U_0 is a number: Robin condition -a*du/dn(0,t)=h*(u-U_0)
    If U_L is a number: Robin condition -a*du/dn(L,t)=h*(u-U_L)
    """
    import numpy as np
    version = 'scalar'
    x = np.linspace(0, L, Nx+1)   # mesh points in space
    dx = x[1] - x[0]
    dt = F*dx**2/a
    Nt = int(round(T/float(dt)))
    t = np.linspace(0, T, Nt+1)   # mesh points in time

    if f is None:
        f = lambda x, t: 0 if isinstance(x, (float,int)) else np.zeros_like(x)

    u   = np.zeros(Nx+1)   # solution array
    u_1 = np.zeros(Nx+1)   # solution at t-dt
    u_2 = np.zeros(Nx+1)   # solution at t-2*dt

    # Set initial condition
    for i in range(0,Nx+1):
        u_1[i] = I(x[i])

    if user_action is not None:
        user_action(u_1, x, t, 0)

    for n in range(0, Nt):
        # Update all inner points
        if version == 'scalar':
            for i in range(1, Nx):
                if callable(f):  # f(x,t)
                    u[i] = u_1[i] + \
                           F*(u_1[i-1] - 2*u_1[i] + u_1[i+1])\
                           + f(x[i], t[n])
                elif isinstance(f, (float,int)):
                    # f = f*(u-1)
                    u[i] = u_1[i] + \
                           F*(u_1[i-1] - 2*u_1[i] + u_1[i+1])\
                           + f*(u_1[i] - 1)  # special source

        elif version == 'vectorized':
            if callable(f):
                u[1:Nx] = u_1[1:Nx] +  \
                          F*(u_1[0:Nx-1] - 2*u_1[1:Nx] + u_1[2:Nx+1])\
                          + f(x[1:Nx], t[n])
            elif isinstance(f, (float,int)):
                # f = f*(u-1)
                u[1:Nx] = u_1[1:Nx] +  \
                          F*(u_1[0:Nx-1] - 2*u_1[1:Nx] + u_1[2:Nx+1])\
                          + f*(u_1[1:Nx] - 1)

        # Insert boundary conditions
        if callable(U_0):
            u[0] = U_0(t[n+1])
        elif U_0 is None:
            # Homogeneous Neumann condition
            i = 0
            u[i] = u_1[i] + F*(u_1[i+1] - 2*u_1[i] + u_1[i+1])
        elif isinstance(U_0, (float,int)):
            # Robin condition
            # u_-1 = u_1 + 2*dx/a*(u[i] - U_0)
            i = 0
            u[i] = u_1[i] + F*(u_1[i+1] + 2*dx*h/a*(u[i] - U_0)
                               - 2*u_1[i] + u_1[i+1])
        if callable(U_L):
            u[Nx] = U_L(t[n+1])
        elif U_L is None:
            # Homogeneous Neumann condition
            i = Nx
            u[i] = u_1[i] + F*(u_1[i-1] - 2*u_1[i] + u_1[i-1])
        elif isinstance(U_L, (float,int)):
            # Robin condition
            # u_Nx+1 = u_Nx-1 - 2*dx/a*(u[i] - U_L)
            i = Nx
            u[i] = u_1[i] + F*(u_1[i-1] - 2*u_1[i] +
                               u_1[i-1] - 2*dx*h/a*(u[i] - U_L))

        if user_action is not None:
            user_action(u, x, t, n+1)

        # Update u_1 before next step
        #u_1[:] = u  # safe, but slow
        u_1, u = u, u_1  # just switch references
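A minimal usage sketch for the solver above; the initial condition, parameters, and printing action are illustrative assumptions:

import numpy as np

def I(x):
    return np.sin(np.pi * x)          # initial temperature profile

def print_step(u, x, t, n):
    if n % 50 == 0:
        print('t=%.4f  max u=%.6f' % (t[n], u.max()))

# Dirichlet u(0,t)=u(L,t)=0 via constant-returning callables
solver_diffusion_FE(I=I, a=1.0, f=None, L=1.0, Nx=20, F=0.5, T=0.1,
                    U_0=lambda t: 0.0, U_L=lambda t: 0.0,
                    user_action=print_step)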
Example no. 16
def solver_diffusion_FE(I, a, f, L, Nx, F, T, U_0, U_L, h=None, user_action=None):
    """
    Forward Euler scheme for the diffusion equation
    u_t = a*u_xx + f, u(x,0)=I(x).
    If U_0 is a function of t: u(0,t)=U_0(t)
    If U_L is a function of t: u(L,t)=U_L(t)
    If U_0 is None: du/dx(0,t)=0
    If U_L is None: du/dx(L,t)=0
    If U_0 is a number: Robin condition -a*du/dn(0,t)=h*(u-U_0)
    If U_L is a number: Robin condition -a*du/dn(L,t)=h*(u-U_L)
    """
    import numpy as np

    version = "scalar"
    x = np.linspace(0, L, Nx + 1)  # mesh points in space
    dx = x[1] - x[0]
    dt = F * dx ** 2 / a
    Nt = int(round(T / float(dt)))
    t = np.linspace(0, T, Nt + 1)  # mesh points in time

    if f is None:
        f = lambda x, t: 0 if isinstance(x, (float, int)) else np.zeros_like(x)

    u = np.zeros(Nx + 1)  # solution array
    u_1 = np.zeros(Nx + 1)  # solution at t-dt
    u_2 = np.zeros(Nx + 1)  # solution at t-2*dt

    # Set initial condition
    for i in range(0, Nx + 1):
        u_1[i] = I(x[i])

    if user_action is not None:
        user_action(u_1, x, t, 0)

    for n in range(0, Nt):
        # Update all inner points
        if version == "scalar":
            for i in range(1, Nx):
                if callable(f):  # f(x,t)
                    u[i] = u_1[i] + F * (u_1[i - 1] - 2 * u_1[i] + u_1[i + 1]) + f(x[i], t[n])
                elif isinstance(f, (float, int)):
                    # f = f*(u-1)
                    u[i] = u_1[i] + F * (u_1[i - 1] - 2 * u_1[i] + u_1[i + 1]) + f * (u_1[i] - 1)  # special source

        elif version == "vectorized":
            if callable(f):
                u[1:Nx] = u_1[1:Nx] + F * (u_1[0 : Nx - 1] - 2 * u_1[1:Nx] + u_1[2 : Nx + 1]) + f(x[1:Nx], t[n])
            elif isinstance(f, (float, int)):
                # f = f*(u-1)
                u[1:Nx] = u_1[1:Nx] + F * (u_1[0 : Nx - 1] - 2 * u_1[1:Nx] + u_1[2 : Nx + 1]) + f * (u_1[1:Nx] - 1)

        # Insert boundary conditions
        if callable(U_0):
            u[0] = U_0(t[n + 1])
        elif U_0 is None:
            # Homogeneous Neumann condition
            i = 0
            u[i] = u_1[i] + F * (u_1[i + 1] - 2 * u_1[i] + u_1[i + 1])
        elif isinstance(U_0, (float, int)):
            # Robin condition
            # u_-1 = u_1 + 2*dx/a*(u[i] - U_0)
            i = 0
            u[i] = u_1[i] + F * (u_1[i + 1] + 2 * dx * h / a * (u[i] - U_0) - 2 * u_1[i] + u_1[i + 1])
        if callable(U_L):
            u[Nx] = U_L(t[n + 1])
        elif U_L is None:
            # Homogeneous Neumann condition
            i = Nx
            u[i] = u_1[i] + F * (u_1[i - 1] - 2 * u_1[i] + u_1[i - 1])
        elif isinstance(U_L, (float, int)):
            # Robin condition
            # u_Nx+1 = u_Nx-1 - 2*dx/a*(u[i] - U_L)
            i = Nx
            u[i] = u_1[i] + F * (u_1[i - 1] - 2 * u_1[i] + u_1[i - 1] - 2 * dx * h / a * (u[i] - U_L))

        if user_action is not None:
            user_action(u, x, t, n + 1)

        # Update u_1 before next step
        # u_1[:] = u  # safe, but slow
        u_1, u = u, u_1  # just switch references
Example no. 17
		grad = numerical_gradient(f, x)
		x -= lr * grad
	return x

>>> def function_2(x):
	return x[0]**2 + x[1]**2

>>> init_x = np.array([-3.0, 4.0])
>>> gradient_descent(function_2, init_x = init_x, lr = 0.0, step_num = 100)
Traceback (most recent call last):
  File "<pyshell#25>", line 1, in <module>
    gradient_descent(function_2, init_x = init_x, lr = 0.0, step_num = 100)
  File "<pyshell#20>", line 4, in gradient_descent
    grad = numerical_gradient(f, x)
  File "<pyshell#13>", line 3, in numerical_gradient
    grad = np.zero_like(x)
AttributeError: module 'numpy' has no attribute 'zero_like'
>>> def numerical_gradient(f,x):
	h = 1e-4
	grad = np.zeros_like(x)
	for idx in range(x.size):
		tmp_val = x[idx]
		x[idx] = tmp_val + h
		fxh1 = f(x)
		x[idx] = tmp_val - h
		fxh2 = f(x)
		grad[idx] = (fxh1 - fxh2)/(2*h)
		x[idx] = tmp_val
	return grad

>>> def gradient_descent(f, init_x, lr = 0.01, step_num = 100):
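As a quick sanity check of the corrected numerical_gradient (a minimal sketch, not part of the original session; the expected values follow from the analytic gradient (2*x0, 2*x1)):

import numpy as np

def function_2(x):
    return x[0]**2 + x[1]**2

print(numerical_gradient(function_2, np.array([3.0, 4.0])))
# -> approximately [6. 8.]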
Example no. 18
    def loss(self, X, y=None, reg=0.0):
        """
    Compute the loss and gradients for a two layer fully connected neural
    network.

    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
      an integer in the range 0 <= y[i] < C. This parameter is optional; if it
      is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.

    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].

    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of training
      samples.
    - grads: Dictionary mapping parameter names to gradients of those parameters
      with respect to the loss function; has the same keys as self.params.
    """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        # Compute the forward pass
        scores = None
        #############################################################################
        # TODO: Perform the forward pass, computing the class scores for the input. #
        # Store the result in the scores variable, which should be an array of      #
        # shape (N, C).                                                             #
        #############################################################################

        scores = np.clip(np.dot(X, W1) + b1, 0, None)
        if (scores.shape != (N, W1.shape[1])):
            print('score shape is not right')
        scores = np.dot(scores, W2) + b2

        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        # If the targets are not given then jump out, we're done
        if y is None:
            return scores

        # Compute the loss
        loss = None
        #############################################################################
        # TODO: Finish the forward pass, and compute the loss. This should include  #
        # both the data loss and L2 regularization for W1 and W2. Store the result  #
        # in the variable loss, which should be a scalar. Use the Softmax           #
        # classifier loss.                                                          #
        #############################################################################
        # Softmax over classes: normalize each row (axis=1), with max-subtraction for stability
        scores = np.exp(scores - np.max(scores, axis=1, keepdims=True))
        scores /= np.sum(scores, axis=1, keepdims=True)
        widArray = np.arange(N)

        # Cross-entropy data loss plus L2 regularization, reduced to a scalar
        loss = np.mean(-np.log(scores[widArray, y])) + reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        # Backward pass: compute gradients
        grads = {}
        dW1 = np.zeros_like(W1)
        db1 = np.zeros_like(b1)
        dW2 = np.zeros_like(W2)
        db2 = np.zeros_like(b2)
        #############################################################################
        # TODO: Compute the backward pass, computing the derivatives of the weights #
        # and biases. Store the results in the grads dictionary. For example,       #
        # grads['W1'] should store the gradient on W1, and be a matrix of same size #
        #############################################################################
        pass

        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        return loss, grads
Example no. 19
 def similar_copy(self):
     "A new Bloomfilter with the same size array and same seeds"
     return self.__class__(self.seeds.copy(), np.zeros_like(self.arr))