コード例 #1
0
ファイル: hrr.py プロジェクト: HBPNeurorobotics/holographic
    def decodeCoordinate(self, memory=None, dim=1, return_list=False, suppress_value=None, decode_range=None):
        """Decode the coordinate peak(s) stored in a memory vector.

        The memory is normalized, reverse-permuted, reshaped to ``dim``
        dimensions and smoothed. Peaks are then extracted greedily: the
        global maximum is converted to a coordinate with
        ``helpers.reverse_scale`` and deducted from the memory again (via
        ``deductValue``) before the next maximum is searched.

        Side effect: if ``self.peak_min`` is 0 it is set to half of the
        maximum of the processed memory and stays at that value afterwards.

        Args:
            memory: Vector to decode. Defaults to ``self.memory``.
            dim: Number of coordinate dimensions; must be 1, 2 or 3.
            return_list: If ``False`` only the first peak is returned. If
                ``True`` every peak above the threshold is returned.
            suppress_value: Optional value that is deducted from the memory
                before the peak search starts.
            decode_range: One range per dimension. Defaults to
                ``self.valid_range``.

        Returns:
            With ``return_list`` being ``False``: the first decoded coordinate
            (a list with ``dim`` entries). With ``return_list`` being true: a
            list of ``(coordinate, 1)`` tuples. If no peak exceeds the
            threshold, the result is ``np.nan`` / ``[(np.nan, 1)]`` when a
            ``suppress_value`` was given, and an empty list otherwise.

        Raises:
            ValueError: If neither ``decode_range`` nor ``self.valid_range``
                is set.
            AssertionError: If ``dim`` is not 1, 2 or 3, or if the number of
                ranges differs from ``dim``.
        """
        assert dim in (1, 2, 3)

        if memory is None:
            memory = self.memory

        memory = helpers.normalize(memory)

        if decode_range is None:
            decode_range = self.valid_range
        if decode_range is None:
            raise ValueError("Decoding scalar values requires valid range (valid_range or decode_range parameter)")

        assert len(decode_range) == dim

        memory = self.reverse_permute(memory)

        if self.visualize:
            print("Output Reverse:")
            self.plot(np.reshape(memory, self.size))

        memory = helpers.smooth(helpers.reShape(memory, dim), self.window_ratio)
        side_length = helpers.sideLength(memory.size, dim)

        if self.visualize:
            print("Output Smooth pre:")
            self.plot(np.reshape(memory, self.size))

        if suppress_value is not None:
            # Fixed: the original referenced the undefined name
            # ``supress_value`` and did not forward ``dim``, so the
            # compensation did not match the reshaped memory for dim > 1.
            # NOTE(review): this uses the class-level HRR.valid_range rather
            # than decode_range, same as the deduction in the loop below --
            # confirm that this is intended.
            memory = self.deductValue(memory, suppress_value, HRR.valid_range, dim)
            if self.visualize:
                print("Output Smooth (after suppression):")
                self.plot(np.reshape(memory, self.size))

        result = []

        # A peak_min of 0 means "not configured": derive it from the data.
        if self.peak_min == 0:
            self.peak_min = np.max(memory) / 2

        while np.max(memory) > self.peak_min_ratio * abs(np.mean(memory)) + self.peak_min:

            # Index of the current global maximum, one entry per dimension.
            spot = list(np.unravel_index(np.argmax(memory), memory.shape))

            # Map each index back into the value range of its dimension.
            for i in range(dim):
                spot[i] = helpers.reverse_scale(spot[i], side_length, decode_range[i])

            result.append((spot, 1))
            if return_list is False:
                return spot

            # Remove the peak that was just found so the next iteration sees
            # the next-highest one.
            memory = self.deductValue(memory, spot, HRR.valid_range, dim, np.max(memory))
            if self.visualize:
                print("Output Post Deduction:")
                self.plot(np.reshape(memory, self.size))

        if len(result) == 0 and suppress_value is not None:
            return [(np.nan, 1)] if return_list else np.nan
        return result
コード例 #2
0
ファイル: rnn.py プロジェクト: chrissaher/NN-Projects
    def fit(self, X, num_iter=100, lr=1e-5, char_to_ix=None, dino_names=10):
        """Train the network one example per iteration and return the parameters.

        Each iteration takes the next example from ``X`` (cycling when
        ``num_iter`` exceeds ``len(X)``), runs a forward pass, computes the
        loss, back-propagates, clips the gradients to ``maxValue=5`` and
        updates the parameters. Every 2000 iterations the smoothed loss and
        ``dino_names`` sampled sequences are printed.

        Args:
            X: Sequence of training examples; each example is iterated
                character by character.
            num_iter: Number of training iterations.
            lr: Learning rate passed to ``update_parameters``.
            char_to_ix: Mapping from character to index. Required; it must
                contain the newline character ``'\\n'``.
            dino_names: Number of samples printed at each report; also passed
                to ``get_initial_loss``.

        Returns:
            The parameters after the last update.

        Raises:
            ValueError: If ``char_to_ix`` is not given.
        """
        if char_to_ix is None:
            raise ValueError("char_to_ix is required to encode the training examples")

        # Inverse mapping needed to print sampled indices; the original read
        # an ``ix_to_char`` name that is not defined in this method.
        ix_to_char = {ix: ch for ch, ix in char_to_ix.items()}

        parameters = self.initialize_parameters()
        n_a = self.n_a

        # NOTE(review): ``vocab_size`` is not defined in this method; it has
        # to exist at module level -- confirm (len(char_to_ix) may be meant).
        loss = get_initial_loss(vocab_size, dino_names)

        # NOTE(review): the hidden state returned by backpropagation is not
        # fed back, so every example starts from this zero state -- confirm
        # that this is intended.
        a_prev = np.zeros((n_a, 1))
        for j in range(num_iter):
            # Fixed: the original indexed with len(examples), an undefined
            # name; the data set is the parameter X.
            example = X[j % len(X)]
            # Inputs start with None; targets are the inputs shifted by one
            # and terminated with a newline.
            x = [None] + [char_to_ix[ch] for ch in example]
            y = x[1:] + [char_to_ix['\n']]

            cache = self.forward(x, a_prev, parameters)
            curr_loss = self.calculate_lost(y, cache)
            gradients, _ = self.backpropagation(x, y, parameters, cache)
            gradients = self.clip(gradients, maxValue=5)
            parameters = self.update_parameters(lr, parameters, gradients)

            loss = smooth(loss, curr_loss)

            if j % 2000 == 0:
                print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
                for _ in range(dino_names):
                    sampled_indices = self.sample(parameters, char_to_ix)
                    print_sample(sampled_indices, ix_to_char)
                print('\n')

        return parameters
コード例 #3
0
ファイル: vsa.py プロジェクト: jackokaiser/holographic
    def plot(self,
             vect=None,
             unpermute=False,
             smooth=False,
             wide=False,
             multidim=False):
        """Show a vector as a line plot or as a 3D surface.

        Args:
            vect: Data to plot. Defaults to ``self.memory``.
            unpermute: If true, ``self.reverse_permute`` is applied first.
            smooth: If true, ``helpers.smooth`` is applied before plotting.
            wide: If true, the axis limits are padded on every side by the
                module-level ``widening`` factor.
            multidim: If true, draw a surface plot; a 1-D vector is reshaped
                to 2-D with ``helpers.reShape`` first. Otherwise draw a line
                plot over the element index.

        Raises:
            AssertionError: If ``multidim`` is set and ``vect`` has more than
                two dimensions.
        """
        if vect is None:
            vect = self.memory
        if unpermute:
            vect = self.reverse_permute(vect)
        if smooth:
            vect = helpers.smooth(vect)

        fig = plt.figure()

        if wide:
            # NOTE(review): ``widening`` is not defined in this method; it has
            # to exist at module level -- confirm.
            widen = len(vect) * widening
            down = np.amin(vect)
            up = np.amax(vect)
            # Value span (max - min); the original called this "mean".
            span = up - down
            down -= span * widening
            up += span * widening
            plt.axis([-widen, len(vect) + widen, down, up])

        if multidim:
            assert len(vect.shape) < 3
            if len(vect.shape) == 1:
                vect = helpers.reShape(vect, 2)
            # Axes are centred around zero.
            X = np.arange(-len(vect) / 2, len(vect) / 2, 1)
            Y = np.arange(-len(vect[0]) / 2, len(vect[0]) / 2, 1)
            X, Y = np.meshgrid(X, Y)
            # Figure.gca() no longer accepts keyword arguments (removed in
            # Matplotlib 3.6); add_subplot creates the 3D axes in both old
            # and new versions.
            ax = fig.add_subplot(111, projection='3d')
            surf = ax.plot_surface(X,
                                   Y,
                                   vect,
                                   rstride=1,
                                   cstride=1,
                                   cmap='coolwarm',
                                   linewidth=0,
                                   antialiased=True)
            ax.set_zlim(np.min(vect) / 3, 1.1 * np.max(vect))
            ax.set_xlabel('Y Index')
            ax.set_ylabel('X Index')
            ax.set_zlabel('Encoded Value')
            ax.set_xlim3d(-len(vect) / 2, len(vect) / 2)
            ax.set_ylim3d(-len(vect[0]) / 2, len(vect[0]) / 2)
            ax.azim = 200
            fig.colorbar(surf, shrink=0.5, aspect=5)
        else:
            xx = range(len(vect))
            plt.plot(xx, vect)

        plt.show()
コード例 #4
0
ファイル: hrr.py プロジェクト: HBPNeurorobotics/holographic
    def deductValue(self, memory, value, input_range, dim = 1, height = 1):
        """Subtract the smoothed encoding of ``value`` from ``memory``.

        The value is encoded with ``self.coordinate_encoder``, reshaped to
        ``dim`` dimensions, smoothed with ``self.window_ratio`` and scaled by
        ``-height``; the result is added to ``memory`` in place.

        Args:
            memory: Array to suppress the value in; modified in place.
            value: Value to suppress. Anything that is not a list, tuple, set,
                frozenset or ndarray is wrapped in a one-element list.
            input_range: Range handed to the encoder; must not be ``None``.
            dim: Number of dimensions the encoding is reshaped to.
            height: Factor the smoothed encoding is scaled by before it is
                subtracted.

        Returns:
            The same ``memory`` object after the in-place update.
        """
        assert input_range is not None

        # The encoder is always handed a collection, so wrap anything else.
        collection_types = (list, tuple, set, frozenset, np.ndarray)
        if not isinstance(value, collection_types):
            value = [value]

        encoded = self.coordinate_encoder(value, input_range)

        # Reshape and smooth exactly like the memory was, so that both line
        # up element by element.
        compensate = helpers.smooth(helpers.reShape(encoded, dim), self.window_ratio)

        # Flip the sign and scale, writing back into the same buffer.
        negated = [entry * -height for entry in compensate]
        compensate[:] = negated

        if self.visualize:
            print("Output Supressed Value:")
            self.plot(np.reshape(compensate, self.size))

        memory += compensate
        return memory
コード例 #5
0
        n_ep=args.n_ep,
        n_mcts=args.n_mcts,
        max_ep_len=args.max_ep_len,
        lr=args.lr,
        c=args.c,
        gamma=args.gamma,
        data_size=args.data_size,
        batch_size=args.batch_size,
        temp=args.temp,
        n_hidden_layers=args.n_hidden_layers,
        n_hidden_units=args.n_hidden_units)

    # Finished training: plot the smoothed episode returns and save the figure
    # as 'learning_curve.png' in the current working directory.
    fig, ax = plt.subplots(1, figsize=[7, 5])
    # Remember the length before smoothing; the x-axis below is built from it.
    total_eps = len(episode_returns)
    episode_returns = smooth(episode_returns, args.window, mode='valid')
    # NOTE(review): symmetric_remove presumably trims the x-axis so that its
    # length matches the 'valid'-mode smoothed series -- confirm against its
    # definition.
    ax.plot(symmetric_remove(np.arange(total_eps), args.window - 1),
            episode_returns,
            linewidth=4,
            color='darkred')
    ax.set_ylabel('Return')
    ax.set_xlabel('Episode', color='darkred')
    plt.savefig(os.getcwd() + '/learning_curve.png',
                bbox_inches="tight",
                dpi=300)

#    print('Showing best episode with return {}'.format(R_best))
#    Env = make_game(args.game)
#    Env = wrappers.Monitor(Env,os.getcwd() + '/best_episode',force=True)
#    Env.reset()
#    Env.seed(seed_best)
コード例 #6
0
ファイル: run_tests.py プロジェクト: mivadi/RL_project
                      discount_factor=discount_factor,
                      lr=1e-3,
                      epsilon=epsilon,
                      memory=memory,
                      experience_replay=experience_replay,
                      true_gradient=true_gradient,
                      batch_size=batch_size)
else:
    dynaQ = TabularDynaQ(env,
                         planning_steps=n,
                         discount_factor=discount_factor,
                         lr=learning_rate,
                         epsilon=epsilon,
                         deterministic=False)

# Train the agent, then plot the smoothed episode lengths.
dynaQ.learn_policy(1000)

training_curve = smooth(dynaQ.episode_lengths, 10)
plt.plot(training_curve)
plt.title('Episode lengths Deep Dyna-Q (nongreedy)')  # NB: lengths == returns
plt.show()

# Run the greedy test, then plot and report its episode lengths.
dynaQ.test_model_greedy(100)

greedy_curve = smooth(dynaQ.episode_lengths, 10)
plt.plot(greedy_curve)
greedy_mean = np.mean(np.array(dynaQ.episode_lengths))
print("Average episode length (greedy): {}".format(greedy_mean))
plt.title('Episode lengths Deep Dyna-Q (greedy)')  # NB: lengths == returns
plt.show()
コード例 #7
0
    def process_data(self):
        """Post-process the recorded floor/body data into per-frame features.

        Steps, in order:
          1. Smooth the first four columns of ``self._Fdata`` with a
             13-sample 'hanning' window.
          2. Rotate every body in ``self._Bdata`` by the angle derived from
             the floor data and collect its root transform in
             ``self._Tdata``.
          3. Smooth columns 2-4 of ``self._Tdata`` and apply the root
             translation/rotation to every body.
          4. Fill the joint position/velocity arrays.
          5. Compute per-frame x/z translation and angular velocities and
             rotate the translation velocity by the facing angle.
          6. Generate contact and phase labels and the trajectory samples.

        All results are stored on ``self``; nothing is returned.
        """
        # Smooth floor data column by column (transpose -> rows are columns).
        Fdata_t = np.transpose(self._Fdata)
        for i in range(4):
            Fdata_t[i] = smooth(Fdata_t[i], 13, 'hanning')
        self._Fdata = np.transpose(Fdata_t)

        # Apply floor transform and collect the root transform of each body.
        for i in range(len(self._Bdata)):
            xrot = atan(self._Fdata[i][3] / self._Fdata[i][2])
            R = rotation_matrix(xrot, 0.0, 0.0)
            self._Bdata[i] = rotate_body(R, self._Bdata[i])
            if self._Tdata is None:
                self._Tdata = np.array([get_root_transform(self._Bdata[i])])
            else:
                self._Tdata = np.append(self._Tdata, [get_root_transform(self._Bdata[i])], axis=0)

        Tdata_t = np.transpose(self._Tdata)
        for i in range(2, 5):
            Tdata_t[i] = smooth(Tdata_t[i], 13, 'hanning')
        self._Tdata = np.transpose(Tdata_t)

        # Fixed: the original iterated over range(len(self._frame_count)),
        # but _frame_count is an integer (it is used as an array size and as
        # a range bound below), so len() raised TypeError.
        for i in range(self._frame_count):
            self._Bdata[i] = translate_body(self._Tdata[i][0], self._Tdata[i][1], self._Bdata[i])
            R = rotation_matrix(self._Tdata[i][3], self._Tdata[i][4], self._Tdata[i][5])
            self._Bdata[i] = rotate_body(R, self._Bdata[i])

        self._joint_p = np.zeros((self._frame_count, PyKinectV2.JointType_Count * 3))
        self._joint_v = np.zeros((self._frame_count, PyKinectV2.JointType_Count * 3))
        # NOTE(review): both arrays have _frame_count rows, yet the two loops
        # below index rows up to 3 * _frame_count - 1, which runs past the
        # end. The intended layout (per-joint columns vs. a flat x/y/z
        # sequence) cannot be determined from here -- left unchanged, confirm
        # and fix against the body data structure.
        for i in range(self._frame_count):
            self._joint_p[i * 3 + 0] = self._Bdata[i].Position.x
            self._joint_p[i * 3 + 1] = self._Bdata[i].Position.y
            self._joint_p[i * 3 + 2] = self._Bdata[i].Position.z

        for i in range(3, self._frame_count * 3):
            self._joint_v[i] = self._joint_p[i] - self._joint_p[i - 3]

        # Frame-to-frame root velocities; frame 0 keeps a velocity of zero.
        self._transx_v = np.zeros(self._frame_count)
        self._transz_v = np.zeros(self._frame_count)
        self._angy_v = np.zeros(self._frame_count)
        for i in range(1, self._frame_count):
            self._transx_v[i] = self._Tdata[i][0] - self._Tdata[i - 1][0]
            self._transz_v[i] = self._Tdata[i][2] - self._Tdata[i - 1][2]
            alpha = atan2(self._Tdata[i][5], self._Tdata[i][3])
            beta = atan2(self._Tdata[i - 1][5], self._Tdata[i - 1][3])
            # Fixed: the original assigned the scalar to self._angy_v itself,
            # replacing the whole array on every iteration.
            self._angy_v[i] = alpha - beta

        for i in range(self._frame_count):
            # Angle between the facing direction and the z axis.
            theta = atan2(self._Tdata[i][5], self._Tdata[i][3])
            # Rotate the (x, z) velocity by that angle. Fixed: the original
            # multiplied the angle by the velocity inside cos/sin and used
            # the already-overwritten x value when computing z.
            vx = self._transx_v[i]
            vz = self._transz_v[i]
            self._transx_v[i] = cos(theta) * vx - sin(theta) * vz
            self._transz_v[i] = sin(theta) * vx + cos(theta) * vz

        #TODO: pass over all joints again and interpolate any missing points

        self._gen_contact_labels()
        self._gen_phase_labels()

        self._traj_px = np.zeros((self._frame_count, self._sample_count))
        self._traj_pz = np.zeros((self._frame_count, self._sample_count))
        self._traj_dx = np.zeros((self._frame_count, self._sample_count))
        self._traj_dz = np.zeros((self._frame_count, self._sample_count))
        for i in range(self._frame_count):
            frame_samples = self._sample_frames(i)
            self._gen_trajectory(i, frame_samples)