# Assumed module-level imports for this method: os, gym, numpy as np,
# datetime (from datetime), the Keras callbacks module, the prepro and
# discount_rewards helpers (Karpathy-style, see the sketch further down),
# and tflog, a small helper that logs scalars to TensorBoard.
def train(self):
    self.build_model()
    self.__model.summary()
    self.__model.compile(loss='binary_crossentropy',
                         optimizer='adam',
                         metrics=['accuracy'])

    # actions recognized by the Pong environment
    UP_ACTION = 2
    DOWN_ACTION = 3

    # hyperparameters
    gamma = 0.99  # discount factor for rewards

    # initializing variables
    x_train, y_train, rewards = [], [], []
    reward_sum = 0
    episode_nb = 0

    resume = True
    running_reward = None
    epochs_before_saving = 10
    log_dir = './log' + datetime.now().strftime("%Y%m%d-%H%M%S") + "/"

    # load pre-trained weights if they exist
    # (LOG_DIR is a module-level constant pointing at a previous run's directory)
    if resume and os.path.isfile(LOG_DIR + 'my_model_weights.h5'):
        print("loading previous weights")
        self.__model.load_weights(LOG_DIR + 'my_model_weights.h5')

    # add a TensorBoard callback to visualize learning
    tbCallBack = callbacks.TensorBoard(log_dir=log_dir, histogram_freq=0,
                                       write_graph=True, write_images=True)

    # initializing the environment
    env = gym.make('Pong-v0')
    observation = env.reset()
    prev_input = None

    # main loop
    while True:
        # preprocess the observation, set input as the difference between frames
        cur_input = prepro(observation)
        x = cur_input - prev_input if prev_input is not None else np.zeros(80 * 80)
        prev_input = cur_input

        # forward the policy network and sample an action from the returned probability
        proba = self.__model.predict(np.expand_dims(x, axis=1).T)
        action = UP_ACTION if np.random.uniform() < proba else DOWN_ACTION
        y = 1 if action == UP_ACTION else 0  # 0 and 1 are our labels

        # log the input and label to train on later
        x_train.append(x)
        y_train.append(y)

        # do one step in our environment
        observation, reward, done, info = env.step(action)
        rewards.append(reward)
        reward_sum += reward

        # end of an episode
        if done:
            print('At the end of episode', episode_nb, 'the total reward was:', reward_sum)

            # increment episode number
            episode_nb += 1

            # training: the discounted rewards weight each sampled action's gradient
            self.__model.fit(x=np.vstack(x_train), y=np.vstack(y_train),
                             verbose=1, callbacks=[tbCallBack],
                             sample_weight=discount_rewards(rewards, gamma))

            # saving the weights used by our model
            if episode_nb % epochs_before_saving == 0:
                self.__model.save_weights(
                    'my_model_weights' + datetime.now().strftime("%Y%m%d-%H%M%S") + '.h5')

            # log the running reward
            running_reward = reward_sum if running_reward is None \
                else running_reward * 0.99 + reward_sum * 0.01
            tflog('running_reward', running_reward, custom_dir=log_dir)

            # reinitialization for the next episode
            x_train, y_train, rewards = [], [], []
            observation = env.reset()
            reward_sum = 0
            prev_input = None
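# build_model() itself is not shown in this excerpt. Below is a minimal sketch of the
# kind of policy network it could create, consistent with the 80*80 flattened input
# and the binary_crossentropy loss used above. The function name make_policy_network
# and the 200-unit hidden layer are illustrative assumptions (the hidden size is
# borrowed from Karpathy's setup), not the author's exact choices.

from keras.models import Sequential
from keras.layers import Dense


def make_policy_network(input_dim=80 * 80):
    """Hypothetical helper: one hidden layer, sigmoid output giving P(action == UP)."""
    model = Sequential()
    model.add(Dense(200, activation='relu', input_dim=input_dim,
                    kernel_initializer='glorot_uniform'))
    model.add(Dense(1, activation='sigmoid'))
    return model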
# Playing with a trained model. The names model, env, observation, STEPS, frames,
# UP_ACTION and DOWN_ACTION are assumed to be defined earlier: the policy network,
# the Pong-v0 environment, its initial observation, the number of frames to play,
# the list collecting frames, and the two Pong actions.
import os

import numpy as np
from karpathy import prepro, discount_rewards

# load pre-trained model weights if they exist
if os.path.isfile('my_model_weights.h5'):
    print("loading previous weights")
    model.load_weights('my_model_weights.h5')

prev_input = None

# main loop
for i in range(STEPS):
    # preprocess the observation, set input as the difference between frames
    cur_input = prepro(observation)
    x = cur_input - prev_input if prev_input is not None else np.zeros(80 * 80)
    prev_input = cur_input

    # forward the policy network and sample an action from the returned probability
    proba = model.predict(np.expand_dims(x, axis=1).T)
    action = UP_ACTION if np.random.uniform() < proba else DOWN_ACTION

    # run one step
    observation, reward, done, info = env.step(action)
    frames.append(observation)  # collect the raw frame, e.g. to render the game later

    # if the episode is over, reset to the beginning
    if done:
        observation = env.reset()
        frames.append(observation)
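# The prepro and discount_rewards helpers imported from the karpathy module are not
# shown here. Below is a minimal sketch of what they typically look like, adapted
# from Karpathy's pg-pong.py; the helpers actually shipped with this code may differ
# (for example in whether the discounted rewards are standardized).

import numpy as np


def prepro(I):
    """Preprocess a 210x160x3 uint8 Pong frame into a 6400 (80x80) float vector."""
    I = I[35:195]              # crop the playing field
    I = I[::2, ::2, 0].copy()  # downsample by 2, keep one channel (copy so the stored frame is untouched)
    I[I == 144] = 0            # erase background type 1
    I[I == 109] = 0            # erase background type 2
    I[I != 0] = 1              # paddles and ball become 1
    return I.astype(float).ravel()


def discount_rewards(r, gamma):
    """Discount rewards back through time, resetting at each game boundary (Pong-specific)."""
    r = np.array(r, dtype=float)
    discounted_r = np.zeros_like(r)
    running_add = 0
    for t in reversed(range(r.size)):
        if r[t] != 0:
            running_add = 0  # a non-zero reward marks the end of a single game
        running_add = running_add * gamma + r[t]
        discounted_r[t] = running_add
    # standardizing is common before using these as per-sample weights in fit()
    discounted_r -= discounted_r.mean()
    discounted_r /= discounted_r.std()
    return discounted_r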