Example #1
def benchmark_all_eval(model,
                       criterion,
                       converter,
                       opt,
                       calculate_infer_time=False):
    # 'None' corresponds to the clean data
    transforms = [None]
    # Make tests reproducible
    rng = np.random.default_rng(opt.seed)
    if opt.corrupt:
        corruptions = [
            Curve(rng=rng),
            Distort(rng),
            Stretch(rng),
            Rotate(rng=rng),
            Perspective(rng),
            Shrink(rng),
            TranslateX(rng),
            TranslateY(rng),
            VGrid(rng),
            HGrid(rng),
            Grid(rng),
            RectGrid(rng),
            EllipseGrid(rng),
            GaussianNoise(rng),
            ShotNoise(rng),
            ImpulseNoise(rng),
            SpeckleNoise(rng),
            GaussianBlur(rng),
            DefocusBlur(rng),
            MotionBlur(rng),
            GlassBlur(rng),
            ZoomBlur(rng),
            Contrast(rng),
            Brightness(rng),
            JpegCompression(rng),
            Pixelate(rng),
            Fog(rng),
            Snow(rng),
            Frost(rng),
            Rain(rng),
            Shadow(rng),
            Posterize(rng),
            Solarize(rng),
            Invert(rng),
            Equalize(rng),
            AutoContrast(rng),
            Sharpness(rng),
            Color(rng)
        ]
        # Generate partial functions for the three severity levels
        for c in corruptions:
            for level in range(3):
                p = partial(c, mag=level)
                p.__name__ = '{}-{}'.format(c.__class__.__name__, level)
                transforms.append(p)
    for tr in transforms:
        benchmark_all_eval_transform(model, criterion, converter, opt,
                                     calculate_infer_time, tr, rng)
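# The loop above binds each corruption to a severity level with partial(c, mag=level), so every
# corruption object is assumed to take the RNG in __init__ and be callable as c(img, mag=...)
# (the direct tr(img) call appears in a later example on this page). A minimal, illustrative
# sketch of that interface; it is not the original corruption class and the sigma values are
# placeholders:
import numpy as np
from PIL import Image

class GaussianNoise:
    def __init__(self, rng):
        self.rng = rng

    def __call__(self, img, mag=0):
        # Severity level selects the noise standard deviation (placeholder values).
        sigma = (0.06, 0.10, 0.16)[mag]
        arr = np.asarray(img, dtype=np.float32) / 255.0
        noisy = np.clip(arr + self.rng.normal(0.0, sigma, arr.shape), 0.0, 1.0)
        return Image.fromarray((noisy * 255).astype(np.uint8))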
    def __init__(self,
                 state_space_dim,
                 action_space_dim,
                 min_action_val,
                 max_action_val,
                 hidden_layer_size=512,
                 gamma=0.99,
                 tau=0.0001,
                 path_to_load=None):
        self.gamma = gamma
        self.tau = tau
        self.min_action_val = min_action_val
        self.max_action_val = max_action_val
        self.buffer = Buffer(state_space_dim, action_space_dim)
        self.noise_generator = GaussianNoise(0., 0.2, action_space_dim)

        self.actor = Actor(state_space_dim, action_space_dim, max_action_val,
                           hidden_layer_size)
        self.critic = Critic(state_space_dim, action_space_dim,
                             hidden_layer_size)

        if path_to_load is not None:
            if os.path.exists(path_to_load + "_actor.h5") and \
                    os.path.exists(path_to_load + "_critic.h5"):
                self.load(path_to_load)

        self.target_actor = Actor(state_space_dim, action_space_dim,
                                  max_action_val, hidden_layer_size)
        self.target_critic = Critic(state_space_dim, action_space_dim,
                                    hidden_layer_size)

        self.target_actor.model.set_weights(self.actor.model.get_weights())
        self.target_critic.model.set_weights(self.critic.model.get_weights())

        critic_lr = 0.002
        actor_lr = 0.001

        self.critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
        self.actor_optimizer = tf.keras.optimizers.Adam(actor_lr)
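# The agent above draws exploration noise from GaussianNoise(0., 0.2, action_space_dim); the full
# class later on this page calls get_noise() and clips the result in get_action(). A minimal
# sketch of such a generator, assuming the constructor arguments are (mean, std, dim); this is an
# illustration, not the original class:
import numpy as np

class GaussianNoise:
    def __init__(self, mean, std, dim):
        self.mean = mean
        self.std = std
        self.dim = dim

    def get_noise(self):
        # One Gaussian sample per action dimension.
        return np.random.normal(self.mean, self.std, self.dim)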
Example #3
    def __init__(self,env,nS,nA,config):
        self.seed = config.seed
        self.name = config.name
        self.nA = nA
        self.nS = nS
        self.num_agents = config.num_agents
        self.episodes = config.episodes
        self.tmax = config.tmax
        self.print_every = config.print_every
        self.update_every = config.UPDATE_EVERY
        self.SGD_epoch = config.SGD_epoch
        self.actor_path = config.actor_path
        self.critic_path = config.critic_path
        self.noise = GaussianNoise((self.num_agents,nA),config.episodes)
        # self.noise = OUnoise(nA,config.seed)
        self.winning_condition = config.winning_condition
        if torch.cuda.is_available():
            self.device = 'cuda:0'
            self.device2 = 'cuda:1'
        else:
            self.device = 'cpu'

        # Hyperparams
        self.gamma = config.gamma
        self.buffer_size = config.buffer_size
        self.min_buffer_size = config.min_buffer_size
        self.batch_size = config.batch_size
        self.L2 = config.L2
        self.tau = config.TAU
        
        # For multi agent
        self.nO = self.num_agents * nS # Observation space
        self.env = env
        self.R = ReplayBuffer(config.buffer_size,config.batch_size,config.seed)

        # Instantiating Actor and Critic
        self.agents = [DDPG(self.seed,nA,nS,config.L2,0),DDPG(self.seed,nA,nS,config.L2,1)]
Example #4
    def __init__(self,
                 num_sym,
                 num_chan,
                 rate,
                 batch_size=200,
                 train_snr=7,
                 hidden_neurons=50):
        super(comm_4_1, self).__init__()
        self.num_symbols = num_sym
        self.num_channels = num_chan
        self.Ebno = 10.0**(train_snr / 10.0)  # dB equivalent
        self.std_dev = np.sqrt(1 / (2 * self.Ebno * rate))
        self.lin1 = nn.Linear(self.num_symbols, self.num_symbols)
        self.lin2 = nn.Linear(self.num_symbols, hidden_neurons)
        self.lin3 = nn.Linear(hidden_neurons, 2)
        self.lin_c = nn.Linear(2, self.num_channels * 2)
        self.norm1 = nn.BatchNorm1d(self.num_channels * 2)
        self.noise = GaussianNoise((batch_size, self.num_channels * 2),
                                   std=self.std_dev)
        self.lin4 = nn.Linear(self.num_channels * 2, hidden_neurons)
        self.lin5 = nn.Linear(hidden_neurons, self.num_symbols)
        self.lin6 = nn.Linear(self.num_symbols, self.num_symbols)
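# comm_4_1 above places GaussianNoise((batch_size, num_channels * 2), std=std_dev) between the
# encoder and decoder layers to model an AWGN channel. A minimal sketch of such a PyTorch module,
# assuming the (shape, std) constructor seen in the call above; the original may differ, e.g. it
# may only add noise in training mode:
import torch
import torch.nn as nn

class GaussianNoise(nn.Module):
    def __init__(self, shape, std=0.1):
        super().__init__()
        self.shape = shape
        self.std = std

    def forward(self, x):
        # Additive white Gaussian noise with a fixed standard deviation.
        return x + self.std * torch.randn(self.shape, device=x.device)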
def main(opt):
    # 'None' corresponds to the clean data
    transforms = [None]
    # Make tests reproducible
    rng = np.random.default_rng(opt.seed)
    corruptions = [
        Curve(rng=rng),
        Distort(rng),
        Stretch(rng),
        Rotate(rng=rng),
        Perspective(rng),
        Shrink(rng),
        TranslateX(rng),
        TranslateY(rng),
        VGrid(rng),
        HGrid(rng),
        Grid(rng),
        RectGrid(rng),
        EllipseGrid(rng),
        GaussianNoise(rng),
        ShotNoise(rng),
        ImpulseNoise(rng),
        SpeckleNoise(rng),
        GaussianBlur(rng),
        DefocusBlur(rng),
        MotionBlur(rng),
        GlassBlur(rng),
        ZoomBlur(rng),
        Contrast(rng),
        Brightness(rng),
        JpegCompression(rng),
        Pixelate(rng),
        Fog(rng),
        Snow(rng),
        Frost(rng),
        Rain(rng),
        Shadow(rng),
        Posterize(rng),
        Solarize(rng),
        Invert(rng),
        Equalize(rng),
        AutoContrast(rng),
        Sharpness(rng),
        Color(rng)
    ]
    # Generate a partial function per severity level (only level 0 is used here)
    for c in corruptions:
        for level in range(1):
            p = partial(c, mag=level)
            p.__name__ = '{}-{}'.format(c.__class__.__name__, level)
            transforms.append(p)

    for tr in transforms:
        name = 'Clean' if tr is None else tr.__name__
        for d in os.listdir(opt.eval_data):
            outdir = os.path.join('corrupted-data', name, d)
            os.makedirs(outdir)
            for i, (img, label) in enumerate(
                    LmdbDataset(os.path.join(opt.eval_data, d), opt, tr)):
                print(outdir, i)
                #img = img.resize((224, 224))
                img = img.resize((100, 32))
                if tr is not None:
                    img = tr(img)
                #img = img.resize((100, 32))
                img.save(os.path.join(outdir, '{:04d}.png'.format(i)))
# Load the MNIST data (handwritten digit images)
# The first run downloads the data, so it takes a while (53 MB)
mnist = fetch_mldata('MNIST original')

# Load the sample data
x_all = mnist.data.astype(np.float32) / 255
x_data = np.vstack([x_all[0]]*10)

# SaltAndPepperNoise
x = x_data.copy()
titles = []
for i in xrange(10):
    rate = 0.1 * i
    titles.append('%3.1f' % rate)
    sap = SaltAndPepperNoise(rate=rate)
    x[i] = sap.noise(x[i])
visualize(x, '../img/noise/s&p.jpg', (8, 2), (1, 10), titles=titles)

# GaussianNoise
x = x_data.copy()
titles = []
for i in xrange(10):
    scale = 0.1 * i
    titles.append('%3.1f' % scale)
    if i == 0:
        continue
    gaus = GaussianNoise(scale=scale)
    x[i] = gaus.noise(x[i])
visualize(x, '../img/noise/gaus.jpg', (8, 2), (1, 10), titles=titles)
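# Both noise classes above are applied through a .noise(x) method to flattened MNIST vectors
# scaled to [0, 1], with SaltAndPepperNoise(rate=...) and GaussianNoise(scale=...). A minimal
# sketch of that interface (illustrative only; the original classes may seed or clip differently):
import numpy as np

class SaltAndPepperNoise:
    def __init__(self, rate=0.1):
        self.rate = rate

    def noise(self, x):
        out = x.copy()
        mask = np.random.rand(*out.shape) < self.rate
        # Corrupted pixels become 0 or 1 with equal probability.
        out[mask] = np.random.randint(0, 2, size=mask.sum()).astype(out.dtype)
        return out

class GaussianNoise:
    def __init__(self, scale=0.1):
        self.scale = scale

    def noise(self, x):
        return np.clip(x + np.random.normal(0.0, self.scale, x.shape), 0.0, 1.0).astype(x.dtype)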

    def __init__(self, env, nS, nA, config):
        self.seed = config.seed
        self.name = config.name
        self.nA = nA
        self.nS = nS
        self.num_agents = config.num_agents
        self.episodes = config.episodes
        self.tmax = config.tmax
        self.print_every = config.print_every
        self.update_every = config.UPDATE_EVERY
        self.SGD_epoch = config.SGD_epoch
        self.actor_path = config.actor_path
        self.critic_path = config.critic_path
        self.noise = GaussianNoise((self.num_agents, nA), config.episodes)
        # self.noise = OUnoise(nA,config.seed)
        self.winning_condition = config.winning_condition
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Hyperparams
        self.gamma = config.gamma
        self.buffer_size = config.buffer_size
        self.min_buffer_size = config.min_buffer_size
        self.batch_size = config.batch_size
        self.L2 = config.L2
        self.tau = config.TAU

        # For multi agent
        self.nO = self.num_agents * nS  # Observation space
        self.env = env
        self.R = ReplayBuffer(config.buffer_size, config.batch_size,
                              config.seed)

        # Instantiating Actor and Critic
        self.base_actor = Actor(self.seed, self.nS, self.nA)
        self.base_critic = Critic(self.seed, self.nO, self.nA)

        # Instantiate the desired number of agents and envs
        self.local_critics = [
            Critic(self.seed, self.nO, self.nA)
            for agent in range(self.num_agents)
        ]
        self.local_actors = [
            Actor(self.seed, self.nS, self.nA)
            for agent in range(self.num_agents)
        ]
        self.target_critics = [
            Critic(self.seed, self.nO, self.nA)
            for agent in range(self.num_agents)
        ]
        self.target_actors = [
            Actor(self.seed, self.nS, self.nA)
            for agent in range(self.num_agents)
        ]

        # Copy the weights from the base agents to the local and target networks.
        # Explicit loops are used because map() is lazy in Python 3 and would never run.
        for critic in self.local_critics:
            hard_update(self.base_critic, critic)
        for critic in self.target_critics:
            hard_update(self.base_critic, critic)
        for actor in self.local_actors:
            hard_update(self.base_actor, actor)
        for actor in self.target_actors:
            hard_update(self.base_actor, actor)

        # Instantiate optimizers
        self.critic_optimizers = [
            optim.Adam(self.local_critics[i].parameters(),
                       lr=1e-3,
                       weight_decay=self.L2) for i in range(self.num_agents)
        ]
        self.actor_optimizers = [
            optim.Adam(self.local_actors[i].parameters(), lr=1e-4)
            for i in range(self.num_agents)
        ]
class MultiAgent(object):
    def __init__(self, env, nS, nA, config):
        self.seed = config.seed
        self.name = config.name
        self.nA = nA
        self.nS = nS
        self.num_agents = config.num_agents
        self.episodes = config.episodes
        self.tmax = config.tmax
        self.print_every = config.print_every
        self.update_every = config.UPDATE_EVERY
        self.SGD_epoch = config.SGD_epoch
        self.actor_path = config.actor_path
        self.critic_path = config.critic_path
        self.noise = GaussianNoise((self.num_agents, nA), config.episodes)
        # self.noise = OUnoise(nA,config.seed)
        self.winning_condition = config.winning_condition
        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

        # Hyperparams
        self.gamma = config.gamma
        self.buffer_size = config.buffer_size
        self.min_buffer_size = config.min_buffer_size
        self.batch_size = config.batch_size
        self.L2 = config.L2
        self.tau = config.TAU

        # For multi agent
        self.nO = self.num_agents * nS  # Observation space
        self.env = env
        self.R = ReplayBuffer(config.buffer_size, config.batch_size,
                              config.seed)

        # Instantiating Actor and Critic
        self.base_actor = Actor(self.seed, self.nS, self.nA)
        self.base_critic = Critic(self.seed, self.nO, self.nA)

        # Instantiate the desired number of agents and envs
        self.local_critics = [
            Critic(self.seed, self.nO, self.nA)
            for agent in range(self.num_agents)
        ]
        self.local_actors = [
            Actor(self.seed, self.nS, self.nA)
            for agent in range(self.num_agents)
        ]
        self.target_critics = [
            Critic(self.seed, self.nO, self.nA)
            for agent in range(self.num_agents)
        ]
        self.target_actors = [
            Actor(self.seed, self.nS, self.nA)
            for agent in range(self.num_agents)
        ]

        # Copy the weights from the base agents to the local and target networks.
        # Explicit loops are used because map() is lazy in Python 3 and would never run.
        for critic in self.local_critics:
            hard_update(self.base_critic, critic)
        for critic in self.target_critics:
            hard_update(self.base_critic, critic)
        for actor in self.local_actors:
            hard_update(self.base_actor, actor)
        for actor in self.target_actors:
            hard_update(self.base_actor, actor)

        # Instantiate optimizers
        self.critic_optimizers = [
            optim.Adam(self.local_critics[i].parameters(),
                       lr=1e-3,
                       weight_decay=self.L2) for i in range(self.num_agents)
        ]
        self.actor_optimizers = [
            optim.Adam(self.local_actors[i].parameters(), lr=1e-4)
            for i in range(self.num_agents)
        ]

    def load_weights(self, critic_path, actor_path):
        # Load the actor weights
        [
            actor.load_state_dict(torch.load(actor_path + str(index) +
                                             '.ckpt'))
            for actor, index in zip(self.local_actors, range(self.num_agents))
        ]
        [actor.eval() for actor in self.local_actors]

    def save_weights(self, critic_path, actor_path):
        # Save weights for both
        [
            torch.save(critic.state_dict(), critic_path + str(index) + '.ckpt')
            for critic, index in zip(self.local_critics, range(
                self.num_agents))
        ]
        [
            torch.save(actor.state_dict(), actor_path + str(index) + '.ckpt')
            for actor, index in zip(self.local_actors, range(self.num_agents))
        ]

    def train(self):
        """
        We stack and store the stacked observations for critic training,
        but keep the states and next states separate for the actor actions.
        """
        tic = time.time()
        means = []
        stds = []
        steps = 0
        scores_window = deque(maxlen=100)
        for e in range(1, self.episodes):

            self.noise.step()
            episode_scores = []
            obs = self.env.reset()
            for t in range(self.tmax):
                actions = self.act(obs)
                next_obs, rewards, dones = self.env.step(actions)

                # Store experience
                if np.max(rewards) > 0:
                    print('hit the ball over the net', rewards)
                self.R.add(obs.reshape(1, 48), obs, actions, rewards,
                           next_obs.reshape(1, 48), next_obs, dones)
                obs = next_obs
                # Score tracking
                episode_scores.append(np.max(rewards))

            # Learn
            if len(self.R) > self.min_buffer_size:
                for _ in range(self.SGD_epoch):
                    # Update each agent
                    for i in range(self.num_agents):
                        self.learn(i)
                    # update target networks
                    self.update_targets_all()

            steps += int(t)
            means.append(np.mean(episode_scores))
            stds.append(np.std(episode_scores))
            scores_window.append(np.sum(episode_scores))
            if e % 4 == 0:
                toc = time.time()
                r_mean = np.mean(scores_window)
                r_max = max(scores_window)
                r_min = min(scores_window)
                r_std = np.std(scores_window)
                plot(self.name, means, stds)
                print(
                    "\rEpisode: {} out of {}, Steps {}, Rewards: mean {:.2f}, min {:.2f}, max {:.2f}, std {:.2f}, Elapsed {:.2f}"
                    .format(e, self.episodes, steps, r_mean, r_min, r_max,
                            r_std, (toc - tic) / 60))
            if np.mean(scores_window) > self.winning_condition:
                print('Env solved!')
                # save scores
                pickle.dump([means, stds],
                            open(str(self.name) + '_scores.p', 'wb'))
                # save policy
                self.save_weights(self.critic_path, self.actor_path)
                break

    def act(self, obs):
        # split states for each agent
        actions = [
            actor(obs[i]).detach().cpu().numpy()
            for i, actor in enumerate(self.local_actors)
        ]
        actions = np.vstack(actions)
        # Add noise for exploration
        actions = np.add(actions, self.noise.sample())
        return actions

    def evaluate(self, state):
        # TODO
        # Evaluate the agent's performance
        rewards = []

        obs = self.env.reset()
        for i in range(400):
            action = self.act(obs)
            next_obs, reward, done = self.env.step(action)
            obs = next_obs
            rewards.append(np.sum(reward))
            if done:
                break
        self.env.close()
        print("The agent achieved an average score of {:.2f}".format(
            np.mean(rewards)))
        return action

    def learn(self, index):
        # Get sample experiences
        obs, states, actions, rewards, next_obs, next_states, dones = self.R.sample(
        )
        # Get target actions and target values
        self.critic_optimizers[index].zero_grad()
        with torch.no_grad():
            target_actions = torch.stack([
                self.target_actors[index](next_states[i])
                for i in range(next_states.shape[0])
            ])
        next_values = self.target_critics[index](next_obs,
                                                 target_actions).detach()

        target_y = rewards + self.gamma * next_values * (1 - dones)
        current_y = self.local_critics[index](obs, actions)

        critic_loss = F.smooth_l1_loss(current_y, target_y)
        critic_loss.backward()
        self.critic_optimizers[index].step()
        # Update actor
        self.actor_optimizers[index].zero_grad()
        local_actions = torch.stack([
            self.local_actors[index](states[i]) for i in range(states.shape[0])
        ])
        actor_loss = -self.local_critics[index](obs, local_actions).mean()
        actor_loss.backward()
        self.actor_optimizers[index].step()

    def update_targets(self, index):
        MultiAgent.soft_update(self.local_critics[index],
                               self.target_critics[index], self.tau)
        MultiAgent.soft_update(self.local_actors[index],
                               self.target_actors[index], self.tau)

    def update_targets_all(self):
        for index in range(len(self.local_actors)):
            MultiAgent.soft_update(self.local_critics[index],
                                   self.target_critics[index], self.tau)
            MultiAgent.soft_update(self.local_actors[index],
                                   self.target_actors[index], self.tau)

    def multi_update_targets(self):
        [
            MultiAgent.soft_update(critic, target, self.tau)
            for critic, target in zip(self.local_critics, self.target_critics)
        ]
        [
            MultiAgent.soft_update(actor, target, self.tau)
            for actor, target in zip(self.local_actors, self.target_actors)
        ]

    @staticmethod
    def soft_update(source, target, tau):
        for param, target_param in zip(source.parameters(),
                                       target.parameters()):
            target_param.data.copy_(tau * target_param.data +
                                    (1 - tau) * param.data)
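# MultiAgent above calls self.noise.step() once per episode and adds self.noise.sample() to the
# actions for exploration, with GaussianNoise((num_agents, nA), episodes). A minimal sketch of
# such a helper whose scale decays over training; the decay schedule and scale values below are
# assumptions, not the original ones:
import numpy as np

class GaussianNoise:
    def __init__(self, shape, episodes, start_scale=0.5, end_scale=0.01):
        self.shape = shape
        self.scale = start_scale
        self.end_scale = end_scale
        # Multiplicative decay reaching end_scale after roughly `episodes` calls to step().
        self.decay = (end_scale / start_scale) ** (1.0 / episodes)

    def step(self):
        self.scale = max(self.scale * self.decay, self.end_scale)

    def sample(self):
        return np.random.normal(0.0, self.scale, self.shape)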
class DDPGAgent:
    def __init__(self,
                 state_space_dim,
                 action_space_dim,
                 min_action_val,
                 max_action_val,
                 hidden_layer_size=512,
                 gamma=0.99,
                 tau=0.0001,
                 path_to_load=None):
        self.gamma = gamma
        self.tau = tau
        self.min_action_val = min_action_val
        self.max_action_val = max_action_val
        self.buffer = Buffer(state_space_dim, action_space_dim)
        self.noise_generator = GaussianNoise(0., 0.2, action_space_dim)

        self.actor = Actor(state_space_dim, action_space_dim, max_action_val,
                           hidden_layer_size)
        self.critic = Critic(state_space_dim, action_space_dim,
                             hidden_layer_size)

        if path_to_load is not None:
            if os.path.exists(path_to_load + "_actor.h5") and \
                    os.path.exists(path_to_load + "_critic.h5"):
                self.load(path_to_load)

        self.target_actor = Actor(state_space_dim, action_space_dim,
                                  max_action_val, hidden_layer_size)
        self.target_critic = Critic(state_space_dim, action_space_dim,
                                    hidden_layer_size)

        self.target_actor.model.set_weights(self.actor.model.get_weights())
        self.target_critic.model.set_weights(self.critic.model.get_weights())

        critic_lr = 0.002
        actor_lr = 0.001

        self.critic_optimizer = tf.keras.optimizers.Adam(critic_lr)
        self.actor_optimizer = tf.keras.optimizers.Adam(actor_lr)

    @tf.function
    def _apply_gradients(self, states, actions, next_states, rewards):
        with tf.GradientTape() as tape:
            target_actions = self.target_actor.forward(next_states)
            y = tf.cast(rewards,
                        tf.float32) + self.gamma * self.target_critic.forward(
                            [next_states, target_actions])
            critic_value = self.critic.forward([states, actions])
            critic_loss = tf.math.reduce_mean(tf.math.square(y - critic_value))

        critic_grad = tape.gradient(critic_loss,
                                    self.critic.model.trainable_variables)
        self.critic_optimizer.apply_gradients(
            zip(critic_grad, self.critic.model.trainable_variables))

        with tf.GradientTape() as tape:
            actions = self.actor.forward(states)
            critic_value = self.critic.forward([states, actions])
            actor_loss = -tf.math.reduce_mean(critic_value)

        actor_grad = tape.gradient(actor_loss,
                                   self.actor.model.trainable_variables)
        self.actor_optimizer.apply_gradients(
            zip(actor_grad, self.actor.model.trainable_variables))

    def learn(self):
        states, actions, next_states, rewards = self.buffer.sample()
        self._apply_gradients(states, actions, next_states, rewards)

    def remember_step(self, info):
        self.buffer.remember(info)

    def update_targets(self):
        new_weights = []
        target_variables = self.target_critic.model.weights
        for i, variable in enumerate(self.critic.model.weights):
            new_weights.append(variable * self.tau + target_variables[i] *
                               (1 - self.tau))

        self.target_critic.model.set_weights(new_weights)

        new_weights = []
        target_variables = self.target_actor.model.weights
        for i, variable in enumerate(self.actor.model.weights):
            new_weights.append(variable * self.tau + target_variables[i] *
                               (1 - self.tau))

        self.target_actor.model.set_weights(new_weights)

    def get_best_action(self, state):
        tf_state = tf.expand_dims(tf.convert_to_tensor(state), 0)
        return tf.squeeze(self.actor.forward(tf_state)).numpy()

    def get_action(self, state):
        actions = self.get_best_action(
            state) + self.noise_generator.get_noise()
        return np.clip(actions, self.min_action_val, self.max_action_val)

    def save(self, path):
        print(f"Model has been saved as '{path}'")
        self.actor.save(path)
        self.critic.save(path)

    def load(self, path):
        print(f"Model has been loaded from '{path}'")
        self.actor.load(path)
        self.critic.load(path)
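# A hedged usage sketch of the DDPGAgent above in a classic (pre-0.26) Gym-style loop. The
# environment name and the layout of the tuple passed to remember_step() are assumptions; the
# order (state, action, next_state, reward) mirrors what Buffer.sample() returns in learn().
import gym  # assumed dependency

env = gym.make("Pendulum-v1")
agent = DDPGAgent(state_space_dim=env.observation_space.shape[0],
                  action_space_dim=env.action_space.shape[0],
                  min_action_val=float(env.action_space.low[0]),
                  max_action_val=float(env.action_space.high[0]))

for episode in range(10):
    state, done = env.reset(), False
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, _ = env.step(action)  # classic Gym API
        agent.remember_step((state, action, next_state, reward))
        agent.learn()
        agent.update_targets()
        state = next_state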
Example #10
class MultiAgent(object):
    def __init__(self,env,nS,nA,config):
        self.seed = config.seed
        self.name = config.name
        self.nA = nA
        self.nS = nS
        self.num_agents = config.num_agents
        self.episodes = config.episodes
        self.tmax = config.tmax
        self.print_every = config.print_every
        self.update_every = config.UPDATE_EVERY
        self.SGD_epoch = config.SGD_epoch
        self.actor_path = config.actor_path
        self.critic_path = config.critic_path
        self.noise = GaussianNoise((self.num_agents,nA),config.episodes)
        # self.noise = OUnoise(nA,config.seed)
        self.winning_condition = config.winning_condition
        if torch.cuda.is_available():
            self.device = 'cuda:0'
            self.device2 = 'cuda:1'
        else:
            self.device = 'cpu'

        # Hyperparams
        self.gamma = config.gamma
        self.buffer_size = config.buffer_size
        self.min_buffer_size = config.min_buffer_size
        self.batch_size = config.batch_size
        self.L2 = config.L2
        self.tau = config.TAU
        
        # For multi agent
        self.nO = self.num_agents * nS # Observation space
        self.env = env
        self.R = ReplayBuffer(config.buffer_size,config.batch_size,config.seed)

        # Instantiating Actor and Critic
        self.agents = [DDPG(self.seed,nA,nS,config.L2,0),DDPG(self.seed,nA,nS,config.L2,1)]

    def load_weights(self,critic_path,actor_path):
        # Load weights for both agents
        [agent.load_weights(critic_path,actor_path) for agent in self.agents]
        
    def save_weights(self,critic_path,actor_path):
        # Save weights for both
        [agent.save_weights(critic_path,actor_path) for agent in self.agents]

    def seed_replay_buffer(self):
        obs = self.env.reset()
        while len(self.R) < self.min_buffer_size:
            for t in range(self.tmax):
                actions = ((np.random.rand(2,2)*2)-1)
                next_obs,rewards,dones = self.env.step(actions)
                # Store experience
                self.R.add(obs,actions,rewards,next_obs,dones)
                obs = next_obs

    def train(self):
        """
        We stack and store the stacked observations for critic training,
        but keep the states and next states separate for the actor actions.
        """
        tic = time.time()
        means = []
        stds = []
        steps = []
        scores_window = deque(maxlen=100)
        for e in range(1,self.episodes):

            self.noise.step()
            episode_scores = []
            net_hits = 0
            obs = self.env.reset()
            for t in range(self.tmax):
                actions = self.act(obs)
                next_obs,rewards,dones = self.env.step(actions)
                # Check rate of success
                if np.max(rewards) > 0:
                    net_hits += 1
                # Store experience
                self.R.add(obs,actions,rewards,next_obs,dones)
                # Score tracking
                if dones.any():
                    steps.append(int(t))
                episode_scores.append(np.max(rewards))
                obs = next_obs
            print('hit the ball over the net {} times'.format(net_hits))
            
            # Learn
            for _ in range(self.SGD_epoch):
                # Update each agent
                for i in range(self.num_agents):
                    self.learn(self.agents[i])
                # update target networks
                self.update_targets_all()
                
            means.append(np.mean(episode_scores))
            stds.append(np.std(episode_scores))
            scores_window.append(np.sum(episode_scores))
            if e % 4 == 0:
                toc = time.time()
                r_mean = np.mean(scores_window)
                r_max = max(scores_window)
                r_min = min(scores_window)
                r_std = np.std(scores_window)
                plot(self.name,means,stds)
                print("\rEpisode: {} out of {}, Steps {}, Mean steps {}, Rewards: mean {:.2f}, min {:.2f}, max {:.2f}, std {:.2f}, Elapsed {:.2f}".format(e,self.episodes,np.sum(steps),np.mean(steps),r_mean,r_min,r_max,r_std,(toc-tic)/60))
            if np.mean(scores_window) > self.winning_condition:
                print('Env solved!')
                # save scores
                pickle.dump([means,stds], open(str(self.name)+'_scores.p', 'wb'))
                # save policy
                self.save_weights(self.critic_path,self.actor_path)
                break
        self.env.close()
                
        
    def act(self,obs):
        # split states for each agent
        actions = [agent.act(obs[i]) for i,agent in enumerate(self.agents)]
        actions = np.vstack(actions)
        return actions

    def evaluate(self,state):
        # TODO fix
        # Evaluate the agent's performance
        rewards = []
        
        obs = self.env.reset()
        for i in range(400):
            action = self.act(obs)
            next_obs,reward,done = self.env.step(action)
            obs = next_obs
            rewards.append(np.sum(reward))
            if done:
                break
        self.env.close()
        print("The agent achieved an average score of {:.2f}".format(np.mean(rewards)))
        return action

    def target_act(self,next_states):
        target_actions = torch.from_numpy(np.vstack([agent.target_act(next_states[:,index,:].reshape(self.batch_size,1,24)) for index,agent in enumerate(self.agents)]).reshape(self.batch_size,2,2)).float().to(self.device)
        return target_actions

    def local_act(self,states):
        local_actions = torch.from_numpy(np.vstack([agent.act(states[:,index,:].reshape(self.batch_size,1,24)) for index,agent in enumerate(self.agents)]).reshape(self.batch_size,2,2)).float().to(self.device)
        return local_actions

    def learn(self,agent):
        # Get sample experiences
        obs,actions,rewards,next_obs,dones = self.R.sample()
        # Get target actions and target values
        agent.critic_optimizer.zero_grad()
        target_actions = self.target_act(next_obs)
        # stack actions and observations for single critic input
        target_critic_input = torch.cat((next_obs,target_actions),dim=-1).view(self.batch_size,52)
        with torch.no_grad():
            next_values = agent.target_critic(target_critic_input).detach().squeeze(1)

        target_y = rewards[:,agent.index] + self.gamma * next_values * (1-dones[:,agent.index])
        # stack actions and observations for single critic input
        local_critic_input = torch.cat((obs,actions),dim=-1).view(self.batch_size,52)
        current_y = agent.local_critic(local_critic_input)

        critic_loss = F.smooth_l1_loss(current_y,target_y)
        critic_loss.backward()
        agent.critic_optimizer.step()
        # Update actor
        agent.actor_optimizer.zero_grad()
        local_actions = self.local_act(obs)
        actor_critic_input = torch.cat((obs,local_actions),dim=-1).view(self.batch_size,52)
        actor_loss = -agent.local_critic(actor_critic_input).mean()
        actor_loss.backward()
        agent.actor_optimizer.step()

    def update_targets(self,index):
        MultiAgent.soft_update(self.local_critics[index],self.target_critics[index],self.tau)
        MultiAgent.soft_update(self.local_actors[index],self.target_actors[index],self.tau)

    def update_targets_all(self):
        for agent in self.agents:
            MultiAgent.soft_update(agent.local_critic,agent.target_critic,self.tau)
            MultiAgent.soft_update(agent.local_actor,agent.target_actor,self.tau)

    def multi_update_targets(self):
        [MultiAgent.soft_update(critic,target,self.tau) for critic,target in zip(self.local_critics,self.target_critics)]
        [MultiAgent.soft_update(actor,target,self.tau) for actor,target in zip(self.local_actors,self.target_actors)]

    @staticmethod
    def soft_update(source,target,tau):
        for param,target_param in zip(source.parameters(),target.parameters()):
            target_param.data.copy_(tau * target_param.data + (1-tau) * param.data)
loss = MaxSE()

y = closing_prices.copy()

# training
for i in range(5):
    e = []
    T = rolling_window(y.copy()[:1000], window)

    np.random.shuffle(T)
    for t in tqdm(T):
        t -= t.min()
        t /= t.max()
        t -= 0.5
        t *= 2
        e.append(dA_model.learn(t, loss, noise=GaussianNoise(0, 0.001)))

    print(np.mean(e))
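# dA_model.learn(t, loss, noise=GaussianNoise(0, 0.001)) above passes a noise object that the
# denoising autoencoder uses to corrupt each normalized window. A minimal sketch of such a
# GaussianNoise(mean, std) corruptor, assuming it is applied as a callable; the interface the
# autoencoder actually expects is not shown here:
import numpy as np

class GaussianNoise:
    def __init__(self, mean=0.0, std=0.001):
        self.mean = mean
        self.std = std

    def __call__(self, x):
        # Additive Gaussian corruption of the input window.
        return x + np.random.normal(self.mean, self.std, np.shape(x))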

# encoding
prices = [
    i.copy() for i in np.array_split(y,
                                     len(y) // window) if len(i) == window
]
p = []
T = []
r = []
for i in prices:
    i -= i.min()
    i /= i.max()
    i = 2 * (i - 0.5)
Example #12
            f.config({"kernel": args.average_blur})
            image = f.filter(image)
        
        # median filter
        if args.median_blur != 0:
            f = MedianBlur()
            f.config({"kernel": args.median_blur})
            image = f.filter(image)

        # sp noise
        if args.sp_noise != 0:
            spnoiser = SPNoise()
            spnoiser.config({"percent": args.sp_noise})
            image = spnoiser.addNoise(image)

        # gaussian_noise
        if args.gaussian_noise != 0:
            gauss_noiser = GaussianNoise()
            gauss_noiser.config({"percent": args.gaussian_noise})
            image = gauss_noiser.addNoise(image)
        
        # poisson noise
        if args.poisson_noise:
            pn = PoissonNoise()
            image = pn.addNoise(image)

        new_filename = os.path.join(args.output_folder, basename)
        cv2.imwrite(new_filename, image)

    print("success!!!!")