    def __init__(self,
                 env,
                 optim=Adam,
                 policy_lr=0.001,
                 value_lr=0.001,
                 policy_hidden_size=[32],
                 value_hidden_size=[32],
                 gamma=0.95,
                 policy_lambda=0.9,
                 value_lambda=0.9,
                 batch_size=5000,
                 epochs=50,
                 update_every=50,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.render = render
        self.epochs = epochs
        self.gamma = gamma
        self.policy_lambda = policy_lambda
        self.value_lambda = value_lambda
        self.update_every = update_every

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.n
        self.policy_mlp = CategoricalMLP([obs_size] + policy_hidden_size +
                                         [action_size])
        self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)
        self.value_mlp = MLP([obs_size] + value_hidden_size + [1])
        self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)
Example No. 2
    def __init__(self,
                 env,
                 optim=Adam,
                 policy_lr=0.001,
                 value_lr=0.001,
                 policy_hidden_size=[64],
                 value_hidden_size=[64],
                 gamma=0.9,
                 batch_size=5000,
                 epochs=50,
                 update_every=50,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.render = render
        self.epochs = epochs
        self.gamma = gamma
        self.update_every = update_every
        self.writer_count = 0

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.shape[0]
        action_limit = env.action_space.high
        self.policy_mlp = GaussianMLP([obs_size] + policy_hidden_size +
                                      [action_size], action_limit)
        self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)
        self.value_mlp = MLP([obs_size] + value_hidden_size + [1])
        self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)
Example No. 3
    def __init__(self,
                 env,
                 optim=Adam,
                 lr=0.01,
                 hidden_size=[64],
                 batch_size=5000,
                 n_episodes=2000,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.n_episodes = n_episodes
        self.lr = lr
        self.render = render

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.n
        self.mlp = MLP([obs_size] + hidden_size + [action_size])
        self.optim = optim(self.mlp.parameters(), lr=lr)
Example No. 4
    def __init__(self,
                 env,
                 optim=Adam,
                 policy_lr=0.01,
                 value_lr=0.1,
                 policy_hidden_size=[32],
                 value_hidden_size=[32],
                 batch_size=5000,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.render = render

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.n
        self.policy_mlp = MLP([obs_size] + policy_hidden_size + [action_size])
        self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)
        self.value_mlp = MLP([obs_size] + value_hidden_size + [1])
        self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)
Example No. 5
    def __init__(self):
        """ Virtually private constructor. """
        if Ensembling.__instance is not None:
            raise Exception("This class is a singleton!")
        else:
            Ensembling.__instance = self
        # for root, dirs, files in os.walk("D:\\MSc\\Chat Parser Script\\models\\ensemble"):
        #   for foldername in dirs:
        #     model = tf.keras.models.load_model("D:\\MSc\\Chat Parser Script\\models\\ensemble\\" + foldername, compile=False)
        #     self.models[foldername] = model
        self.svmInstance = SVM.getInstance()
        self.mlpInstance = MLP.getInstance()
        self.tensorflowNNInstance = TensorflowNN.getInstance()
        self.naiveBayesInstance = NaiveBayes.getInstance()
Example No. 6
    def __init__(self):
        self.svmInstance = SVM.getInstance()
        self.tensorflowNNInstance = TensorflowNN.getInstance()
        self.mlpInstance = MLP.getInstance()
        self.naiveBayesInstance = NaiveBayes.getInstance()
        self.ensemblingInstance = Ensembling.getInstance()
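
Both constructors above rely on a getInstance() accessor that is not shown in this
listing. A minimal sketch of the class-level accessor such a singleton typically
pairs with (an assumption for illustration, not the original implementation):

    # Hypothetical accessor, placed inside the Ensembling class body.
    __instance = None

    @staticmethod
    def getInstance():
        # Lazily create the single shared instance on first access; the
        # constructor above registers itself in Ensembling.__instance.
        if Ensembling.__instance is None:
            Ensembling()
        return Ensembling.__instance
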
Example No. 7
class ActorCriticContinuous:
    def __init__(self,
                 env,
                 optim=Adam,
                 policy_lr=0.001,
                 value_lr=0.001,
                 policy_hidden_size=[64],
                 value_hidden_size=[64],
                 gamma=0.9,
                 batch_size=5000,
                 epochs=50,
                 update_every=50,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.render = render
        self.epochs = epochs
        self.gamma = gamma
        self.update_every = update_every
        self.writer_count = 0

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.shape[0]
        action_limit = env.action_space.high
        self.policy_mlp = GaussianMLP([obs_size] + policy_hidden_size +
                                      [action_size], action_limit)
        self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)
        self.value_mlp = MLP([obs_size] + value_hidden_size + [1])
        self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)

    def train(self):
        for epoch in range(self.epochs):
            returns, lens = self.train_single_batch(render=self.render)
            print("Epoch %2d, Return: %5.1f, Length: %3d" %
                  (epoch, np.mean(returns), np.mean(lens)))

    def train_single_batch(self, render=False):
        group_data = []
        batch_returns = []
        batch_lens = []
        episode_rewards = []

        done = False
        obs = self.env.reset()
        I_val = 1

        first_episode_render = True
        for t in range(self.batch_size):
            if render and first_episode_render:
                self.env.render()

            curr_obs = obs
            action = self.get_action(torch.as_tensor(obs, dtype=torch.float32))
            clamped_action = action.clamp(self.env.action_space.low.min(),
                                          self.env.action_space.high.max())
            obs, reward, done, _ = self.env.step(
                clamped_action.detach().numpy())
            episode_rewards.append(reward)
            group_data.append((curr_obs, action, reward, obs, done, I_val))

            I_val *= self.gamma

            if t > 0 and t % self.update_every == 0:
                (error, value_loss, policy_loss, value_grad,
                 policy_grad) = 0, 0, 0, 0, 0
                for data in group_data:
                    (error, value_loss, policy_loss, value_grad,
                     policy_grad) = self.update(data)
                self.writer_count += 1
                group_data = []

            if done:
                ep_return, ep_len = sum(episode_rewards), len(episode_rewards)
                batch_returns.append(ep_return)
                batch_lens.append(ep_len)

                episode_rewards = []
                obs, done = self.env.reset(), False
                I_val = 1

                first_episode_render = False

        return batch_returns, batch_lens

    def update(self, data):
        obs, action, reward, next_obs, done, I_val = data
        obs = torch.as_tensor([obs], dtype=torch.float32)
        next_obs = torch.as_tensor([next_obs], dtype=torch.float32)
        action = torch.as_tensor([action], dtype=torch.float32)
        reward = torch.as_tensor(reward, dtype=torch.float32)

        error = self.get_value_error(obs, next_obs, reward, done)

        self.value_optim.zero_grad()
        value_loss = self.value_update(obs, error)
        value_loss.backward()
        # nn.utils.clip_grad_norm_(self.value_mlp.parameters(), 0.5)
        self.value_optim.step()

        self.policy_optim.zero_grad()
        policy_loss = self.policy_update(obs, action, error, I_val)
        policy_loss.backward()
        # nn.utils.clip_grad_norm_(self.policy_mlp.parameters(), 0.5)
        self.policy_optim.step()

        # Sum each network's gradients so they can be returned for logging.
        value_grad_sum = 0
        policy_grad_sum = 0
        for p in self.value_mlp.parameters():
            value_grad_sum += p.grad.sum()
        for p in self.policy_mlp.parameters():
            policy_grad_sum += p.grad.sum()

        return error, value_loss, policy_loss, value_grad_sum, policy_grad_sum

    def policy(self, obs):
        # NOTE: unused leftover from the discrete-action variant; in this class
        # policy_mlp already returns a distribution (see get_action below).
        mlp_out = self.policy_mlp(obs)
        return Categorical(logits=mlp_out)

    def get_action(self, obs):
        policy_dist = self.policy_mlp(obs)
        action = policy_dist.rsample()
        return action

    def policy_update(self, obs, action, error, I):
        policy_dist = self.policy_mlp(obs)
        log_proba = policy_dist.log_prob(action).mean()
        return -(error * I * log_proba)

    def state_value(self, obs):
        mlp_out = self.value_mlp(obs)
        return mlp_out

    def value_update(self, obs, error):
        value = self.state_value(obs)
        return -(error * value)

    def get_value_error(self, obs, next_obs, reward, done):
        value = self.state_value(obs).clone().detach()
        next_value = 0 if done else self.state_value(next_obs).detach().clone()

        return (reward + self.gamma * next_value - value)
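
A usage sketch for the class above (an assumption, not part of the original listing:
it presumes the classic Gym API where reset() returns an observation and step()
returns a 4-tuple, plus the module's GaussianMLP/MLP helpers and the usual
numpy/torch/Adam imports):

    import gym

    env = gym.make("Pendulum-v1")   # continuous actions, so action_space.shape[0] and .high exist
    agent = ActorCriticContinuous(env, policy_lr=1e-3, value_lr=1e-3, epochs=10)
    agent.train()                   # prints mean return and mean episode length per epoch
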
Example No. 8
class ActorCriticEligibilityTrace:
    def __init__(self,
                 env,
                 optim=Adam,
                 policy_lr=0.001,
                 value_lr=0.001,
                 policy_hidden_size=[32],
                 value_hidden_size=[32],
                 gamma=0.95,
                 policy_lambda=0.9,
                 value_lambda=0.9,
                 batch_size=5000,
                 epochs=50,
                 update_every=50,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.render = render
        self.epochs = epochs
        self.gamma = gamma
        self.policy_lambda = policy_lambda
        self.value_lambda = value_lambda
        self.update_every = update_every

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.n
        self.policy_mlp = CategoricalMLP([obs_size] + policy_hidden_size +
                                         [action_size])
        self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)
        self.value_mlp = MLP([obs_size] + value_hidden_size + [1])
        self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)

    def train(self):
        for epoch in range(self.epochs):
            returns, lens = self.train_single_batch(render=self.render)
            print("Epoch %2d, Return: %5.1f, Length: %3d" %
                  (epoch, np.mean(returns), np.mean(lens)))

    def train_single_batch(self, render=False):
        batch_returns = []
        batch_lens = []
        episode_rewards = []

        done = False
        obs = self.env.reset()
        I_val = 1
        self.policy_trace = self.create_trace(self.policy_mlp)
        self.value_trace = self.create_trace(self.value_mlp)

        first_episode_render = True
        for t in range(self.batch_size):
            if render and first_episode_render:
                self.env.render()

            curr_obs = obs
            action, log_prob = self.policy_mlp(
                torch.as_tensor(obs, dtype=torch.float32))
            obs, reward, done, _ = self.env.step(action.detach().numpy())
            episode_rewards.append(reward)
            self.update((curr_obs, action, log_prob, reward, obs, done, I_val))

            I_val *= self.gamma

            if done:
                ep_return, ep_len = sum(episode_rewards), len(episode_rewards)
                batch_returns.append(ep_return)
                batch_lens.append(ep_len)

                episode_rewards = []
                obs, done = self.env.reset(), False
                I_val = 1

                first_episode_render = False

        return batch_returns, batch_lens

    def update(self, data):
        obs, action, log_prob, reward, next_obs, done, I_val = data
        obs = torch.as_tensor([obs], dtype=torch.float32)
        next_obs = torch.as_tensor([next_obs], dtype=torch.float32)
        action = torch.as_tensor(action, dtype=torch.float32)
        reward = torch.as_tensor(reward, dtype=torch.float32)

        error = self.get_value_error(obs, next_obs, reward, done)

        self.value_optim.zero_grad()
        self.value_set_grad(obs, error)
        self.value_optim.step()

        self.policy_optim.zero_grad()
        self.policy_set_grad(obs, action, log_prob, error, I_val)
        self.policy_optim.step()

    def policy_set_grad(self, obs, action, log_prob, error, I):
        log_prob.backward()
        for i, p in enumerate(self.policy_mlp.parameters()):
            self.policy_trace[i] = (
                self.gamma * self.policy_lambda * self.policy_trace[i] +
                I * p.grad)
            p.grad = -(error * self.policy_trace[i])

    def state_value(self, obs):
        mlp_out = self.value_mlp(obs)
        return mlp_out

    def value_set_grad(self, obs, error):
        value = self.state_value(obs)
        value.backward()
        for i, p in enumerate(self.value_mlp.parameters()):
            self.value_trace[i] = (
                self.gamma * self.value_lambda * self.value_trace[i] + p.grad)
            p.grad = -(error * self.value_trace[i])

    def get_value_error(self, obs, next_obs, reward, done):
        value = self.state_value(obs).clone().detach()
        next_value = 0 if done else self.state_value(next_obs).clone().detach()

        return (reward + self.gamma * next_value - value).item()

    def create_trace(self, model):
        trace = []
        for p in model.parameters():
            trace.append(torch.zeros(p.shape))
        return trace
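
The two *_set_grad methods above implement one-step actor-critic with eligibility
traces: each parameter keeps a trace that decays by gamma * lambda and accumulates
the current gradient, and the optimizer then steps along td_error * trace (the sign
flip on p.grad turns the ascent direction into a descent step). In isolation, the
per-parameter update looks roughly like this (an illustrative sketch with assumed
names, not code from the listing):

    def trace_step(p, z, td_error, gamma, lam, scale=1.0):
        # z <- gamma * lam * z + scale * grad   (scale is the discount weight I
        # for the policy trace, 1 for the value trace)
        z.mul_(gamma * lam).add_(scale * p.grad)
        # Writing -(td_error * z) into p.grad makes a plain gradient step move
        # the parameter along +td_error * z.
        p.grad = -(td_error * z)
        return z
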
Example No. 9
class PolicyGradient:
    def __init__(self,
                 env,
                 optim=Adam,
                 lr=0.01,
                 hidden_size=[64],
                 batch_size=5000,
                 n_episodes=2000,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.n_episodes = n_episodes
        self.lr = lr
        self.render = render

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.n
        self.mlp = MLP([obs_size] + hidden_size + [action_size])
        self.optim = optim(self.mlp.parameters(), lr=lr)

    def train(self):
        for epoch in range(50):
            render = self.render and epoch % 5 == 0

            loss, returns, lens = self.train_single_batch(render=render)
            print("Epoch %2d, Loss %5.1f, Return: %5.1f, Length: %3d" %
                  (epoch, loss.item(), np.mean(returns), np.mean(lens)))

    def train_single_batch(self, render=False):
        timestep = 0

        batch_obss = []
        batch_actions = []
        batch_weights = []
        batch_returns = []
        batch_lens = []
        episode_rewards = []

        done = False
        obs = self.env.reset()

        first_episode_render = True
        while True:

            if render and first_episode_render:
                self.env.render()

            batch_obss.append(obs)
            action = self.get_action(torch.as_tensor(obs, dtype=torch.float32))
            obs, reward, done, _ = self.env.step(action)
            batch_actions.append(action)
            episode_rewards.append(reward)

            timestep += 1
            if done:
                episode_return = sum(episode_rewards)
                episode_len = len(episode_rewards)
                batch_returns.append(episode_return)
                batch_lens.append(episode_len)

                batch_weights += [
                    sum(episode_rewards[i:])
                    for i, _ in enumerate(episode_rewards)
                ]

                first_episode_render = False
                obs, done, episode_rewards = self.env.reset(), False, []

                if len(batch_obss) > self.batch_size:
                    break

        self.optim.zero_grad()
        batch_loss = self.policy_update(
            torch.as_tensor(batch_obss, dtype=torch.float32),
            torch.as_tensor(batch_actions, dtype=torch.float32),
            torch.as_tensor(batch_weights, dtype=torch.float32))
        batch_loss.backward()
        self.optim.step()

        return batch_loss, batch_returns, batch_lens

    def policy(self, obs):
        mlp_out = self.mlp(obs)
        return Categorical(logits=mlp_out)

    def get_action(self, obs):
        policy_dist = self.policy(obs)
        action = policy_dist.sample().item()
        return action

    def policy_update(self, obs, actions, returns):
        policy_dist = self.policy(obs)
        log_proba = policy_dist.log_prob(actions)
        return -(returns * log_proba).mean()
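
A usage sketch for PolicyGradient (assumed, not from the listing; it presumes a
discrete-action Gym environment with the classic 4-tuple step() API and the
module's MLP helper plus the usual numpy/torch imports):

    import gym

    env = gym.make("CartPole-v1")   # discrete actions, so env.action_space.n is defined
    agent = PolicyGradient(env, lr=0.01, batch_size=5000)
    agent.train()                   # 50 epochs of REINFORCE with reward-to-go weights
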
Example No. 10
if "NRF" in list(data):
    data.pop("NRF")
if "POSTCR" in list(data):
    data.pop("POSTCR")
if "OpID" in list(data):
    data.pop("OpID")
if "PatID" in list(data):
    data.pop("PatID")
if "DOA" in list(data):
    data.pop("DOA")

y = data.pop("HAEMOFIL")

ros = RandomOverSampler(random_state=1)

# StandardScaler (not the scale() function) is needed here so the fitted
# statistics can be reused via scaler.transform() below.
scaler = StandardScaler()
scaler.fit(data)

print("full")
param = {'layers': [2, 5], 'nodes': [5, 10], 'dropout': [0.4, 0.8], 'epochs': [50]}
# Note: the iid argument was removed in scikit-learn 0.24; drop it on newer versions.
gsearch = GridSearchCV(estimator=MLP(), param_grid=param, scoring='roc_auc',
                       iid=False, cv=rkf_search, verbose=2)


gsearch.fit(scaler.transform(data.values), y.values)
clf = gsearch.best_estimator_
pd.DataFrame(gsearch.cv_results_).to_csv("output/HF/MLPfull.csv")

output = cross_validate(clf, scaler.transform(data.values), y.values,
                        scoring=metrics, cv=rkf, verbose=2, return_train_score=True)
pd.DataFrame(output).to_csv('output/HF/performanceMLPfull.csv')
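
The snippet assumes several names defined earlier in the full script. A plausible
preamble (an assumption chosen for illustration; MLP here must be a
scikit-learn-compatible estimator defined elsewhere) could look like:

    import pandas as pd
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import GridSearchCV, RepeatedKFold, cross_validate
    from imblearn.over_sampling import RandomOverSampler

    # Hypothetical cross-validation splitters and metric list used above.
    rkf_search = RepeatedKFold(n_splits=5, n_repeats=2, random_state=1)
    rkf = RepeatedKFold(n_splits=10, n_repeats=5, random_state=1)
    metrics = ['roc_auc', 'accuracy', 'recall']
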
Example No. 11
class ActorCritic:
    def __init__(self,
                 env,
                 optim=Adam,
                 policy_lr=0.001,
                 value_lr=0.001,
                 policy_hidden_size=[32],
                 value_hidden_size=[32],
                 gamma=0.9,
                 batch_size=5000,
                 epochs=50,
                 update_every=50,
                 render=False):
        self.env = env
        self.batch_size = batch_size
        self.render = render
        self.epochs = epochs
        self.gamma = gamma
        self.update_every = update_every

        obs_size = np.prod(env.observation_space.shape)
        action_size = env.action_space.n
        self.policy_mlp = MLP([obs_size] + policy_hidden_size + [action_size])
        self.policy_optim = optim(self.policy_mlp.parameters(), lr=policy_lr)
        self.value_mlp = MLP([obs_size] + value_hidden_size + [1])
        self.value_optim = optim(self.value_mlp.parameters(), lr=value_lr)

    def train(self):
        for epoch in range(self.epochs):
            render = self.render and epoch % 5 == 0

            returns, lens = self.train_single_batch(render=render)
            print("Epoch %2d, Return: %5.1f, Length: %3d" %
                  (epoch, np.mean(returns), np.mean(lens)))

    def train_single_batch(self, render=False):
        group_data = []
        batch_returns = []
        batch_lens = []
        episode_rewards = []

        done = False
        obs = self.env.reset()
        I_val = 1

        first_episode_render = True
        for t in range(self.batch_size):
            if render and first_episode_render:
                self.env.render()

            curr_obs = obs
            action = self.get_action(torch.as_tensor(obs, dtype=torch.float32))
            obs, reward, done, _ = self.env.step(action)
            episode_rewards.append(reward)
            group_data.append((curr_obs, action, reward, obs, done, I_val))

            I_val *= self.gamma

            if t > 0 and t % self.update_every == 0:
                for data in group_data:
                    self.update(data)
                group_data = []

            if done:
                ep_return, ep_len = sum(episode_rewards), len(episode_rewards)
                batch_returns.append(ep_return)
                batch_lens.append(ep_len)

                episode_rewards = []
                obs, done = self.env.reset(), False
                I_val = 1

                first_episode_render = False

        return batch_returns, batch_lens

    def update(self, data):
        obs, action, reward, next_obs, done, I_val = data
        obs = torch.as_tensor([obs], dtype=torch.float32)
        next_obs = torch.as_tensor([next_obs], dtype=torch.float32)
        action = torch.as_tensor([action], dtype=torch.float32)
        reward = torch.as_tensor(reward, dtype=torch.float32)

        error = self.get_value_error(obs, next_obs, reward, done)

        self.value_optim.zero_grad()
        value_loss = self.value_update(obs, error)
        value_loss.backward()
        self.value_optim.step()

        self.policy_optim.zero_grad()
        policy_loss = self.policy_update(obs, action, error, I_val)
        policy_loss.backward()
        self.policy_optim.step()

    def policy(self, obs):
        mlp_out = self.policy_mlp(obs)
        return Categorical(logits=mlp_out)

    def get_action(self, obs):
        policy_dist = self.policy(obs)
        action = policy_dist.sample().item()
        return action

    def policy_update(self, obs, action, error, I):
        policy_dist = self.policy(obs)
        log_proba = policy_dist.log_prob(action)
        return -(error * I * log_proba)

    def state_value(self, obs):
        mlp_out = self.value_mlp(obs)
        return mlp_out

    def value_update(self, obs, error):
        value = self.state_value(obs)
        return -(error * value)

    def get_value_error(self, obs, next_obs, reward, done):
        value = self.state_value(obs).clone().detach()
        next_value = 0 if done else self.state_value(next_obs).clone().detach()

        return (reward + self.gamma * next_value - value)
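
And a matching usage sketch for the discrete one-step actor-critic above (assumed,
not part of the listing; same Gym API and module-level MLP helper as before):

    import gym

    env = gym.make("CartPole-v1")
    agent = ActorCritic(env, gamma=0.9, update_every=50, epochs=10, render=False)
    agent.train()   # renders every 5th epoch only when render=True is passed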