Example #1
    def learn(self):
        """ Train behavior model """
        for t in range(self.num_replays):
            states_t, actions, rewards, next_states_t, dones = self.replay_buffer.sample(
            )
            states_t = torch.stack(states_t).to(self.device)
            next_states_t = torch.stack(next_states_t).to(self.device)

            q_values = self.decision_model(states_t)
            q_values_next = self.policy_model(next_states_t)
            q_values_target = q_values.clone()

            for i, action in enumerate(actions):
                q_values_target[i, action] = utils.target(
                    rewards[i], self.gamma, q_values_next[i],
                    self.targetPolicy, dones[i])

            self.optimizer.zero_grad()
            loss = self.criterion(q_values, q_values_target.detach())
            loss.backward()
            self.optimizer.step()

        utils.copy_model(self.decision_model, self.policy_model, tau=self.tau)
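
The `utils.copy_model(source, target, tau)` helper used in the PyTorch examples on this page (#1-#5 and #7) is not shown here. A minimal sketch of what such a soft-update helper commonly looks like, assuming Polyak averaging between two PyTorch modules with `tau=1.0` acting as a hard copy:

import torch


def copy_model(source, target, tau=1.0):
    """Blend target parameters toward source parameters (Polyak averaging).

    tau=1.0 copies source into target outright; small tau values give the
    slow-moving target networks used in DQN/DDPG-style updates.
    """
    with torch.no_grad():
        for src_param, tgt_param in zip(source.parameters(), target.parameters()):
            tgt_param.data.mul_(1.0 - tau).add_(tau * src_param.data)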
Example #2
    def __init__(self, agent_dict={}, actor_dict={}, critic_dict={}):
        """ Initialize Agent object

        Params
        ======
            agent_dict(dict): dictionary containing parameters for the agent
            actor_dict(dict): dictionary containing parameters for the agent's actor model
            critic_dict(dict): dictionary containing parameters for the agent's critic model
        """
        enable_cuda = agent_dict.get("enable_cuda", False)
        if enable_cuda:
            self.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")

        self.num_agents = agent_dict.get("num_agents", 20)

        self.num_episodes = agent_dict.get("num_episodes", 10000)
        self.save_after = agent_dict.get("save_after", -1)
        self.name = agent_dict.get("name", "reacher")

        self.gamma = agent_dict.get("gamma", 0.9)

        self.tau = agent_dict.get("tau", 0.001)

        self.noise = utils.OUNoise((self.num_agents, 4), 0)

        self.num_replays = agent_dict.get("num_replays", 1)

        self.learning_rate_actor = agent_dict.get("learning_rate_actor", 1E-3)
        self.learning_rate_critic = agent_dict.get("learning_rate_critic",
                                                   1E-3)

        self.criterion = nn.MSELoss()

        memory_size = agent_dict.get("memory_size", 2**14)
        batchsize = agent_dict.get("batchsize", 2**10)
        replay_reg = agent_dict.get("replay_reg", 0.0)

        self.replay_buffer = utils.ReplayBuffer(memory_size, batchsize)

        self.actor = model.ActorModel(actor_dict).to(self.device)
        self.actor_target = model.ActorModel(actor_dict).to(self.device)

        self.critic = model.CriticModel(critic_dict).to(self.device)
        self.critic_target = model.CriticModel(critic_dict).to(self.device)

        self.actor_optimizer = optim.Adam(self.actor.parameters(),
                                          lr=self.learning_rate_actor)
        self.critic_optimizer = optim.Adam(self.critic.parameters(),
                                           lr=self.learning_rate_critic)

        utils.copy_model(self.actor, self.actor_target, tau=1.0)
        utils.copy_model(self.critic, self.critic_target, tau=1.0)

        seed = agent_dict.get("seed", 0)

        torch.manual_seed(seed)
        np.random.seed(seed)
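
The `utils.OUNoise((self.num_agents, 4), 0)` exploration noise constructed above is also external to this listing. A common Ornstein-Uhlenbeck noise implementation looks roughly like the following (the `mu`, `theta`, and `sigma` defaults are assumptions, not values from the original project):

import numpy as np


class OUNoise:
    """Ornstein-Uhlenbeck process producing temporally correlated exploration noise."""

    def __init__(self, size, seed, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.rng = np.random.default_rng(seed)
        self.reset()

    def reset(self):
        """Reset the internal state back to the mean."""
        self.state = self.mu.copy()

    def sample(self):
        """Advance the process one step and return the current noise value."""
        dx = self.theta * (self.mu - self.state) + self.sigma * self.rng.standard_normal(self.mu.shape)
        self.state = self.state + dx
        return self.state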
Example #3
    def learn(self):
        """ Train actor and critic based on past experiences """
        for t in range(self.num_replays):
            states_t, actions_t, rewards_t, next_states_t, dones_t = self.replay_buffer.sample(
                self.device)

            next_actions_t = self.actor_target(next_states_t)
            next_q_values_t = self.critic_target(next_states_t,
                                                 next_actions_t).detach()

            targets_t = rewards_t + (self.gamma * next_q_values_t *
                                     (1 - dones_t))

            q_values_t = self.critic(states_t, actions_t)

            self.critic_optimizer.zero_grad()
            critic_loss = self.criterion(q_values_t, targets_t)
            critic_loss.backward()
            self.critic_optimizer.step()

            proposed_actions_t = self.actor(states_t)
            proposed_q_values_t = self.critic(states_t, proposed_actions_t)

            self.actor_optimizer.zero_grad()
            actor_loss = -proposed_q_values_t.mean()
            actor_loss.backward()
            self.actor_optimizer.step()

            utils.copy_model(self.actor, self.actor_target, tau=self.tau)
            utils.copy_model(self.critic, self.critic_target, tau=self.tau)
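
Examples #2-#4 rely on a `utils.ReplayBuffer(memory_size, batchsize)` whose `sample(device)` returns already batched tensors. A uniform buffer with that interface might be sketched as follows; the exact tensor layout (column vectors for rewards and done flags) is an assumption:

import random
from collections import deque

import numpy as np
import torch


class ReplayBuffer:
    """Uniform experience replay with the append(...) / sample(device) interface assumed above."""

    def __init__(self, memory_size, batchsize):
        self.memory = deque(maxlen=memory_size)
        self.batchsize = batchsize

    def append(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self, device):
        batch = random.sample(self.memory, min(self.batchsize, len(self.memory)))
        states, actions, rewards, next_states, dones = map(np.asarray, zip(*batch))

        def to_tensor(a):
            return torch.as_tensor(a, dtype=torch.float32, device=device)

        return (to_tensor(states), to_tensor(actions),
                to_tensor(rewards).unsqueeze(1), to_tensor(next_states),
                to_tensor(dones).unsqueeze(1))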
Example #4
    def run(self, env):
        """ Train agent in environment env

        Params
        ======
            env(Env): environment to train agent in
        """
        recent_scores = deque(maxlen=100)
        recent_losses = deque(maxlen=100)

        f = open("performance.log", "w")
        f.write("#Score\tAvg.Score\n")

        for e in range(self.num_episodes):
            scores = np.zeros(self.num_agents)
            states = env.reset()
            done = False

            self.noise.reset()

            while True:
                actions, states_t = self.act(states)

                next_states, rewards, dones, _ = env.step(actions)
                scores += rewards

                next_states_t = self.preprocess(next_states)

                for a in range(self.num_agents):
                    self.replay_buffer.append(states_t[a], actions[a],
                                              rewards[a], next_states_t[a],
                                              dones[a])

                self.learn()

                states = next_states

                if np.any(dones):
                    break

            recent_scores.append(np.mean(scores))
            print("Iteration %i: score: %f\taverage_score: %f" %
                  (e, np.mean(scores), np.mean(recent_scores)))

            f.write(
                str(np.mean(scores)) + "\t" + str(np.mean(recent_scores)) +
                "\n")

            f.flush()

            utils.copy_model(self.actor, self.actor_target, tau=self.tau)
            utils.copy_model(self.critic, self.critic_target, tau=self.tau)

            if e == self.save_after:
                self.save_state()

        f.close()
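
The `act()` and `preprocess()` methods called in `run()` are not part of this listing. A standalone sketch of what they plausibly do, written as free functions for clarity (the names and the [-1, 1] clipping range are assumptions):

import numpy as np
import torch


def preprocess(states, device):
    """Convert raw environment observations into a float tensor on the agent's device."""
    return torch.as_tensor(np.asarray(states), dtype=torch.float32, device=device)


def act(actor, states, noise, device):
    """Evaluate the actor on preprocessed states, add OU exploration noise,
    and clip to the action range; returns (actions, states_t) as used in run()."""
    states_t = preprocess(states, device)
    with torch.no_grad():
        actions = actor(states_t).cpu().numpy()
    actions = np.clip(actions + noise.sample(), -1.0, 1.0)
    return actions, states_t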
Example #5
    def __init__(self, agent_dict={}, model_dict={}):
        """ Initialize Agent object

        Params
        ======
            agent_dict(dict): dictionary containing parameters for the agent
            model_dict(dict): dictionary containing parameters for the agent's model
        """
        if agent_dict.get("enable_gpu", False):
            self.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")
        else:
            self.device = torch.device("cpu")
        self.num_episodes = agent_dict.get("num_episodes", 10000)
        self.save_after = agent_dict.get("save_after", -1)
        self.name = agent_dict.get("name", "banana_collector")

        self.gamma = agent_dict.get("gamma", 0.9)

        self.epsilon = agent_dict.get("epsilon_start", 1.0)
        self.epsilon_decay = agent_dict.get("epsilon_decay", 0.9)
        self.epsilon_min = agent_dict.get("epsilon_min", 0.1)

        self.tau = agent_dict.get("tau", 0.1)

        self.num_replays = agent_dict.get("num_replays", 1)

        self.criterion = nn.MSELoss()

        memory_size = agent_dict.get("memory_size", 2**14)
        batchsize = agent_dict.get("batchsize", 2**10)
        replay_reg = agent_dict.get("replay_reg", 0.0)

        self.replay_buffer = utils.PrioritizedReplayBuffer(memory_size,
                                                           batchsize,
                                                           epsilon=replay_reg)

        self.decision_model = model.Model(model_dict).to(self.device)
        self.policy_model = model.Model(model_dict).to(self.device)

        self.optimizer = optim.Adam(self.decision_model.parameters(), lr=1E-3)

        utils.copy_model(self.decision_model, self.policy_model, tau=1.0)
        seed = agent_dict.get("seed", 0)

        torch.manual_seed(seed)
        np.random.seed(seed)
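
The `utils.PrioritizedReplayBuffer(memory_size, batchsize, epsilon=replay_reg)` used here is again not shown. A minimal proportional prioritized buffer with that constructor and the argument-free `sample()` seen in Example #1 could look like this; the `alpha` exponent and the list-based bookkeeping are assumptions, and production code would typically use a sum tree:

from collections import deque

import numpy as np


class PrioritizedReplayBuffer:
    """Proportional prioritized replay; epsilon regularizes priorities so that
    transitions with near-zero TD error can still be sampled."""

    def __init__(self, memory_size, batchsize, epsilon=0.0, alpha=0.6):
        self.memory = deque(maxlen=memory_size)
        self.priorities = deque(maxlen=memory_size)
        self.batchsize = batchsize
        self.epsilon = epsilon
        self.alpha = alpha

    def append(self, state, action, reward, next_state, done, td_error=1.0):
        self.memory.append((state, action, reward, next_state, done))
        self.priorities.append((abs(td_error) + self.epsilon) ** self.alpha)

    def sample(self):
        probs = np.asarray(self.priorities, dtype=np.float64)
        probs /= probs.sum()
        idx = np.random.choice(len(self.memory),
                               size=min(self.batchsize, len(self.memory)),
                               p=probs, replace=False)
        batch = [self.memory[i] for i in idx]
        states, actions, rewards, next_states, dones = zip(*batch)
        return list(states), list(actions), list(rewards), list(next_states), list(dones)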
Example #6
    def emit_model_event(self, evtname, evtsrc, *data, **kwargs):
        """ Emit the given event(s) with a copy of evtsrc on the IOLoop when the model is dirty or force is passed """
        loop = IOLoop.instance()
        if "force" in kwargs or utils.is_dirty(evtsrc):
            src = utils.copy_model(evtsrc)
            if isinstance(evtname, (list, tuple)):
                for evt in evtname:
                    loop.add_callback(self.emit, evt, src, *data)
            else:
                loop.add_callback(self.emit, evtname, src, *data)
Example #7
    def trainDecisionModel(self, q_values, reward, action, q_values_next,
                           done):
        """ Train behavior model. Returns TD error and loss

        Params
        ======
            q_values(torch.Tensor): action values
            reward(float): observed reward
            action(int): chosen action
            q_values_next(torch.Tensor): action values of the next state
            done(bool): flag indicating terminal state
        """
        q_values_target, td_target = self.qValuesTarget(
            q_values, reward, action, q_values_next, done)

        self.optimizer.zero_grad()
        loss = self.criterion(q_values, q_values_target.detach())
        loss.backward()
        self.optimizer.step()
        utils.copy_model(self.decision_model, self.policy_model, tau=self.tau)

        td_error = (td_target - q_values[action].cpu()).item()

        return td_error, loss
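
Example #1's `utils.target(reward, gamma, q_values_next, policy, done)` and the `qValuesTarget` method above both build a one-step TD target. A plausible reconstruction of the former, where `policy` is the reduction applied to the next-state action values (e.g. `torch.max` for DQN) and the whole body is an assumption:

def target(reward, gamma, q_values_next, policy, done):
    """One-step TD target: r if the episode ended, else r + gamma * policy(Q(s'))."""
    if done:
        return reward
    return reward + gamma * policy(q_values_next).item()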
Example #8
def run_NNclassifier(params):

    if params.out_dir is not None:
        if not os.path.exists(params.out_dir): os.makedirs(params.out_dir)

    for key, value in params.__dict__.items():
        print(str(key) + ': \t\t\t' + str(value))

    # load data: x: ntrials x ntimepoints x nfeatures, y: ntrials
    x = np.load(params.x_file)
    y = np.load(params.y_file)
    n_classes = len(np.unique(y))

    # get train, val and test sets
    n_folds = int(100 / params.test_pcnt)
    Train, Val, Test = utils.make_kcrossvalidation(x, y, n_folds, shuffle=True)
    Train, Val, Test, means, stds = utils.zscore_dataset(Train, Val, Test,
                                                         z_train=True, zscore_x=params.zscore, zscore_y=False)
    Train, Val, Test = utils.dim_check(Train, Val, Test, nn_type=params.nn_type, nn_dim=params.n_dim)

    # train model
    Models = []
    for kfold in range(n_folds):
        print "Fold " + str(kfold)
        NN = utils.make_NN(n_classes=n_classes, params=params)

        M = NNClassifier(NN, lr = params.lr, w_decay=params.w_decay)
        ktrain = utils.augment(Train[kfold], n_times=params.augment_times) if params.augment else Train[kfold]
        M.train(ktrain, Val[kfold], n_epochs=params.n_epochs, batch_size=params.batch_size,
                                                                                no_improve_lim=params.early_stop)
        M.test(Test[kfold])

        Models.append(utils.copy_model(M, copyall=params.save_weights))

    # save models
    pM = utils.concat_models(Models)
    if params.out_dir is not None: utils.save_model(pM, params.out_dir + '/model' + str(n_folds) + '.p')

    print "Total performance: " + \
        str(round(sum(pM.test_correct) / float(sum(pM.test_n)), 4)) + " (" +\
            str(sum(pM.test_correct)) + '/' + str(sum(pM.test_n)) + ")"
Example #9
def column_generation(n, demands, capacity, distances, duals, MP_branch):

    SP_branch = SubProblem(n, demands, capacity, distances, duals)
    SP_branch.build_model()

    SP_branch.optimize()

    new_MP = None

    newAssing = [SP_branch.y[i].x for i in SP_branch.y]  # new route
    obj = get_min_dist(newAssing, distances)  # Cost of new route

    if obj + SP_branch.modelo.ObjVal < 0.0:
        newColumn = gp.Column(newAssing, MP_branch.modelo.getConstrs())
        MP_branch.modelo.addVar(vtype=GRB.BINARY, obj=obj, column=newColumn)
        MP_branch.modelo.update()
        MP_branch.RelaxOptimize()
        best_cost = MP_branch.getCosts()
        routes = MP_branch.modelo.getA().toarray()

        new_MP = copy_model(best_cost, routes, MP_branch)

    return new_MP
Example #10
def branch_n_price(n, demands, capacity, distances, MasterProb):

    queue = PriorityQueue()
    MasterProb.RelaxOptimize()
    obj_val = MasterProb.relax_modelo.ObjVal

    queue.insert(obj_val, MasterProb)
    best_int_obj = 1e3
    best_relax_obj = 1e3

    nodes_explored = 0
    best_model = None

    while not queue.isEmpty():
        obj_val, MP_branch = queue.delete()
        nodes_explored += 1
        MP_branch.RelaxOptimize()
        solution = MP_branch.getSolution()
        duals = MP_branch.getDuals()

        branch_cost = MP_branch.getCosts()
        branch_routes = MP_branch.modelo.getA().toarray()
        sol_is_int = all([float(round(s, 4)).is_integer() for s in solution])
        # sol_is_int = all([False if i > 0.3 and np.abs(i - 1.0) > 0.3 else True for i in solution ])

        if obj_val < best_int_obj and sol_is_int:
            print(f"Best Integer Obj: {obj_val}")
            print(f"Nodes explored: {nodes_explored}")
            best_int_obj = obj_val

            # print(f"best sol: {solution}")
            best_model = copy_model(branch_cost, branch_routes, MP_branch)

        if obj_val < best_relax_obj:
            print(f"Best Relaxed Obj: {obj_val}")
            print(f"Nodes explored: {nodes_explored}")
            best_relax_obj = obj_val

        # --- # --- # Column generation # --- # --- #
        new_MP = column_generation(n, demands, capacity, distances, duals,
                                   MP_branch)

        if new_MP is not None:
            new_MP.RelaxOptimize()
            branch_cost = new_MP.getCosts()
            branch_routes = new_MP.modelo.getA().toarray()
            if new_MP.relax_modelo.ObjVal <= best_relax_obj:
                queue.insert(new_MP.relax_modelo.ObjVal,
                             copy_model(branch_cost, branch_routes, new_MP))

        else:
            # --- # If stopped col generation then branch if solution is not integer # --- #

            if not sol_is_int:

                # print("#--#--#--# Not integer solution  ........Branching")
                queue = branch(branch_cost, branch_routes, n, demands,
                               capacity, distances, duals, solution, MP_branch,
                               queue, best_relax_obj)
            else:
                # print(f"best sol: {solution}")
                best_model = MP_branch

    return best_model
Example #11
def branch(branch_cost, branch_routes, n, demands, capacity, distances, duals,
           solution_to_branch, MP_to_copy, queue, best_inc_obj):

    frac_ixs = []

    for ix, val in enumerate(solution_to_branch):
        if val > 0.0 and val < 1.0:
            frac_ixs.append(ix)

    A_mp = MP_to_copy.modelo.getA().toarray()

    locations_index = list(MP_to_copy.locations_index)

    for comb in combinations(frac_ixs, 2):

        SP_1 = SubProblem(n, demands, capacity, distances, duals)
        SP_2 = SubProblem(n, demands, capacity, distances, duals)
        SP_1.build_model()
        SP_2.build_model()

        s1_and_s2 = [
            True if (A_mp[i - 1, comb[0]] == 1 and A_mp[i - 1, comb[1]] == 1)
            else False for i in range(len(MP_to_copy.locations_index))
        ]
        s1_not_s2 = [
            True if (A_mp[i - 1, comb[0]] == 1 and A_mp[i - 1, comb[1]] == 0)
            else False for i in range(len(MP_to_copy.locations_index))
        ]

        for i in locations_index:
            locations_prime = [x for x in locations_index if x != i]
            for j in locations_prime:

                if (s1_and_s2[i - 1] and s1_not_s2[j - 1]):
                    # SP_1.modelo.addConstr(SP_1.y[i - 1] + SP_1.y[j - 1] == 2)
                    # SP_2.modelo.addConstr(SP_2.y[i - 1] + SP_2.y[j - 1] == 1)
                    SP_1.modelo.addConstr(SP_1.y[i - 1] == 1)
                    SP_1.modelo.addConstr(SP_1.y[j - 1] == 1)
                    SP_2.modelo.addConstr(SP_2.y[i - 1] == 1)
                    SP_2.modelo.addConstr(SP_2.y[j - 1] == 0)

        MP_1, MP_2 = copy_models(branch_cost, branch_routes, MP_to_copy)

        SP_1.modelo.update()
        SP_1.optimize()
        if SP_1.modelo.Status == 2:

            newAssing = [SP_1.y[i].x for i in SP_1.y]  # new assignment
            obj = get_min_dist(newAssing, distances)  # Cost of new route

            if obj + SP_1.modelo.ObjVal < 0.0:
                newColumn = gp.Column(newAssing, MP_1.modelo.getConstrs())
                MP_1.modelo.addVar(vtype=GRB.BINARY, obj=obj, column=newColumn)
                MP_1.modelo.update()

                MP_1.RelaxOptimize()
                mp1_cost = MP_1.getCosts()
                mp1_routes = MP_1.modelo.getA().toarray()
                if MP_1.relax_modelo.ObjVal <= best_inc_obj:
                    queue.insert(MP_1.relax_modelo.ObjVal,
                                 copy_model(mp1_cost, mp1_routes, MP_1))

        SP_2.modelo.update()
        SP_2.optimize()
        if SP_2.modelo.Status == 2:

            newAssing = [SP_2.y[i].x for i in SP_2.y]  # new assignment
            obj = get_min_dist(newAssing, distances)  # Cost of new route

            if obj + SP_2.modelo.ObjVal < 0.0:

                newColumn = gp.Column(newAssing, MP_2.modelo.getConstrs())
                MP_2.modelo.addVar(vtype=GRB.BINARY, obj=obj, column=newColumn)
                MP_2.modelo.update()
                MP_2.RelaxOptimize()
                mp2_cost = MP_2.getCosts()
                mp2_routes = MP_2.modelo.getA().toarray()
                if MP_2.relax_modelo.ObjVal <= best_inc_obj:
                    queue.insert(MP_2.relax_modelo.ObjVal,
                                 copy_model(mp2_cost, mp2_routes, MP_2))

    return queue
Example #12
def main(_):
    """
    main function
    """

    dataset = data_manager.DataManager(init_data=FLAGS.allow_init_data)
    model_dir = '../../runs/bag/{}'.format(FLAGS.model)

    settings = NETwork.Settings()
    settings.vocab_size = len(dataset.wordembedding)
    settings.num_classes = len(dataset.train_y[0])
    settings.filter_sizes = list(map(int, FLAGS.filter_sizes.split(',')))
    settings.pattern_num = FLAGS.pattern_num
    settings.l2_reg_omega = FLAGS.l2_reg_omega

    with tf.Graph().as_default():
        #gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.4)
        gpu_options = tf.GPUOptions(allow_growth=True)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        sess = tf.Session(config=session_conf)
        with sess.as_default():

            # Output directory for models and summaries
            # timestamp = str(int(time.time()))
            timestamp = FLAGS.model
            out_dir = os.path.abspath(
                os.path.join(os.path.pardir, os.path.pardir + '/runs/bag',
                             timestamp))

            print('Construct network for training......')
            network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                  settings=settings,
                                  is_training=True,
                                  is_evaluating=False,
                                  use_types=FLAGS.use_types)

            # Get Evaluator for evaluation
            if FLAGS.allow_evaluation:
                print('Construct network for evaluation......')
                e_network = NETwork.CNN(word_embeddings=dataset.wordembedding,
                                        settings=settings,
                                        is_training=True,
                                        is_evaluating=True,
                                        use_types=FLAGS.use_types)
                latest_score = utils.read_pr(out_dir)
                evaluator = evaluation.Evaluator(dataset, sess, e_network,
                                                 model_dir, settings,
                                                 latest_score)

            # Define training procedure
            global_step = tf.Variable(0, name='global_step', trainable=False)
            optimizer = tf.train.AdamOptimizer(0.001)
            grads_and_vars = optimizer.compute_gradients(network.final_loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar('loss', network.final_loss)
            acc_summary = tf.summary.scalar('accuracy', network.accuracy)
            pr_summary = tf.summary.scalar('pr_curve', evaluator.highest_score)

            # Train summaries
            train_summary_op = tf.summary.merge_all()
            train_summary_dir = os.path.join(out_dir, 'summaries', 'train')
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Checkpoint directory: TensorFlow assumes this directory already exists, so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, 'checkpoints'))
            checkpoint_prefix = os.path.join(checkpoint_dir, 'model')
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # train_step
            def train_step(word_batch, pos1_batch, pos2_batch, type_batch,
                           y_batch, mask_batch):
                """
                A single training step
                """
                total_word = []
                total_pos1 = []
                total_pos2 = []
                total_type = []
                total_shape = []
                total_mask = []
                total_num = 0

                for i in range(len(word_batch)):
                    total_shape.append(total_num)
                    total_num += len(word_batch[i])

                    for j in range(len(word_batch[i])):
                        total_word.append(word_batch[i][j])
                        total_pos1.append(pos1_batch[i][j])
                        total_pos2.append(pos2_batch[i][j])
                        total_type.append(type_batch[i][j])
                        total_mask.append(mask_batch[i][j])

                # Here total_word and y_batch are not equal, total_word[total_shape[i]:total_shape[i+1]] is related to y_batch[i]
                total_shape.append(total_num)

                total_shape = np.array(total_shape)
                total_word = np.array(total_word)
                total_pos1 = np.array(total_pos1)
                total_pos2 = np.array(total_pos2)
                total_type = np.array(total_type)
                total_mask = np.array(total_mask)

                feed_dict = {
                    network.input_word: total_word,
                    network.input_pos1: total_pos1,
                    network.input_pos2: total_pos2,
                    network.input_type: total_type,
                    network.input_y: y_batch,
                    network.total_shape: total_shape,
                    network.dropout_keep_prob: FLAGS.dropout_keep_prob,
                    # network.input_mask: total_mask
                }

                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op,
                    network.final_loss, network.accuracy
                ], feed_dict)
                train_summary_writer.add_summary(summaries, step)
                """
                if step % 100 == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print('{}: step {}, loss {:g}, acc {:g}'.format(time_str, step, loss, accuracy))
                """

            """
            Train epochs
            """
            print('Start training......')
            for epoch in range(FLAGS.num_epochs):
                # Randomly shuffle data
                shuffle_indices = np.random.permutation(
                    np.arange(len(dataset.train_y)))
                num_batches_per_epoch = int(
                    (len(dataset.train_y) - 1) / settings.batch_size) + 1
                #num_batches_per_epoch = int(len(shuffle_indices)/float(settings.batch_size))

                epoch_last_step = 0
                for batch_num in range(num_batches_per_epoch):
                    start_index = batch_num * settings.batch_size
                    end_index = min((batch_num + 1) * settings.batch_size,
                                    len(dataset.train_y))
                    if (end_index - start_index) != settings.batch_size:
                        start_index = end_index - settings.batch_size
                    batch_index = shuffle_indices[start_index:end_index]

                    word_batch = dataset.train_word[batch_index]
                    pos1_batch = dataset.train_pos1[batch_index]
                    pos2_batch = dataset.train_pos2[batch_index]
                    type_batch = dataset.train_type[batch_index]
                    mask_batch = dataset.train_mask[batch_index]
                    y_batch = dataset.train_y[batch_index]

                    train_step(word_batch, pos1_batch, pos2_batch, type_batch,
                               y_batch, mask_batch)

                if epoch % FLAGS.checkpoint_every == 0:
                    epoch_last_step = tf.train.global_step(sess, global_step)
                    path = saver.save(sess,
                                      checkpoint_prefix,
                                      global_step=epoch_last_step)
                    print(
                        'Epoch {} batch {} Saved model checkpoint to {}, pattern_num {}'
                        .format(epoch, batch_num, path, FLAGS.pattern_num))

                if FLAGS.allow_evaluation and epoch % FLAGS.evaluate_every == 0:
                    new_highest = evaluator.test()
                    print(
                        'Best precision recall area now is {}, progress: {}\n'.
                        format(
                            evaluator.highest_score,
                            utils.calculate_progress(epoch,
                                                     FLAGS.pattern_num)))
                    if new_highest:
                        utils.copy_model(out_dir, epoch_last_step)
                        utils.store_pr(out_dir, evaluator.highest_score)

            print('final best precision recall: {} pattern_num: {}\n'.format(
                evaluator.highest_score, FLAGS.pattern_num))
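
In this last example `utils.copy_model(out_dir, epoch_last_step)` operates on checkpoint files rather than on in-memory networks. A plausible sketch, assuming it copies the checkpoint files saved at the given global step into a separate "best model" directory (the directory names and the glob pattern are assumptions):

import glob
import os
import shutil


def copy_model(out_dir, global_step):
    """Copy the checkpoint files saved at `global_step` into out_dir/best_model."""
    src_dir = os.path.join(out_dir, 'checkpoints')
    dst_dir = os.path.join(out_dir, 'best_model')
    if not os.path.exists(dst_dir):
        os.makedirs(dst_dir)
    for path in glob.glob(os.path.join(src_dir, 'model-{}.*'.format(global_step))):
        shutil.copy(path, dst_dir)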