Example #1
    def test_all_available(self):
        agent_data, final_count, roles = get_data("all_available")

        agents = get_agent(agent_data, "all_available", roles)
        agents_count = self.count_available(agents, roles)

        self.assertEqual(agents_count, final_count)
Example #2
    def test_least_busy(self):
        agent_data, time_available, roles = get_data("least_busy")

        agents = get_agent(agent_data, "least_busy", roles)
        agents_count = self.count_available(agents, roles)

        self.assertEqual(agents_count, 1)
        self.assertEqual(agents[0]['available_since'], time_available)
Example #3
def main():
    # create experiment config
    config = get_config('pqnet')('train')

    # create network and training agent
    tr_agent = get_agent(config)

    # load from checkpoint if provided
    if config.cont:
        tr_agent.load_ckpt(config.ckpt)

    # create dataloader
    train_loader = get_dataloader('train', config)
    val_loader = get_dataloader('val', config)
    val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock

    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)

            # visualize
            if config.vis and clock.step % config.vis_frequency == 0:
                tr_agent.visualize_batch(data, 'train', outputs=outputs)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(
                OrderedDict({k: v.item()
                             for k, v in losses.items()}))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)
                outputs, losses = tr_agent.val_func(data)

                if config.vis and clock.step % config.vis_frequency == 0:
                    tr_agent.visualize_batch(data,
                                             'validation',
                                             outputs=outputs)

            clock.tick()

        # update lr by scheduler
        tr_agent.update_learning_rate()

        # update teacher forcing ratio
        if config.module == 'seq2seq':
            tr_agent.update_teacher_forcing_ratio()

        clock.tock()
        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
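Note: several of these examples wrap the validation loader in cycle() so that next(val_loader) can be drawn indefinitely inside the training loop. The helper itself is not shown in any snippet; a minimal sketch of the usual generator-based version (itertools.cycle is avoided because it would cache every batch it yields):

def cycle(iterable):
    # restart the loader whenever it is exhausted, so next() never raises StopIteration
    while True:
        for item in iterable:
            yield item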
Example #4
    def test_random(self):
        agent_data = get_data("random")
        role = [
            'management',
        ]

        agents = get_agent(agent_data, "random", role)
        agents_count = self.count_available(agents, role)

        self.assertEqual(agents_count, 1)
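Note: the tests in Examples #1, #2, and #4 call a count_available helper defined on the test class, which none of the snippets show. A plausible sketch, assuming each agent record carries an is_available flag and a roles list (both field names are guesses):

    def count_available(self, agents, roles):
        # count agents that are marked available and cover at least one requested role
        return sum(1 for agent in agents
                   if agent.get('is_available') and set(agent.get('roles', [])) & set(roles))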
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue',
                        dest='cont',
                        action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt',
                        type=str,
                        default='latest',
                        required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g',
                        '--gpu_ids',
                        type=int,
                        default=0,
                        required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis',
                        action='store_true',
                        default=False,
                        help="visualize output in training")

    args = parser.parse_args()

    # create experiment config
    config = get_config(args)
    print(config)

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    # writer = SummaryWriter()
    # create dataloader
    # train_loader = get_dataloader(PHASE_TRAINING, batch_size=config.batch_size, num_workers=2, dataset_json="/home/huydd/train_noise/result_json/result.json")
    val_loader = get_dataloader(
        PHASE_TESTING,
        batch_size=config.batch_size,
        num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    val_loader_step = get_dataloader(
        PHASE_TESTING,
        batch_size=config.batch_size,
        num_workers=2,
        dataset_json="/home/huydd/other_done/result_json/result.json")
    val_loader_step = cycle(val_loader_step)

    epoch_acc = tr_agent.evaluate(val_loader)
    print(epoch_acc)
Example #6
def main():
    # create experiment config containing all hyperparameters
    config = get_config('train')

    # create network and training agent
    tr_agent = get_agent(config)

    # load from checkpoint if provided
    if config.cont:
        tr_agent.load_ckpt(config.ckpt)

    # create dataloader
    train_loader = get_dataloader('train', config)
    val_loader = get_dataloader('validation', config)
    val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock

    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            tr_agent.train_func(data)

            # visualize
            if config.vis and clock.step % config.vis_frequency == 0:
                tr_agent.visualize_batch(data, "train")

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            losses = tr_agent.collect_loss()
            pbar.set_postfix(
                OrderedDict({k: v.item()
                             for k, v in losses.items()}))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)
                tr_agent.val_func(data)

                if config.vis and clock.step % config.vis_frequency == 0:
                    tr_agent.visualize_batch(data, "validation")

            clock.tick()

        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
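Note: the training loops above all drive a clock object with tick()/tock(), but none of the snippets define it. A plausible minimal sketch, with step counting optimizer iterations and epoch counting passes over the data:

class TrainClock(object):
    # hypothetical sketch of the clock used by the training agents above
    def __init__(self):
        self.epoch = 0
        self.minibatch = 0
        self.step = 0

    def tick(self):
        # called once per training batch
        self.minibatch += 1
        self.step += 1

    def tock(self):
        # called once per epoch
        self.epoch += 1
        self.minibatch = 0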
Example #7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue', dest='cont',  action='store_true', help="continue training from checkpoint")
    parser.add_argument('--ckpt', type=str, default='latest', required=False, help="desired checkpoint to restore")
    parser.add_argument('-g', '--gpu_ids', type=int, default=0, required=False, help="specify gpu ids")
    parser.add_argument('--vis', action='store_true', default=False, help="visualize output in training")
    args = parser.parse_args()

    # create experiment config
    config = get_config(args)
    print(config)

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    # create dataloader
    train_loader = get_dataloader(PHASE_TRAINING, batch_size=config.batch_size, num_workers=config.num_workers)
    val_loader = get_dataloader(PHASE_TESTING, batch_size=config.batch_size, num_workers=config.num_workers)
    val_loader_step = get_dataloader(PHASE_TESTING, batch_size=config.batch_size, num_workers=config.num_workers)
    val_loader_step = cycle(val_loader_step)
    # val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock

    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)

            # visualize
            if args.vis and clock.step % config.visualize_frequency == 0:
                tr_agent.visualize_batch(data, PHASE_TRAINING, outputs)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict({k: v.item() for k, v in losses.items()}))

            # validation step
            if clock.step % config.val_frequency == 0:
                # data = next(val_loader)
                data = next(val_loader_step)
                outputs, losses = tr_agent.val_func(data)

                if args.vis and clock.step % config.visualize_frequency == 0:
                    tr_agent.visualize_batch(data, PHASE_TESTING, outputs)

            clock.tick()

        tr_agent.evaluate(val_loader)
        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
Example #8
def main():
    test_data = True
    pretrain = True

    # create experiment config
    config = get_config()

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if pretrain:
        tr_agent.load_ckpt("latest")

    # create dataloader
    train_loader = get_dataloader(config, 'train')
    val_loader = get_dataloader(config, 'validation')
    test_loader = get_dataloader(config, 'test')

    # start training
    clock = tr_agent.clock

    # test
    if test_data:
        pbar = tqdm(test_loader)
        # use a context manager so the csv file is flushed and closed
        with open("../result.csv", "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(["id", "clip_count"])
            for b, data in enumerate(pbar):
                outputs, losses = tr_agent.val_func(data[0].cuda(), data[1].cuda())
                outputs = outputs.argmax().cpu().numpy()
                writer.writerow([b + 25001, outputs])

    for e in range(clock.epoch, config.epochs):
        if e % config.val_frequency == 0:
            loss = 0
            for b, data in enumerate(val_loader):
                outputs, losses = tr_agent.val_func(data[0].cuda(),
                                                    data[1].cuda())
                loss += losses['loss']
            loss /= len(val_loader)
            print("EPOCH {} valid loss : {}".format(e, loss))
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data[0].cuda(),
                                                  data[1].cuda())

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(
                OrderedDict({k: v.item()
                             for k, v in losses.items()}))

            clock.tick()

        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')
Example #9
time_steps = 80
episodes = 2000
interval = 500

acc_rew = np.zeros([hiders + seekers, episodes])

agents = []
for i in range(hiders + seekers):
    agents.append(get_agent(env, i, model=2))
    if load_weights:
        agents[-1].load_weights("agent_%i_weights.h5f" % (i))

# https://github.com/keras-rl/keras-rl/blob/master/rl/core.py
obs = None
rew = None
done = None
info = None
agent_obs = [[]] * (hiders + seekers)
agent_act = [[]] * (hiders + seekers)
if display:
    env.render()
for a in agents:
    a.training = True
Example #10
    args = arg_parser.parse_args()
    return args


if __name__ == "__main__":

    # Parse Arguments
    args = parse_args()

    # Other Defaults
    individual_types = ['Susceptible', 'Infected', 'Immune', 'Vaccinated']
    color_list = ['black', 'red', 'white', 'blue']

    # RL Environment and Agent
    env = game_env(args.grid_size, individual_types, color_list, args.vax_size)
    agent = get_agent(env, args)

    # RL run
    episode_rewards = []
    eps_history = []

    for episode in range(args.max_epd):
        state = env.reset(args.grid_size)
        episode_reward = 0
        done = False
        step = 0

        while not done:
            action = agent.get_action(state)
            next_state, reward, done, _ = env.step(action)
            # the call is truncated in the source; completed minimally here
            agent.learn(state, action, reward, next_state, done)
            state = next_state
            episode_reward += reward
            step += 1

        episode_rewards.append(episode_reward)
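Note: Example #10 assumes an agent exposing get_action(state) and learn(state, action, reward, next_state, done). A self-contained tabular Q-learning agent with that interface, as one hypothetical shape get_agent could return for small grid environments:

import numpy as np

class TabularQAgent:
    def __init__(self, n_states, n_actions, lr=0.1, gamma=0.99, eps=0.1):
        self.q = np.zeros((n_states, n_actions))
        self.lr, self.gamma, self.eps = lr, gamma, eps

    def get_action(self, state):
        # epsilon-greedy over the current Q estimates
        if np.random.rand() < self.eps:
            return np.random.randint(self.q.shape[1])
        return int(np.argmax(self.q[state]))

    def learn(self, state, action, reward, next_state, done):
        # one-step Q-learning update
        target = reward if done else reward + self.gamma * np.max(self.q[next_state])
        self.q[state, action] += self.lr * (target - self.q[state, action])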
Example #11
display = True
load_weights = False

env = hide_and_seek.make_env(n_hiders=hiders, n_seekers=seekers, n_boxes=boxes, n_ramps=ramps, n_food=food, n_rooms=rooms)

# # probably shouldn't use those two. but was testing.
# rewardWrapper = hide_and_seek.HideAndSeekRewardWrapper(env, n_hiders=hiders, n_seekers=seekers)
# trackStatW = hide_and_seek.TrackStatWrapper(env, boxes, ramps, food)

# run one episode
env.seed(42)
env.reset()

agents = []
for i in range(hiders+seekers):
    agents.append(get_agent(env,i))
    if load_weights:
        agents[-1].load_weights("agent_%i_weights.h5f"%(i))

#https://github.com/keras-rl/keras-rl/blob/master/rl/core.py
obs = None
rew = None
done = None
info = None
agent_obs = [[]]*(hiders+seekers)
agent_act = [[]]*(hiders+seekers)
if display:
    env.render()
for a in agents:
    a.training = True
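Note: Examples #9 and #11 appear to build keras-rl agents (the load_weights("agent_%i_weights.h5f") calls and the training flag match keras-rl's Agent API). A sketch of what get_agent(env, i) might look like there, assuming a flat observation and a discrete per-agent action space, which the multi-agent hide-and-seek env may not actually expose:

from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy

def get_agent(env, i, model=1):
    # small Q-network over the flattened observation
    nb_actions = env.action_space.n
    net = Sequential()
    net.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    net.add(Dense(64, activation='relu'))
    net.add(Dense(nb_actions, activation='linear'))
    agent = DQNAgent(model=net, nb_actions=nb_actions,
                     memory=SequentialMemory(limit=50000, window_length=1),
                     policy=EpsGreedyQPolicy(),
                     nb_steps_warmup=100, target_model_update=1e-2)
    agent.compile(Adam(lr=1e-3), metrics=['mae'])
    return agent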
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--continue',
                        dest='continue_path',
                        type=str,
                        required=False)
    parser.add_argument('-g',
                        '--gpu_ids',
                        type=int,
                        default=0,
                        required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis',
                        action='store_true',
                        default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # create experiment config
    config = get_config(stage)
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_ids)
    config.device = torch.device("cuda:0")
    print(config)

    # create soft link to experiment log directory
    if not os.path.exists('train_log'):
        os.symlink(config.exp_dir, 'train_log')

    # create network and training agent
    tr_agent = get_agent(config)

    # load from checkpoint if provided
    if args.continue_path:
        tr_agent.load_ckpt(args.continue_path)

    print(tr_agent.net)

    # create tensorboard writer
    train_tb = SummaryWriter(os.path.join(config.log_dir, 'train.events'))
    val_tb = SummaryWriter(os.path.join(config.log_dir, 'val.events'))

    # create dataloader
    train_loader = get_dataloader('train',
                                  batch_size=config.batch_size,
                                  num_workers=config.num_workers)
    val_loader = get_dataloader('validation',
                                batch_size=config.batch_size,
                                num_workers=config.num_workers)
    val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock

    for e in range(clock.epoch, config.nr_epochs):
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            outputs, losses = tr_agent.train_func(data)

            losses_values = {k: v.item() for k, v in losses.items()}

            # record loss to tensorboard
            for k, v in losses_values.items():
                train_tb.add_scalar(k, v, clock.step)

            # visualize
            if args.vis and clock.step % config.visualize_frequency == 0:
                pass
                # with torch.no_grad():
                #     tr_agent.visualize_batch(data['path'][0], train_tb)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(OrderedDict(losses_values))

            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader)

                outputs, losses = tr_agent.val_func(data)

                losses_values = {k: v.item() for k, v in losses.items()}

                for k, v in losses_values.items():
                    val_tb.add_scalar(k, v, clock.step)

                if args.vis and clock.step % config.visualize_frequency == 0:
                    pass
                    # with torch.no_grad():
                    #     tr_agent.visualize_batch(data['path'][0], val_tb)

            clock.tick()

        train_tb.add_scalar('learning_rate',
                            tr_agent.optimizer.param_groups[-1]['lr'],
                            clock.epoch)
        tr_agent.update_learning_rate()

        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest.pth.tar')
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--continue',
                        dest='cont',
                        action='store_true',
                        help="continue training from checkpoint")
    parser.add_argument('--ckpt',
                        type=str,
                        default='latest',
                        required=False,
                        help="desired checkpoint to restore")
    parser.add_argument('-g',
                        '--gpu_ids',
                        type=int,
                        default=0,
                        required=False,
                        help="specify gpu ids")
    parser.add_argument('--vis',
                        action='store_true',
                        default=False,
                        help="visualize output in training")
    args = parser.parse_args()

    # create experiment config
    config = get_config(args)
    print(config)

    # create network and training agent
    tr_agent = get_agent(config)
    print(tr_agent.net)

    # load from checkpoint if provided
    if args.cont:
        tr_agent.load_ckpt(args.ckpt)

    writer = SummaryWriter()
    # create dataloader
    train_loader = get_dataloader(
        PHASE_TRAINING,
        batch_size=config.batch_size,
        num_workers=2,
        dataset_json=
        "/opt/hdd2/huydd/data_with_noise/train_data/result_json/train.json")
    val_loader = get_dataloader(
        PHASE_TESTING,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        dataset_json=
        "/opt/hdd2/huydd/data_with_noise/val_data/result_json/val.json")
    val_loader_step = get_dataloader(
        PHASE_TESTING,
        batch_size=config.batch_size,
        num_workers=config.num_workers,
        dataset_json=
        "/opt/hdd2/huydd/data_with_noise/val_data/result_json/val.json")
    val_loader_step = cycle(val_loader_step)
    # val_loader = cycle(val_loader)

    # start training
    clock = tr_agent.clock
    max_epoch_acc = 0
    for e in range(clock.epoch, config.nr_epochs):
        n = 0
        # begin iteration
        pbar = tqdm(train_loader)
        for b, data in enumerate(pbar):
            # train step
            n += 1
            outputs, train_losses = tr_agent.train_func(data)

            if n == 1:
                train_losses_sum = train_losses['bce']
            else:
                train_losses_sum += train_losses['bce']

            # visualize
            # if args.vis and clock.step % config.visualize_frequency == 0:
            #     tr_agent.visualize_batch(data, "train", outputs)

            pbar.set_description("EPOCH[{}][{}]".format(e, b))
            pbar.set_postfix(
                OrderedDict({k: v.item()
                             for k, v in train_losses.items()}))

            # print("\nTrain Loss {}".format(train_losses))
            # validation step
            if clock.step % config.val_frequency == 0:
                data = next(val_loader_step)
                outputs, losses = tr_agent.val_func(data)
                # print("Val Loss {}".format(losses))

                # visualize
                # if args.vis and clock.step % config.visualize_frequency == 0:
                #     tr_agent.visualize_batch(data, "validation", outputs)

            clock.tick()
        train_losses_sum = train_losses_sum / (n * config.batch_size)
        print("\nResult Epoch {} Train Loss {} ".format(e, train_losses_sum))
        writer.add_scalar('Loss/train', train_losses_sum, e)
        # save the best accuracy
        epoch_acc = tr_agent.evaluate(val_loader)
        print("Epoch {} - accuracy {}".format(e, epoch_acc))
        writer.add_scalar('Val accuracy', epoch_acc, e)

        if epoch_acc > max_epoch_acc:
            tr_agent.save_ckpt('best_acc')
            max_epoch_acc = epoch_acc

        tr_agent.update_learning_rate()
        clock.tock()

        if clock.epoch % config.save_frequency == 0:
            tr_agent.save_ckpt()
        tr_agent.save_ckpt('latest')

    writer.close()
Example #14
import gym
from tensorboardX import SummaryWriter

from agent import get_agent
from common import config
from train_process import single_ac_train
from wrapper import atari_env

if __name__ == '__main__':
    env = gym.make('CartPole-v1')
    # env = atari_env(config.game_name)

    actor = get_agent('actor', n_ac=config.n_ac, lr=1e-2, test=True)
    critic = get_agent('critic', lr=1e-2, discount=config.discount, test=True)

    single_ac_train(env, actor, critic, config.base_path, config.batch_size,
                    config.epsilon, config.save_interval,
                    config.update_interval, config.learning_starts,
                    config.memory_size, config.max_epoch, config.max_iter)