Example #1
def main():
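    # Evaluate a trained DuelingDQN: load weights from 'model.pth', act greedily
    # (epsilon = 0), and print each finished episode's length and reward.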
    args = argparser()

    args.clip_rewards = False
    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)

    seed = args.seed + 1122
    utils.set_global_seeds(seed, use_torch=True)
    env.seed(seed)

    model = DuelingDQN(env)
    model.load_state_dict(torch.load('model.pth', map_location='cpu'))

    episode_reward, episode_length = 0, 0
    state = env.reset()
    while True:
        if args.render:
            env.render()
        action, _ = model.act(torch.FloatTensor(np.array(state)), 0.)
        next_state, reward, done, _ = env.step(action)

        state = next_state
        episode_reward += reward
        episode_length += 1

        if done:
            state = env.reset()
            print("Episode Length / Reward: {} / {}".format(
                episode_length, episode_reward))
            episode_reward = 0
            episode_length = 0
Example #2
async def main():
    """
    main event loop
    """
    args = argparser()
    utils.set_global_seeds(args.seed, use_torch=False)

    procs = [
        Process(target=recv_batch_device),
        Process(target=recv_prios_device),
        Process(target=send_batch_device),
    ]
    for p in procs:
        p.start()

    buffer = CustomPrioritizedReplayBuffer(args.replay_buffer_size, args.alpha)
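    # Shared prioritized replay buffer; the asyncio workers below add incoming batches,
    # apply received priority updates, and sample batches to send on to the learner.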
    exe = ThreadPoolExecutor()
    event = asyncio.Event()
    lock = asyncio.Lock()

    # TODO: How to decide the proper number of asyncio workers?
    workers = []
    for _ in range(args.n_recv_batch_worker):
        w = recv_batch_worker(buffer, exe, event, lock, args.threshold_size)
        workers.append(w)
    for _ in range(args.n_recv_prios_worker):
        w = recv_prios_worker(buffer, exe, event, lock)
        workers.append(w)
    for _ in range(args.n_send_batch_worker):
        w = send_batch_worker(buffer, exe, event, lock, args.batch_size, args.beta)
        workers.append(w)

    await asyncio.gather(*workers)
    return True
Example #3
def main():
    learner_ip = get_environ()
    args = argparser()

    writer = SummaryWriter(comment="-{}-eval".format(args.env))

    ctx = zmq.Context()
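    # Subscribe to the learner's parameter broadcasts; ZMQ CONFLATE keeps only the
    # most recent message so the evaluator always loads the latest weights.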
    param_socket = ctx.socket(zmq.SUB)
    param_socket.setsockopt(zmq.SUBSCRIBE, b'')
    param_socket.setsockopt(zmq.CONFLATE, 1)
    param_socket.connect('tcp://{}:52001'.format(learner_ip))

    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)

    seed = args.seed + 1122
    utils.set_global_seeds(seed, use_torch=True)
    env.seed(seed)

    model = DuelingDQN(env)

    data = param_socket.recv(copy=False)
    param = pickle.loads(data)
    model.load_state_dict(param)
    print("Loaded first parameter from learner")

    episode_reward, episode_length, episode_idx = 0, 0, 0
    state = env.reset()
    while True:
        if args.render:
            env.render()
        action, _ = model.act(torch.FloatTensor(np.array(state)), 0.01)
        next_state, reward, done, _ = env.step(action)

        state = next_state
        episode_reward += reward
        episode_length += 1

        if done:
            state = env.reset()
            writer.add_scalar("eval/episode_reward", episode_reward,
                              episode_idx)
            writer.add_scalar("eval/episode_length", episode_length,
                              episode_idx)
            episode_reward = 0
            episode_length = 0
            episode_idx += 1

            if episode_idx % args.eval_update_interval == 0:
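                # Pull the freshest parameters published by the learner.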
                data = param_socket.recv(copy=False)
                param = pickle.loads(data)
                model.load_state_dict(param)
Example #4
def main():
    args = argparser()

    args.clip_rewards = False
    args.episode_life = False
    env = make_atari(args.env)
    env = wrap_atari_dqn(env, args)

    # seed = args.seed + 1122
    # utils.set_global_seeds(seed, use_torch=True)
    # env.seed(seed)

    model = DuelingDQN(env, args)
    model.load_state_dict(torch.load('model.pth', map_location='cpu'))

    episode_reward, episode_length = 0, 0
    state = env.reset()
    if not os.path.exists('plays'):
        os.mkdir('plays')
    video = cv2.VideoWriter('plays/tmp.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, (160, 210))
    while True:
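        # Saliency overlay: backprop the max Q-value to the input frame, normalize the
        # absolute gradient, and add it to the rendered RGB frame before writing to video.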
        img = env.render(mode='rgb_array')
        model.zero_grad()
        state = torch.tensor(state[np.newaxis, :], dtype=torch.float32, requires_grad=True)
        value, action = model(state).max(1)
        value = value[0]
        action = action[0]
        value.backward()
        img_gradient = np.abs(state.grad.numpy())
        img_gradient = np.sum(img_gradient, axis=(0,1))
        img_gradient = (img_gradient - np.min(img_gradient)) / (np.max(img_gradient) - np.min(img_gradient))
        img_gradient = img_gradient.transpose()
        img_gradient = cv2.resize(img_gradient, (160, 210))[...,np.newaxis]
        img_gradient = img_gradient * 255
        masked_img = np.clip(img + img_gradient, 0, 255).astype(np.uint8)
        video.write(masked_img)
        next_state, reward, done, _ = env.step(int(action))

        state = next_state
        episode_reward += reward
        episode_length += 1

        if done:
            state = env.reset()
            print("Episode Length / Reward: {} / {}".format(episode_length, episode_reward))
            video.release()
            os.rename('plays/tmp.avi', f'plays/{args.env}-{episode_reward}.avi')
            video = cv2.VideoWriter('plays/tmp.avi', cv2.VideoWriter_fourcc(*'DIVX'), 15, (160, 210))
            episode_reward = 0
            episode_length = 0
Example #5
def main():
    learner_ip = get_environ()
    args = argparser()
    param_queue = Queue(maxsize=3)
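    # Evaluation actor: one process runs evaluation rollouts while the other feeds it
    # the latest learner parameters through param_queue.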

    procs = [
        Process(target=exploration_eval, args=(args, -1, param_queue)),
        Process(target=recv_param_eval, args=(learner_ip, -1, param_queue)),
    ]

    for p in procs:
        p.start()
    for p in procs:
        p.join()
    return True
Example #6
def main():
    actor_id, n_actors, replay_ip, learner_ip = get_environ()
    args = argparser()
    param_queue = Queue(maxsize=3)
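    # Actor: one process generates experience and sends it to the replay server at
    # replay_ip, while the other keeps param_queue stocked with fresh learner parameters.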

    procs = [
        Process(target=vector_exploration,
                args=(args, actor_id, n_actors, replay_ip, param_queue)),
        Process(target=recv_param, args=(learner_ip, actor_id, param_queue)),
    ]

    for p in procs:
        p.start()
    for p in procs:
        p.join()
    return True
Example #7
def run():
    args = argparser()

    path = utils.create_log_dir(sys.argv)
    utils.start(args.http_port)

    env = Env(args)
    agents = [Agent(args) for _ in range(args.n_agent)]
    master = Master(args)

    for agent in agents:
        master.add_agent(agent)
    master.add_env(env)

    success_list = []
    time_list = []

    for idx in range(args.n_episode):
        print('=' * 80)
        print("Episode {}".format(idx + 1))
        # Reset the server's stack and timer
        print("Initializing the server...")
        master.reset(path)

        # Start the episode
        master.start()
        # Train the agents
        master.train()
        print('=' * 80)
        success_list.append(master.infos["is_success"])
        time_list.append(master.infos["end_time"] - master.infos["start_time"])

        if (idx + 1) % args.print_interval == 0:
            print("=" * 80)
            print("EPISODE {}: Avg. Success Rate / Time: {:.2} / {:.2}".format(
                idx + 1, np.mean(success_list), np.mean(time_list)))
            success_list.clear()
            time_list.clear()
            print("=" * 80)

        if (idx + 1) % args.checkpoint_interval == 0:
            utils.save_checkpoints(path, agents, idx + 1)

    if args.visual:
        visualize(path, args)
    print("끝")
    utils.close()
Example #8
def main():
    n_actors, replay_ip = get_environ()
    args = argparser()

    # TODO: Need to adjust the maxsize of prios, param queue
    batch_queue = Queue(maxsize=args.queue_size)
    prios_queue = Queue(maxsize=args.prios_queue_size)
    param_queue = Queue(maxsize=3)
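    # Learner: the train process consumes batches from batch_queue and pushes updated
    # priorities and parameters to queues that the helper processes ship over the network.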
    procs = [
        Process(target=train,
                args=(args, n_actors, batch_queue, prios_queue, param_queue)),
        Process(target=send_param, args=(param_queue, )),
        Process(target=send_prios, args=(prios_queue, replay_ip)),
    ]

    for _ in range(args.n_recv_batch_process):
        p = Process(target=recv_batch,
                    args=(batch_queue, replay_ip, args.device))
        procs.append(p)
    for p in procs:
        p.start()
    for p in procs:
        p.join()
Example #9
def run_trial(cnt):
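    # Redis pub/sub benchmark: one subscriber and n_threads publishers run for
    # n_seconds; the shared `output` total is printed and reported to NNI.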
    args = argparser()
    output = Value('f', 0)
    myredis = redis.StrictRedis()
    pub_list = []

    sub_proc = Process(target=sub,
                       kwargs={
                           'myredis': myredis,
                           'name': 'reader1',
                           'n_seconds': args.n_seconds
                       })
    sub_proc.start()

    for _ in range(args.n_threads):
        pub_proc = Process(target=pub, args=[myredis, args.n_seconds, output])
        pub_list.append(pub_proc)
        pub_proc.start()

    procs = [sub_proc] + pub_list
    for proc in procs:
        proc.join()
    print(f'pub_sum : {output.value}')
    nni.report_final_result(output.value)
Example #10
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from custom_trading_env import TradingEnv
from utils import device
import DQNTradingAgent.dqn_agent as dqn_agent
from custom_hyperparameters import hyperparams
from arguments import argparser

args = argparser()
# device_num, save_num, risk_aversion, n_episodes, fee

device = torch.device("cuda:{}".format(args.device_num))
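# Point the DQNTradingAgent module at the GPU selected via device_num.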
dqn_agent.set_device(device)

save_location = 'saves/{}'.format(args.save_num)

if not os.path.exists(save_location):
    os.makedirs(save_location)

save_interval = 200
print_interval = 1

n_episodes = args.n_episodes
sample_len = 480
Example #11
def run_regression(FLAGS):
    perfmeasure = get_cindex
    deepmethod = build_combined_categorical

    experiment(FLAGS, perfmeasure, deepmethod)


class CustomStopper(keras.callbacks.EarlyStopping):
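    """EarlyStopping that stays inactive until `start_epoch` epochs have passed."""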
    def __init__(self,
                 monitor='val_loss',
                 min_delta=0,
                 patience=0,
                 verbose=0,
                 mode='auto',
                 start_epoch=100):  # add argument for starting epoch
        super(CustomStopper, self).__init__(monitor=monitor,
                                            min_delta=min_delta,
                                            patience=patience,
                                            verbose=verbose,
                                            mode=mode)
        self.start_epoch = start_epoch

    def on_epoch_end(self, epoch, logs=None):
        if epoch > self.start_epoch:
            super().on_epoch_end(epoch, logs)


if __name__ == "__main__":
    FLAGS = argparser()
    FLAGS.log_dir = FLAGS.log_dir + str(time.time()) + "/"

    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    logging(str(FLAGS), FLAGS)
    run_regression(FLAGS)
Example #12
def main():
    # used from arguments.py
    args = argparser().parse_args()
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup distant debugging if needed
    if args.server_ip and args.server_port:
        # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script
        import ptvsd

        print("Waiting for debugger attach")
        ptvsd.enable_attach(address=(args.server_ip, args.server_port),
                            redirect_output=True)
        ptvsd.wait_for_attach()

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count(
        ) if torch.cuda.is_available() and not args.no_cuda else 0
    else:  # Initializes the distributed backend which will take care of sychronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend="nccl")
        args.n_gpu = 1
    args.device = device

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        args.local_rank,
        device,
        args.n_gpu,
        bool(args.local_rank != -1),
        args.fp16,
    )

    # Set seed
    set_seed(args)

    # Prepare our task
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: %s" % (args.task_name))
    processor = processors[args.task_name]

    label_list = processor.get_labels()
    num_labels = len(label_list)

    # Load pretrained model and tokenizer
    if args.local_rank not in [-1, 0]:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab

    args.model_type = args.model_type.lower()
    config = AutoConfig.from_pretrained(
        args.config_name if args.config_name else args.model_path,
        num_labels=num_labels,
        finetuning_task=args.task_name,
        cache_dir=args.cache_dir if args.cache_dir else None)
    tokenizer = AutoTokenizer.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_path,
        do_lower_case=args.do_lower_case,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        args.model_path,
        config=config,
        cache_dir=args.cache_dir if args.cache_dir else None,
    )

    add_special_tokens(model, tokenizer, processor)

    if args.local_rank == 0:
        torch.distributed.barrier(
        )  # Make sure only the first process in distributed training will download model & vocab
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    # Training
    if args.do_train:
        train_dataset = load_and_cache_examples(tokenizer, "train", args)
        global_step, tr_loss = train(model, tokenizer, train_dataset,
                                     processor, args)
        logger.info(" global_step = %s, average loss = %s", global_step,
                    tr_loss)

    # Saving best-practices: if you use defaults names for the model, you can reload it using from_pretrained()
    if args.do_train and (args.local_rank == -1
                          or torch.distributed.get_rank() == 0):
        # Create output directory if needed
        if not os.path.exists(args.output_dir) and args.local_rank in [-1, 0]:
            os.makedirs(args.output_dir)

        logger.info("Saving model checkpoint to %s", args.output_dir)
        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`
        model_to_save = (model.module if hasattr(model, "module") else model
                         )  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model
        torch.save(args, os.path.join(args.output_dir, "training_args.bin"))

        # Load a trained model and vocabulary that you have fine-tuned
        model = AutoModelForSequenceClassification.from_pretrained(
            args.output_dir)
        tokenizer = AutoTokenizer.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    assert not (args.do_test and args.do_eval)
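    # Evaluate on the dev split when do_eval is set, or on the test split when do_test is set.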
    results = {}
    if (args.do_eval or args.do_test) and args.local_rank in [-1, 0]:
        mode = "dev" if args.do_eval else "test"
        tokenizer = AutoTokenizer.from_pretrained(
            args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + "/**/" + WEIGHTS_NAME,
                              recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(
                logging.WARN)  # Reduce logging
        logger.info("Evaluate(%s) the following checkpoints: %s", mode,
                    checkpoints)
        for checkpoint in checkpoints:
            logger.info("Checkpoint: %s", checkpoint)
            global_step = checkpoint.split(
                "-")[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split(
                "/")[-1] if checkpoint.find("checkpoint") != -1 else ""

            model = AutoModelForSequenceClassification.from_pretrained(
                checkpoint)
            model.to(args.device)
            result = evaluate(model,
                              tokenizer,
                              processor,
                              mode,
                              args,
                              prefix=prefix)
            result = dict(
                (k + "_{}".format(global_step), v) for k, v in result.items())
            results.update(result)
    return results
Example #13
    label_row_inds, label_col_inds = np.where(
        ~np.isnan(Y))  # row/column indices of the known (non-NaN) affinity values in Y

    print("Logdir: " + FLAGS.log_dir)
    s1_avgperf, s1_avgloss, s1_teststd = nfold_1_2_3_setting_sample(XD, XT, Y, label_row_inds, label_col_inds,
                                                                    perfmeasure, FLAGS, dataset)

    logging("Setting " + str(FLAGS.problem_type), FLAGS)
    logging("avg_perf = %.5f,  avg_mse = %.5f, std = %.5f" %
            (s1_avgperf, s1_avgloss, s1_teststd), FLAGS)
    print("Setting " + str(FLAGS.problem_type))
    print("avg_perf = %.5f,  avg_mse = %.5f, std = %.5f" %
          (s1_avgperf, s1_avgloss, s1_teststd))


def run_regression(FLAGS):
    perfmeasure = get_cindex
    experiment(FLAGS, perfmeasure)


if __name__ == "__main__":
    FLAGS = argparser()
    FLAGS.log_dir = FLAGS.log_dir + str(time.time()) + "/"
    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)

    logging(str(FLAGS), FLAGS)
    print(str(FLAGS))
    run_regression(FLAGS)
Example #14
import os

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import DQNTradingAgent.dqn_agent as dqn_agent
from envs.trading_env_integrated import TradingEnv
from custom_hyperparameters import hyperparams
from arguments import argparser

args = argparser()  # device_num, save_num, risk_aversion, n_episodes

torch.cuda.manual_seed_all(7)
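# Note: only the CUDA RNG is seeded here; CPU and NumPy seeds are left unset in this snippet.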

device = torch.device("cuda:{}".format(args.device_num))
dqn_agent.set_device(device)

save_location = 'saves/Original/{}'.format(args.save_num)
if not os.path.exists(save_location):
    os.makedirs(save_location)

save_interval = 1000
print_interval = 1

n_episodes = args.n_episodes
sample_len = 480