コード例 #1
0
def main():
    """Dump per-variable statistics of a saved model's state_dict to an .xlsx workbook.

    One column per statistic class defined in this module; one row per
    state_dict entry. The workbook path is logged on completion.
    """
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        # safe_load: dictConfig only needs plain mappings/scalars, and
        # yaml.load without an explicit Loader is deprecated and unsafe.
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    path, step, epoch = utils.train.load_model(model_dir)
    # map tensors onto CPU regardless of the device they were saved from
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    # one (column_name, statistic_instance) pair per class defined in this file;
    # the column header is the underscored class name
    mapper = [(inflection.underscore(name), member()) for name, member in inspect.getmembers(importlib.machinery.SourceFileLoader('', __file__).load_module()) if inspect.isclass(member)]
    path = os.path.join(model_dir, os.path.basename(os.path.splitext(__file__)[0])) + '.xlsx'
    with xlsxwriter.Workbook(path, {'strings_to_urls': False, 'nan_inf_to_errors': True}) as workbook:
        worksheet = workbook.add_worksheet(args.worksheet)
        for j, (key, m) in enumerate(mapper):
            worksheet.write(0, j, key)
            for i, (name, variable) in enumerate(state_dict.items()):
                value = m(name, variable)
                worksheet.write(1 + i, j, value)
            # statistic classes may provide optional per-column formatting
            if hasattr(m, 'format'):
                m.format(workbook, worksheet, i, j)
        # NOTE(review): i and j leak from the loops above; this raises
        # NameError if mapper or state_dict is empty -- assumed non-empty.
        worksheet.autofilter(0, 0, i, len(mapper) - 1)
        worksheet.freeze_panes(1, 0)
    logging.info(path)
コード例 #2
0
def main():
    """Run an exported Caffe2 model on a sample image and fingerprint its output.

    Logs the absolute mean and the MD5 of the first output blob so the result
    can be compared against the original (e.g. PyTorch) implementation.
    """
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        # safe_load: dictConfig only needs plain mappings/scalars, and
        # yaml.load without an explicit Loader is deprecated and unsafe.
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config, config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(config, config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(config, config.get('transform', 'tensor').split())
    # load image (cv2.imread returns BGR; None if the file is missing -- TODO confirm file exists)
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)  # add batch dimension
    # Caffe2: deserialize the exported init/predict nets
    init_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'init_net.pb'), 'rb') as f:
        init_net.ParseFromString(f.read())
    predict_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'predict_net.pb'), 'rb') as f:
        predict_net.ParseFromString(f.read())
    p = workspace.Predictor(init_net, predict_net)
    results = p.run([tensor.numpy()])
    # fingerprint the first output for cross-framework comparison
    logging.info(utils.abs_mean(results[0]))
    logging.info(hashlib.md5(results[0].tostring()).hexdigest())
コード例 #3
0
ファイル: main.py プロジェクト: carpedm20/NAF-tensorflow
def main(_):
  """Build a NAF agent for a continuous-control gym env and run it.

  Creates the environment, an exploration strategy selected by conf.noise,
  prediction/target networks sharing one set of hyperparameters, then trains
  or evaluates depending on conf.is_train.
  """
  model_dir = get_model_dir(conf,
      ['is_train', 'random_seed', 'monitor', 'display', 'log_level'])

  preprocess_conf(conf)

  with tf.Session() as sess:
    # environment
    env = gym.make(conf.env_name)
    env.seed(conf.random_seed)

    # NAF requires continuous (Box) observation and action spaces
    assert isinstance(env.observation_space, gym.spaces.Box), \
      "observation space must be continuous"
    assert isinstance(env.action_space, gym.spaces.Box), \
      "action space must be continuous"

    # exploration strategy
    if conf.noise == 'ou':
      strategy = OUExploration(env, sigma=conf.noise_scale)
    elif conf.noise == 'brownian':
      strategy = BrownianExploration(env, conf.noise_scale)
    elif conf.noise == 'linear_decay':
      strategy = LinearDecayExploration(env)
    else:
      # fix: error message previously read 'Unkown'
      raise ValueError('Unknown exploration strategy: %s' % conf.noise)

    # networks: prediction and target share every hyperparameter
    shared_args = {
      'sess': sess,
      'input_shape': env.observation_space.shape,
      'action_size': env.action_space.shape[0],
      'hidden_dims': conf.hidden_dims,
      'use_batch_norm': conf.use_batch_norm,
      'use_seperate_networks': conf.use_seperate_networks,
      'hidden_w': conf.hidden_w, 'action_w': conf.action_w,
      'hidden_fn': conf.hidden_fn, 'action_fn': conf.action_fn,
      'w_reg': conf.w_reg,
    }

    logger.info("Creating prediction network...")
    pred_network = Network(
      scope='pred_network', **shared_args
    )

    logger.info("Creating target network...")
    target_network = Network(
      scope='target_network', **shared_args
    )
    # target tracks pred with soft updates scaled by tau
    target_network.make_soft_update_from(pred_network, conf.tau)

    # statistic
    stat = Statistic(sess, conf.env_name, model_dir, pred_network.variables, conf.update_repeat)

    agent = NAF(sess, env, strategy, pred_network, target_network, stat,
                conf.discount, conf.batch_size, conf.learning_rate,
                conf.max_steps, conf.update_repeat, conf.max_episodes)

    agent.run(conf.monitor, conf.display, conf.is_train)
コード例 #4
0
def main():
    """Benchmark an exported Caffe2 model and log its latency in milliseconds."""
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        # safe_load: dictConfig only needs plain mappings/scalars, and
        # yaml.load without an explicit Loader is deprecated and unsafe.
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    init_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'init_net.pb'))
    predict_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'predict_net.pb'))
    benchmark = onnx_caffe2.helper.benchmark_caffe2_model(init_net, predict_net)
    logging.info('benchmark=%f(milliseconds)' % benchmark)
コード例 #5
0
def main():
    """Convert a saved ONNX model into Caffe2 init/predict nets and save them.

    Reads <model_dir>.onnx, validates it, and writes init_net.pb and
    predict_net.pb (plus a text dump of the latter) into model_dir.
    """
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        # safe_load: dictConfig only needs plain mappings/scalars, and
        # yaml.load without an explicit Loader is deprecated and unsafe.
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    model = onnx.load(model_dir + '.onnx')
    # fail fast if the ONNX graph is malformed before attempting conversion
    onnx.checker.check_model(model)
    init_net, predict_net = onnx_caffe2.backend.Caffe2Backend.onnx_graph_to_caffe2_net(model.graph, device='CPU')
    onnx_caffe2.helper.save_caffe2_net(init_net, os.path.join(model_dir, 'init_net.pb'))
    onnx_caffe2.helper.save_caffe2_net(predict_net, os.path.join(model_dir, 'predict_net.pb'), output_txt=True)
    logging.info(model_dir)
コード例 #6
0
ファイル: main.py プロジェクト: BinbinBian/deep-rl-tensorflow
def main(_):
  """Build env and DQN networks from conf, then train or play.

  Scales step-count hyperparameters by conf.scale, picks a data format for
  the device, constructs CNN or MLP prediction/target networks, and hands
  everything to TrainAgent.
  """
  # preprocess
  # SECURITY NOTE(review): eval on a flag string executes arbitrary code;
  # ast.literal_eval would be safer if the value is always a Python literal.
  conf.observation_dims = eval(conf.observation_dims)

  # scale every step-count hyperparameter by a common factor
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start', 'learning_rate_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)

  # channel-first on GPU, channel-last on CPU
  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag', 'scale'])

  # start
  with tf.Session() as sess:
    if 'Corridor' in conf.env_name:
      env = ToyEnvironment(conf.env_name, conf.n_action_repeat, conf.max_random_start,
                        conf.observation_dims, conf.data_format, conf.display)
    else:
      env = AtariEnvironment(conf.env_name, conf.n_action_repeat, conf.max_random_start,
                        conf.observation_dims, conf.data_format, conf.display)

    if conf.network_header_type in ['nature', 'nips']:
      pred_network = CNN(sess=sess,
                         data_format=conf.data_format,
                         history_length=conf.history_length,
                         observation_dims=conf.observation_dims,
                         output_size=env.env.action_space.n,
                         network_header_type=conf.network_header_type,
                         name='pred_network', trainable=True)
      target_network = CNN(sess=sess,
                           data_format=conf.data_format,
                           history_length=conf.history_length,
                           observation_dims=conf.observation_dims,
                           output_size=env.env.action_space.n,
                           network_header_type=conf.network_header_type,
                           name='target_network', trainable=False)
    elif conf.network_header_type == 'mlp':
      pred_network = MLPSmall(sess=sess,
                              observation_dims=conf.observation_dims,
                              history_length=conf.history_length,
                              output_size=env.env.action_space.n,
                              hidden_activation_fn=tf.sigmoid,
                              network_output_type=conf.network_output_type,
                              name='pred_network', trainable=True)
      target_network = MLPSmall(sess=sess,
                                observation_dims=conf.observation_dims,
                                history_length=conf.history_length,
                                output_size=env.env.action_space.n,
                                hidden_activation_fn=tf.sigmoid,
                                network_output_type=conf.network_output_type,
                                name='target_network', trainable=False)
    else:
      # fix: error message previously read 'Unkown'
      raise ValueError('Unknown network_header_type: %s' % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir, pred_network.var.values())
    agent = TrainAgent(sess, pred_network, env, stat, conf, target_network=target_network)

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      agent.play(conf.ep_end)
コード例 #7
0
ファイル: main.py プロジェクト: rhaps0dy/deep-rl-tensorflow
def main(_):
  """Build env(s) and networks from conf, then train or play a Replay/Async agent.

  Supports a single-env DeepQ (experience replay) agent or a multi-threaded
  Async (A3C-style) agent, with CNN or MLP network heads.
  """
  # preprocess
  # SECURITY NOTE(review): eval on a flag string executes arbitrary code;
  # ast.literal_eval would be safer if the value is always a Python literal.
  conf.observation_dims = eval(conf.observation_dims)
  if conf.learning_rate < 0:
      # a negative learning rate requests a random log-uniform sample in [1e-4, 1e-2)
      conf.learning_rate = conf.learning_rate_minimum = 10**(np.random.random()*2-4)

  # scale every step-count hyperparameter by a common factor
  for flag in ['memory_size', 't_target_q_update_freq', 't_test',
               't_ep_end', 't_train_max', 't_learn_start',
               'learning_rate_decay_step', 'entropy_regularization_decay_step']:
    setattr(conf, flag, getattr(conf, flag) * conf.scale)
#  for flag in ['learning_rate', 'learning_rate_minimum']:
#    setattr(conf, flag, getattr(conf, flag) / conf.async_threads)

  # channel-first on GPU, channel-last on CPU
  if conf.use_gpu:
    conf.data_format = 'NCHW'
  else:
    conf.data_format = 'NHWC'

  if conf.model_dir == "":
    model_dir = get_model_dir(conf,
      ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
       't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
       'scale', 'model_dir', 't_train_max'])
  else:
    model_dir = 'checkpoints/' + conf.model_dir + '/'

  device = '/gpu:0' if conf.use_gpu else '/cpu:0'
  # start
  gpu_options = tf.GPUOptions(
      per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

  with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess, \
       tf.device(device):
    env_args = [conf.env_name, conf.n_action_repeat, conf.max_random_start,
                conf.observation_dims, conf.data_format, conf.display]
    if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
      Env = ToyEnvironment
    else:
      Env = AtariEnvironment
    if conf.agent_type == 'Replay':
      env = Env(*env_args)
      n_actions = env.env.action_space.n
    elif conf.agent_type == 'Async':
      # one environment per worker thread
      env = [Env(*env_args) for _ in range(conf.async_threads)]
      n_actions = env[0].env.action_space.n
    else:
      raise ValueError("Unknown agent_type: %s" % conf.agent_type)

    if conf.network_header_type in ['nature', 'nips']:
      NetworkHead = CNN
      args = {'sess': sess,
              'data_format': conf.data_format,
              'history_length': conf.history_length,
              'observation_dims': conf.observation_dims,
              'output_size': n_actions,
              'network_output_type': conf.network_output_type}
    elif conf.network_header_type == 'mlp':
      NetworkHead = MLPSmall
      args = {'sess': sess,
              'history_length': conf.history_length,
              'observation_dims': conf.observation_dims,
              'hidden_sizes': [],
              'output_size': n_actions,
              'hidden_activation_fn': tf.nn.relu,
              'network_output_type': conf.network_output_type}
    else:
      # fix: error message previously read 'Unkown'
      raise ValueError('Unknown network_header_type: %s' % (conf.network_header_type))

    stat = Statistic(sess, conf.t_test, conf.t_learn_start, conf.trace_steps,
                     model_dir)

    if conf.agent_type == 'Replay':
      from agents.deep_q import DeepQ
      pred_network = NetworkHead(name='pred_network', trainable=True, **args)
      stat.create_writer(pred_network.var.values())
      target_network = NetworkHead(name='target_network', trainable=False, **args)
      agent = DeepQ(sess, pred_network, env, stat, conf,
                    target_network=target_network)
    elif conf.agent_type == 'Async':
      # fix: 'async' is a reserved keyword since Python 3.7, so
      # 'from agents.async import Async' is a SyntaxError; load via importlib.
      import importlib
      Async = importlib.import_module('agents.async').Async
      global_network = NetworkHead(name='global_network', trainable=False, **args)
      stat.create_writer(global_network.var.values())
      target_network = NetworkHead(name='target_network', trainable=False, **args)
      pred_networks = list(
        NetworkHead(name=('pred_network_%d'%i), trainable=False, **args)
        for i in range(conf.async_threads))
      if conf.disjoint_a3c:
        value_networks = list(
          NetworkHead(name=('value_network_%d'%i), trainable=False, **args)
          for i in range(conf.async_threads))
      else:
        value_networks = None
      agent = Async(sess, global_network, target_network, env, stat, conf,
                    pred_networks=pred_networks, value_networks=value_networks)
    else:
      # fix: error message previously read 'Unkown'
      raise ValueError('Unknown agent_type: %s' % (conf.agent_type))

    if conf.is_train:
      agent.train(conf.t_train_max)
    else:
      agent.play(conf.ep_end)
コード例 #8
0
ファイル: main.py プロジェクト: orcax/deep-rl-tensorflow
def main(_):
    """Build env and DQN networks from conf, then train or play.

    Variant with GPU memory fraction control and a cumulated-reward option
    on the environments; otherwise mirrors the other deep-rl entry points.
    """
    # preprocess
    # SECURITY NOTE(review): eval on a flag string executes arbitrary code;
    # ast.literal_eval would be safer if the value is always a Python literal.
    conf.observation_dims = eval(conf.observation_dims)

    # scale every step-count hyperparameter by a common factor
    for flag in [
            'memory_size', 't_target_q_update_freq', 't_test', 't_ep_end',
            't_train_max', 't_learn_start', 'learning_rate_decay_step'
    ]:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    # channel-first on GPU, channel-last on CPU
    if conf.use_gpu:
        conf.data_format = 'NCHW'
    else:
        conf.data_format = 'NHWC'

    model_dir = get_model_dir(conf, [
        'use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
        'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
        'random_seed', 'tag', 'scale'
    ])

    # start
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
            env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                                 conf.max_random_start, conf.observation_dims,
                                 conf.data_format, conf.display,
                                 conf.use_cumulated_reward)
        else:
            env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                                   conf.max_random_start,
                                   conf.observation_dims, conf.data_format,
                                   conf.display, conf.use_cumulated_reward)

        if conf.network_header_type in ['nature', 'nips']:
            pred_network = CNN(sess=sess,
                               data_format=conf.data_format,
                               history_length=conf.history_length,
                               observation_dims=conf.observation_dims,
                               output_size=env.env.action_space.n,
                               network_header_type=conf.network_header_type,
                               name='pred_network',
                               trainable=True)
            target_network = CNN(sess=sess,
                                 data_format=conf.data_format,
                                 history_length=conf.history_length,
                                 observation_dims=conf.observation_dims,
                                 output_size=env.env.action_space.n,
                                 network_header_type=conf.network_header_type,
                                 name='target_network',
                                 trainable=False)
        elif conf.network_header_type == 'mlp':
            pred_network = MLPSmall(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type,
                name='pred_network',
                trainable=True)
            target_network = MLPSmall(
                sess=sess,
                observation_dims=conf.observation_dims,
                history_length=conf.history_length,
                output_size=env.env.action_space.n,
                hidden_activation_fn=tf.sigmoid,
                network_output_type=conf.network_output_type,
                name='target_network',
                trainable=False)
        else:
            # fix: error message previously read 'Unkown'
            raise ValueError('Unknown network_header_type: %s' %
                             (conf.network_header_type))

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir,
                         pred_network.var.values())
        agent = TrainAgent(sess,
                           pred_network,
                           env,
                           stat,
                           conf,
                           target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)
コード例 #9
0
# Remaining CLI options for the training run.
parser.add_argument(
    "--recurrence", type=int, default=1,
    help="number of time-steps gradient is backpropagated (default: 1). If > 1, a LSTM is added to the model to have memory.")
parser.add_argument(
    "--text", action="store_true", default=False,
    help="add a GRU to the model to handle text input")

args = parser.parse_args()

# A recurrence horizon above one implies the model needs memory.
args.mem = args.recurrence > 1

# Set run dir

timestamp = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
default_model_name = f"{args.env}_{args.algo}_seed{args.seed}_{timestamp}"

model_name = args.model if args.model else default_model_name
model_dir = utils.get_model_dir(model_name)

# Load loggers and Tensorboard writer

txt_logger = utils.get_txt_logger(model_dir)
csv_file, csv_logger = utils.get_csv_logger(model_dir)
tb_writer = tensorboardX.SummaryWriter(model_dir)

# Log command and all script arguments

txt_logger.info(f"{' '.join(sys.argv)}\n")
txt_logger.info(f"{args}\n")

# Set seed for all randomness sources

utils.seed(args.seed)
コード例 #10
0
args = parser.parse_args()

# Set seed for all randomness sources

utils.seed(args.seed)

# Generate environment

env = gym.make(args.env)
env.seed(args.seed)
# advance past the first `shift` episodes so evaluation starts elsewhere
for _ in range(args.shift):
    env.reset()

# Define agent

model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(args.env, env.observation_space, model_dir, args.argmax)

# Run the agent

if args.gif:
    # imported lazily: array2gif is only needed when recording a GIF
    from array2gif import write_gif
    frames = []

done = True
prev_action = 0

# main rollout loop (body continues beyond this excerpt)
while True:
    if done:
        obs = env.reset()
コード例 #11
0
# Final CLI option for the training run.
parser.add_argument(
    "--text", action="store_true", default=False,
    help="add a GRU to the model to handle text input")

args = parser.parse_args()

# A recurrence horizon above one implies the model needs memory.
args.mem = args.recurrence > 1

# Set run dir

timestamp = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
default_model_name = f"{args.env}_{args.algo}_seed{args.seed}_{timestamp}"

model_name = args.model if args.model else default_model_name
model_dir = 'rl/scripts/' + utils.get_model_dir(model_name)

# Load loggers and Tensorboard writer

txt_logger = utils.get_txt_logger(model_dir)
csv_file, csv_logger = utils.get_csv_logger(model_dir)
tb_writer = tensorboardX.SummaryWriter(model_dir)

# Log command and all script arguments

txt_logger.info(f"{' '.join(sys.argv)}\n")
txt_logger.info(f"{args}\n")

# Set seed for all randomness sources

utils.seed(args.seed)
コード例 #12
0
    # Set seed for all randomness sources
    utils.seed(seed)

    # Load environments: one per parallel process, each with a distinct seed
    envs = []
    for i in range(procs):
        env = utils.make_env(env_eval,
                             seed + 100 * i)  # Different envs from training
        envs.append(env)

    env = ParallelEnv(envs)
    print("Environments loaded\n")

    # Load agents
    model_dir = utils.get_model_dir(model)
    agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                        ipo_model, device, argmax, procs)

    print("Agent loaded\n")

    # Initialize logs

    logs = {"num_frames_per_episode": [], "return_per_episode": []}

    # Run agent on test domain

    start_time = time.time()
    obss = env.reset()

    # counts finished episodes across all parallel envs (loop continues beyond this excerpt)
    log_done_counter = 0