def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    # load the latest checkpoint and its state dict onto the CPU
    path, step, epoch = utils.train.load_model(model_dir)
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    # collect every class defined in this module; each instance maps a
    # (name, variable) pair from the state dict to one spreadsheet column
    mapper = [(inflection.underscore(name), member())
              for name, member in inspect.getmembers(importlib.machinery.SourceFileLoader('', __file__).load_module())
              if inspect.isclass(member)]
    path = os.path.join(model_dir, os.path.basename(os.path.splitext(__file__)[0])) + '.xlsx'
    with xlsxwriter.Workbook(path, {'strings_to_urls': False, 'nan_inf_to_errors': True}) as workbook:
        worksheet = workbook.add_worksheet(args.worksheet)
        for j, (key, m) in enumerate(mapper):
            worksheet.write(0, j, key)
            for i, (name, variable) in enumerate(state_dict.items()):
                value = m(name, variable)
                worksheet.write(1 + i, j, value)
                if hasattr(m, 'format'):
                    m.format(workbook, worksheet, i, j)
        worksheet.autofilter(0, 0, i, len(mapper) - 1)
        worksheet.freeze_panes(1, 0)
    logging.info(path)

def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    height, width = tuple(map(int, config.get('image', 'size').split()))
    resize = transform.parse_transform(config, config.get('transform', 'resize_test'))
    transform_image = transform.get_transform(config, config.get('transform', 'image_test').split())
    transform_tensor = transform.get_transform(config, config.get('transform', 'tensor').split())
    # load and preprocess the input image
    image_bgr = cv2.imread('image.jpg')
    image_resized = resize(image_bgr, height, width)
    image = transform_image(image_resized)
    tensor = transform_tensor(image).unsqueeze(0)
    # load the exported Caffe2 nets and run inference
    init_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'init_net.pb'), 'rb') as f:
        init_net.ParseFromString(f.read())
    predict_net = caffe2_pb2.NetDef()
    with open(os.path.join(model_dir, 'predict_net.pb'), 'rb') as f:
        predict_net.ParseFromString(f.read())
    p = workspace.Predictor(init_net, predict_net)
    results = p.run([tensor.numpy()])
    # log a summary statistic and a digest of the output for verification
    logging.info(utils.abs_mean(results[0]))
    logging.info(hashlib.md5(results[0].tobytes()).hexdigest())

def main(_):
    model_dir = get_model_dir(conf, ['is_train', 'random_seed', 'monitor', 'display', 'log_level'])
    preprocess_conf(conf)

    with tf.Session() as sess:
        # environment
        env = gym.make(conf.env_name)
        env.seed(conf.random_seed)

        assert isinstance(env.observation_space, gym.spaces.Box), \
            "observation space must be continuous"
        assert isinstance(env.action_space, gym.spaces.Box), \
            "action space must be continuous"

        # exploration strategy
        if conf.noise == 'ou':
            strategy = OUExploration(env, sigma=conf.noise_scale)
        elif conf.noise == 'brownian':
            strategy = BrownianExploration(env, conf.noise_scale)
        elif conf.noise == 'linear_decay':
            strategy = LinearDecayExploration(env)
        else:
            raise ValueError('Unknown exploration strategy: %s' % conf.noise)

        # networks
        shared_args = {
            'sess': sess,
            'input_shape': env.observation_space.shape,
            'action_size': env.action_space.shape[0],
            'hidden_dims': conf.hidden_dims,
            'use_batch_norm': conf.use_batch_norm,
            'use_seperate_networks': conf.use_seperate_networks,
            'hidden_w': conf.hidden_w,
            'action_w': conf.action_w,
            'hidden_fn': conf.hidden_fn,
            'action_fn': conf.action_fn,
            'w_reg': conf.w_reg,
        }

        logger.info("Creating prediction network...")
        pred_network = Network(scope='pred_network', **shared_args)

        logger.info("Creating target network...")
        target_network = Network(scope='target_network', **shared_args)
        target_network.make_soft_update_from(pred_network, conf.tau)

        # statistic
        stat = Statistic(sess, conf.env_name, model_dir, pred_network.variables, conf.update_repeat)

        agent = NAF(sess, env, strategy, pred_network, target_network, stat,
                    conf.discount, conf.batch_size, conf.learning_rate,
                    conf.max_steps, conf.update_repeat, conf.max_episodes)
        agent.run(conf.monitor, conf.display, conf.is_train)

def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    # load the exported Caffe2 nets and time a forward pass
    init_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'init_net.pb'))
    predict_net = onnx_caffe2.helper.load_caffe2_net(os.path.join(model_dir, 'predict_net.pb'))
    benchmark = onnx_caffe2.helper.benchmark_caffe2_model(init_net, predict_net)
    logging.info('benchmark=%f (milliseconds)' % benchmark)

def main():
    args = make_args()
    config = configparser.ConfigParser()
    utils.load_config(config, args.config)
    for cmd in args.modify:
        utils.modify_config(config, cmd)
    with open(os.path.expanduser(os.path.expandvars(args.logging)), 'r') as f:
        logging.config.dictConfig(yaml.safe_load(f))
    model_dir = utils.get_model_dir(config)
    # load and validate the exported ONNX model
    model = onnx.load(model_dir + '.onnx')
    onnx.checker.check_model(model)
    # convert the ONNX graph into Caffe2 init/predict nets and save them
    init_net, predict_net = onnx_caffe2.backend.Caffe2Backend.onnx_graph_to_caffe2_net(model.graph, device='CPU')
    onnx_caffe2.helper.save_caffe2_net(init_net, os.path.join(model_dir, 'init_net.pb'))
    onnx_caffe2.helper.save_caffe2_net(predict_net, os.path.join(model_dir, 'predict_net.pb'), output_txt=True)
    logging.info(model_dir)

def main(_):
    # preprocess
    conf.observation_dims = eval(conf.observation_dims)

    for flag in ['memory_size', 't_target_q_update_freq', 't_test',
                 't_ep_end', 't_train_max', 't_learn_start', 'learning_rate_decay_step']:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    if conf.use_gpu:
        conf.data_format = 'NCHW'
    else:
        conf.data_format = 'NHWC'

    model_dir = get_model_dir(conf,
        ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
         't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag', 'scale'])

    # start
    with tf.Session() as sess:
        if 'Corridor' in conf.env_name:
            env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                                 conf.max_random_start, conf.observation_dims,
                                 conf.data_format, conf.display)
        else:
            env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                                   conf.max_random_start, conf.observation_dims,
                                   conf.data_format, conf.display)

        if conf.network_header_type in ['nature', 'nips']:
            pred_network = CNN(sess=sess,
                               data_format=conf.data_format,
                               history_length=conf.history_length,
                               observation_dims=conf.observation_dims,
                               output_size=env.env.action_space.n,
                               network_header_type=conf.network_header_type,
                               name='pred_network', trainable=True)
            target_network = CNN(sess=sess,
                                 data_format=conf.data_format,
                                 history_length=conf.history_length,
                                 observation_dims=conf.observation_dims,
                                 output_size=env.env.action_space.n,
                                 network_header_type=conf.network_header_type,
                                 name='target_network', trainable=False)
        elif conf.network_header_type == 'mlp':
            pred_network = MLPSmall(sess=sess,
                                    observation_dims=conf.observation_dims,
                                    history_length=conf.history_length,
                                    output_size=env.env.action_space.n,
                                    hidden_activation_fn=tf.sigmoid,
                                    network_output_type=conf.network_output_type,
                                    name='pred_network', trainable=True)
            target_network = MLPSmall(sess=sess,
                                      observation_dims=conf.observation_dims,
                                      history_length=conf.history_length,
                                      output_size=env.env.action_space.n,
                                      hidden_activation_fn=tf.sigmoid,
                                      network_output_type=conf.network_output_type,
                                      name='target_network', trainable=False)
        else:
            raise ValueError('Unknown network_header_type: %s' % (conf.network_header_type))

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir, pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf, target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)

def main(_):
    # preprocess
    conf.observation_dims = eval(conf.observation_dims)

    if conf.learning_rate < 0:
        # sample a learning rate log-uniformly from [1e-4, 1e-2)
        conf.learning_rate = conf.learning_rate_minimum = 10**(np.random.random() * 2 - 4)

    for flag in ['memory_size', 't_target_q_update_freq', 't_test',
                 't_ep_end', 't_train_max', 't_learn_start',
                 'learning_rate_decay_step', 'entropy_regularization_decay_step']:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    # for flag in ['learning_rate', 'learning_rate_minimum']:
    #     setattr(conf, flag, getattr(conf, flag) / conf.async_threads)

    if conf.use_gpu:
        conf.data_format = 'NCHW'
    else:
        conf.data_format = 'NHWC'

    if conf.model_dir == "":
        model_dir = get_model_dir(conf,
            ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
             't_save', 't_train', 'display', 'log_level', 'random_seed', 'tag',
             'scale', 'model_dir', 't_train_max'])
    else:
        model_dir = 'checkpoints/' + conf.model_dir + '/'

    device = '/gpu:0' if conf.use_gpu else '/cpu:0'

    # start
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess, \
            tf.device(device):
        env_args = [conf.env_name, conf.n_action_repeat, conf.max_random_start,
                    conf.observation_dims, conf.data_format, conf.display]

        if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
            Env = ToyEnvironment
        else:
            Env = AtariEnvironment

        if conf.agent_type == 'Replay':
            env = Env(*env_args)
            n_actions = env.env.action_space.n
        elif conf.agent_type == 'Async':
            env = [Env(*env_args) for _ in range(conf.async_threads)]
            n_actions = env[0].env.action_space.n
        else:
            raise ValueError("Unknown agent_type: %s" % conf.agent_type)

        if conf.network_header_type in ['nature', 'nips']:
            NetworkHead = CNN
            args = {'sess': sess,
                    'data_format': conf.data_format,
                    'history_length': conf.history_length,
                    'observation_dims': conf.observation_dims,
                    'output_size': n_actions,
                    'network_output_type': conf.network_output_type}
        elif conf.network_header_type == 'mlp':
            NetworkHead = MLPSmall
            args = {'sess': sess,
                    'history_length': conf.history_length,
                    'observation_dims': conf.observation_dims,
                    'hidden_sizes': [],
                    'output_size': n_actions,
                    'hidden_activation_fn': tf.nn.relu,
                    'network_output_type': conf.network_output_type}
        else:
            raise ValueError('Unknown network_header_type: %s' % (conf.network_header_type))

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, conf.trace_steps, model_dir)

        if conf.agent_type == 'Replay':
            from agents.deep_q import DeepQ
            pred_network = NetworkHead(name='pred_network', trainable=True, **args)
            stat.create_writer(pred_network.var.values())
            target_network = NetworkHead(name='target_network', trainable=False, **args)
            agent = DeepQ(sess, pred_network, env, stat, conf, target_network=target_network)
        elif conf.agent_type == 'Async':
            from agents.async import Async
            global_network = NetworkHead(name='global_network', trainable=False, **args)
            stat.create_writer(global_network.var.values())
            target_network = NetworkHead(name='target_network', trainable=False, **args)
            pred_networks = [NetworkHead(name='pred_network_%d' % i, trainable=False, **args)
                             for i in range(conf.async_threads)]
            if conf.disjoint_a3c:
                value_networks = [NetworkHead(name='value_network_%d' % i, trainable=False, **args)
                                  for i in range(conf.async_threads)]
            else:
                value_networks = None
            agent = Async(sess, global_network, target_network, env, stat, conf,
                          pred_networks=pred_networks, value_networks=value_networks)
        else:
            raise ValueError('Unknown agent_type: %s' % (conf.agent_type))

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)

def main(_):
    # preprocess
    conf.observation_dims = eval(conf.observation_dims)

    for flag in ['memory_size', 't_target_q_update_freq', 't_test',
                 't_ep_end', 't_train_max', 't_learn_start', 'learning_rate_decay_step']:
        setattr(conf, flag, getattr(conf, flag) * conf.scale)

    if conf.use_gpu:
        conf.data_format = 'NCHW'
    else:
        conf.data_format = 'NHWC'

    model_dir = get_model_dir(conf,
        ['use_gpu', 'max_random_start', 'n_worker', 'is_train', 'memory_size',
         'gpu_fraction', 't_save', 't_train', 'display', 'log_level',
         'random_seed', 'tag', 'scale'])

    # start
    gpu_options = tf.GPUOptions(
        per_process_gpu_memory_fraction=calc_gpu_fraction(conf.gpu_fraction))

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if any(name in conf.env_name for name in ['Corridor', 'FrozenLake']):
            env = ToyEnvironment(conf.env_name, conf.n_action_repeat,
                                 conf.max_random_start, conf.observation_dims,
                                 conf.data_format, conf.display,
                                 conf.use_cumulated_reward)
        else:
            env = AtariEnvironment(conf.env_name, conf.n_action_repeat,
                                   conf.max_random_start, conf.observation_dims,
                                   conf.data_format, conf.display,
                                   conf.use_cumulated_reward)

        if conf.network_header_type in ['nature', 'nips']:
            pred_network = CNN(sess=sess,
                               data_format=conf.data_format,
                               history_length=conf.history_length,
                               observation_dims=conf.observation_dims,
                               output_size=env.env.action_space.n,
                               network_header_type=conf.network_header_type,
                               name='pred_network', trainable=True)
            target_network = CNN(sess=sess,
                                 data_format=conf.data_format,
                                 history_length=conf.history_length,
                                 observation_dims=conf.observation_dims,
                                 output_size=env.env.action_space.n,
                                 network_header_type=conf.network_header_type,
                                 name='target_network', trainable=False)
        elif conf.network_header_type == 'mlp':
            pred_network = MLPSmall(sess=sess,
                                    observation_dims=conf.observation_dims,
                                    history_length=conf.history_length,
                                    output_size=env.env.action_space.n,
                                    hidden_activation_fn=tf.sigmoid,
                                    network_output_type=conf.network_output_type,
                                    name='pred_network', trainable=True)
            target_network = MLPSmall(sess=sess,
                                      observation_dims=conf.observation_dims,
                                      history_length=conf.history_length,
                                      output_size=env.env.action_space.n,
                                      hidden_activation_fn=tf.sigmoid,
                                      network_output_type=conf.network_output_type,
                                      name='target_network', trainable=False)
        else:
            raise ValueError('Unknown network_header_type: %s' % (conf.network_header_type))

        stat = Statistic(sess, conf.t_test, conf.t_learn_start, model_dir, pred_network.var.values())
        agent = TrainAgent(sess, pred_network, env, stat, conf, target_network=target_network)

        if conf.is_train:
            agent.train(conf.t_train_max)
        else:
            agent.play(conf.ep_end)

parser.add_argument("--recurrence", type=int, default=1, help="number of time-steps gradient is backpropagated (default: 1). If > 1, a LSTM is added to the model to have memory.") parser.add_argument("--text", action="store_true", default=False, help="add a GRU to the model to handle text input") args = parser.parse_args() args.mem = args.recurrence > 1 # Set run dir date = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S") default_model_name = f"{args.env}_{args.algo}_seed{args.seed}_{date}" model_name = args.model or default_model_name model_dir = utils.get_model_dir(model_name) # Load loggers and Tensorboard writer txt_logger = utils.get_txt_logger(model_dir) csv_file, csv_logger = utils.get_csv_logger(model_dir) tb_writer = tensorboardX.SummaryWriter(model_dir) # Log command and all script arguments txt_logger.info("{}\n".format(" ".join(sys.argv))) txt_logger.info("{}\n".format(args)) # Set seed for all randomness sources utils.seed(args.seed)
args = parser.parse_args()

# Set seed for all randomness sources
utils.seed(args.seed)

# Generate environment
env = gym.make(args.env)
env.seed(args.seed)
for _ in range(args.shift):
    env.reset()

# Define agent
model_dir = utils.get_model_dir(args.model)
agent = utils.Agent(args.env, env.observation_space, model_dir, args.argmax)

# Run the agent
if args.gif:
    from array2gif import write_gif
    frames = []

done = True
prev_action = 0

while True:
    if done:
        obs = env.reset()

parser.add_argument("--text", action="store_true", default=False, help="add a GRU to the model to handle text input") args = parser.parse_args() args.mem = args.recurrence > 1 # Set run dir date = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S") default_model_name = f"{args.env}_{args.algo}_seed{args.seed}_{date}" model_name = args.model or default_model_name model_dir = 'rl/scripts/' + utils.get_model_dir(model_name) # Load loggers and Tensorboard writer txt_logger = utils.get_txt_logger(model_dir) csv_file, csv_logger = utils.get_csv_logger(model_dir) tb_writer = tensorboardX.SummaryWriter(model_dir) # Log command and all script arguments txt_logger.info("{}\n".format(" ".join(sys.argv))) txt_logger.info("{}\n".format(args)) # Set seed for all randomness sources utils.seed(args.seed)
# Set seed for all randomness sources
utils.seed(seed)

# Load environments
envs = []
for i in range(procs):
    env = utils.make_env(env_eval, seed + 100 * i)  # Different envs from training
    envs.append(env)
env = ParallelEnv(envs)
print("Environments loaded\n")

# Load agents
model_dir = utils.get_model_dir(model)
agent = utils.Agent(env.observation_space, env.action_space, model_dir,
                    ipo_model, device, argmax, procs)
print("Agent loaded\n")

# Initialize logs
logs = {"num_frames_per_episode": [], "return_per_episode": []}

# Run agent on test domain
start_time = time.time()
obss = env.reset()
log_done_counter = 0