def test():
    """Evaluate a trained THOR policy on the Gym-based data iterator.

    Loads parameters from ``args.model_prefix``/``args.load_epoch``, runs the
    policy for ``num_epochs * num_examples / batch_size`` steps, sampling
    actions from the policy-head probabilities, and periodically logs the
    average per-episode return.  Requires ``args.load_epoch`` and
    ``args.model_prefix`` to be set.  No return value; output is via logging.
    """
    log_config()
    # Default to CPU; otherwise build one context per requested GPU id.
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]

    # module
    dataiter = rl_data.GymDataIter('scenes', args.batch_size,
                                   args.input_length, web_viz=True)
    print(dataiter.provide_data)
    net = sym.get_symbol_thor(dataiter.act_dim)
    module = mx.mod.Module(net,
                           data_names=[d[0] for d in dataiter.provide_data],
                           label_names=('policy_label', 'value_label'),
                           context=devs)
    # Labels are declared for binding only; for_training=False means no
    # gradients are computed during the forward passes below.
    module.bind(data_shapes=dataiter.provide_data,
                label_shapes=[('policy_label', (args.batch_size,)),
                              ('value_label', (args.batch_size, 1))],
                for_training=False)

    # load model
    assert args.load_epoch is not None
    assert args.model_prefix is not None
    module.load_params('%s-%04d.params' % (args.model_prefix, args.load_epoch))

    # Floor division: plain '/' yields a float on Python 3 and range(N)
    # would raise TypeError.
    N = args.num_epochs * args.num_examples // args.batch_size
    R = 0      # sum of completed-episode returns
    T = 1e-20  # completed-episode count; epsilon avoids division by zero
    score = np.zeros((args.batch_size,))
    for t in range(N):
        dataiter.clear_history()
        data = dataiter.next()
        module.forward(data, is_train=False)
        # First output is assumed to be the policy head: per-environment
        # action probabilities — TODO confirm against sym.get_symbol_thor.
        act = module.get_outputs()[0].asnumpy()
        act = [np.random.choice(dataiter.act_dim, p=act[i])
               for i in range(act.shape[0])]
        dataiter.act(act)
        time.sleep(0.05)
        _, reward, _, done = dataiter.history[0]
        T += done.sum()
        score += reward
        R += (done * score).sum()  # bank the return of episodes that just ended
        score *= (1 - done)        # reset running score for finished episodes
        if t % 100 == 0:
            logging.info('n %d score: %f T: %f' % (t, R / T, T))
def test():
    """Evaluate a trained THOR policy on the Robosims data iterator.

    NOTE(review): this is a second ``def test()`` in the same module and will
    shadow the Gym-based variant when both are defined — confirm which one is
    intended to be active, or rename one of them.

    Loads parameters from ``args.model_prefix``/``args.load_epoch``, runs the
    policy for ``num_epochs * num_examples / batch_size`` steps, sampling
    actions from the policy-head probabilities, and periodically logs the
    average per-episode return.  Requires ``args.load_epoch`` and
    ``args.model_prefix`` to be set.  No return value; output is via logging.
    """
    log_config()
    # Default to CPU; otherwise build one context per requested GPU id.
    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]

    # module
    dataiter = robo_data.RobosimsDataIter('scenes', args.batch_size,
                                          args.input_length, web_viz=True)
    print(dataiter.provide_data)
    net = sym.get_symbol_thor(dataiter.act_dim)
    module = mx.mod.Module(net,
                           data_names=[d[0] for d in dataiter.provide_data],
                           label_names=('policy_label', 'value_label'),
                           context=devs)
    # Labels are declared for binding only; for_training=False means no
    # gradients are computed during the forward passes below.
    module.bind(data_shapes=dataiter.provide_data,
                label_shapes=[('policy_label', (args.batch_size,)),
                              ('value_label', (args.batch_size, 1))],
                for_training=False)

    # load model
    assert args.load_epoch is not None
    assert args.model_prefix is not None
    module.load_params('%s-%04d.params' % (args.model_prefix, args.load_epoch))

    # Floor division: plain '/' yields a float on Python 3 and range(N)
    # would raise TypeError.
    N = args.num_epochs * args.num_examples // args.batch_size
    R = 0      # sum of completed-episode returns
    T = 1e-20  # completed-episode count; epsilon avoids division by zero
    score = np.zeros((args.batch_size,))
    for t in range(N):
        dataiter.clear_history()
        data = dataiter.next()
        module.forward(data, is_train=False)
        # First output is assumed to be the policy head: per-environment
        # action probabilities — TODO confirm against sym.get_symbol_thor.
        act = module.get_outputs()[0].asnumpy()
        act = [np.random.choice(dataiter.act_dim, p=act[i])
               for i in range(act.shape[0])]
        dataiter.act(act)
        time.sleep(0.05)
        _, reward, _, done = dataiter.history[0]
        T += done.sum()
        score += reward
        R += (done * score).sum()  # bank the return of episodes that just ended
        score *= (1 - done)        # reset running score for finished episodes
        if t % 100 == 0:
            logging.info('n %d score: %f T: %f' % (t, R / T, T))