Example #1
def test():
    # Evaluate a trained A3C policy: sample actions from the policy output and
    # track the average per-episode score. Relies on module-level names from the
    # surrounding script (mx, np, time, logging, rl_data, sym, args, log_config).
    log_config()

    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]

    # module
    dataiter = rl_data.GymDataIter('scenes',
                                   args.batch_size,
                                   args.input_length,
                                   web_viz=True)
    print(dataiter.provide_data)
    net = sym.get_symbol_thor(dataiter.act_dim)
    module = mx.mod.Module(net,
                           data_names=[d[0] for d in dataiter.provide_data],
                           label_names=('policy_label', 'value_label'),
                           context=devs)
    module.bind(data_shapes=dataiter.provide_data,
                label_shapes=[('policy_label', (args.batch_size, )),
                              ('value_label', (args.batch_size, 1))],
                for_training=False)

    # load model
    assert args.load_epoch is not None
    assert args.model_prefix is not None
    module.load_params('%s-%04d.params' % (args.model_prefix, args.load_epoch))

    N = args.num_epochs * args.num_examples // args.batch_size  # integer division: N is a range() bound

    R = 0
    T = 1e-20
    score = np.zeros((args.batch_size, ))
    for t in range(N):
        dataiter.clear_history()
        data = dataiter.next()
        module.forward(data, is_train=False)
        act = module.get_outputs()[0].asnumpy()
        act = [
            np.random.choice(dataiter.act_dim, p=act[i])
            for i in range(act.shape[0])
        ]
        dataiter.act(act)
        time.sleep(0.05)
        _, reward, _, done = dataiter.history[0]
        T += done.sum()
        score += reward
        R += (done * score).sum()
        score *= (1 - done)

        if t % 100 == 0:
            logging.info('n %d score: %f T: %f' % (t, R / T, T))
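
The running-score bookkeeping in the loop above is compact, so here is a minimal standalone sketch (plain NumPy, with made-up reward/done arrays, not part of the original script) of what score, R, and T track: score accumulates reward per environment, every finished episode adds its return to R and its count to T, and score is reset wherever done is 1, so R / T is the average episode return.

import numpy as np

batch_size = 4
R, T = 0.0, 1e-20                 # summed episode returns, episode count
score = np.zeros((batch_size,))   # running return of the current episode per env

# two fake environment steps (rewards and episode-termination flags)
steps = [
    (np.array([1.0, 0.5, 2.0, 0.0]), np.array([0, 1, 0, 0])),
    (np.array([0.5, 1.0, 1.0, 3.0]), np.array([1, 0, 0, 1])),
]

for reward, done in steps:
    T += done.sum()               # count episodes that just finished
    score += reward               # extend the running return of every env
    R += (done * score).sum()     # bank the returns of the finished episodes
    score *= (1 - done)           # reset the running return where an episode ended

print('average episode return: %f over %d episodes' % (R / T, int(T)))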
Example #2
File: a3c.py  Project: Johnqczhang/mxnet
def test():
    log_config()

    devs = mx.cpu() if args.gpus is None else [
        mx.gpu(int(i)) for i in args.gpus.split(',')]

    # module
    dataiter = robo_data.RobosimsDataIter('scenes',
                                          args.batch_size,
                                          args.input_length,
                                          web_viz=True)
    print(dataiter.provide_data)
    net = sym.get_symbol_thor(dataiter.act_dim)
    module = mx.mod.Module(net,
                           data_names=[d[0] for d in dataiter.provide_data],
                           label_names=('policy_label', 'value_label'),
                           context=devs)
    module.bind(data_shapes=dataiter.provide_data,
                label_shapes=[('policy_label', (args.batch_size,)), ('value_label', (args.batch_size, 1))],
                for_training=False)

    # load model
    assert args.load_epoch is not None
    assert args.model_prefix is not None
    module.load_params('%s-%04d.params'%(args.model_prefix, args.load_epoch))

    N = args.num_epochs * args.num_examples // args.batch_size  # integer division: N is a range() bound

    R = 0
    T = 1e-20
    score = np.zeros((args.batch_size,))
    for t in range(N):
        dataiter.clear_history()
        data = dataiter.next()
        module.forward(data, is_train=False)
        act = module.get_outputs()[0].asnumpy()
        act = [np.random.choice(dataiter.act_dim, p=act[i]) for i in range(act.shape[0])]
        dataiter.act(act)
        time.sleep(0.05)
        _, reward, _, done = dataiter.history[0]
        T += done.sum()
        score += reward
        R += (done*score).sum()
        score *= (1-done)

        if t % 100 == 0:
            logging.info('n %d score: %f T: %f'%(t, R/T, T))
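
Both examples convert the softmax output of the policy head into actions the same way, so here is a small sketch (NumPy only, with a made-up probability matrix, not part of the original script) of that step: each row of the network output is treated as a categorical distribution over act_dim actions, and one action index is sampled per row with np.random.choice.

import numpy as np

act_dim = 4                        # number of discrete actions (assumed)
# made-up policy output: one probability row per environment in the batch
probs = np.array([
    [0.70, 0.10, 0.10, 0.10],
    [0.25, 0.25, 0.25, 0.25],
    [0.05, 0.05, 0.05, 0.85],
])

# sample one action index per row, exactly as the loop above does with act
acts = [np.random.choice(act_dim, p=probs[i]) for i in range(probs.shape[0])]
print(acts)                        # e.g. [0, 1, 3]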