def test_dataset(self):
    args = super_args('supervised1')
    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj, store=True)
    print(len(dataset))
    print(dataset.index[0:4])
    print(dataset.action[0:4])

    # check dimensions
    assert list(dataset.state[0].shape) == [3, 20, 20]
    assert list(dataset.next_state[0].shape) == [3, 20, 20]
    assert list(dataset.action[0].shape) == [5], \
        'got shape {}'.format(dataset.action[0])
    assert all([x[0] < 3 for x in dataset.action])
    assert all([x[0] == i for x, i in zip(dataset.action, dataset.index)])

    assert np.sum(dataset.state[0]) < np.sum(dataset.next_state[0])  # next is terminal
    assert dataset.reward[0] == 1.
    assert np.allclose(dataset.code[0], dataset.code[1]), \
        'target codes should be same for problem'
    assert dataset.reward[1] < 1., \
        'expected < 1, got {}'.format(dataset.reward[1])
    assert np.allclose(dataset.state[0], dataset.next_state[1])
    assert np.sum(dataset.state[2]) == 0, \
        'expected initial state to be all zeros, got {}'.format(np.sum(dataset.state[2]))
def test_gbp_run(self):
    trainer = self._setup_test_trainer(PolicyDiscContAG)
    args = trainer.args
    args.gbp.policy_steps = 2
    args.gbp.action_steps = 2
    # args.ds.post_process =

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    trainer.train_distGBP(dataset, args.gbp)
def test_super_run(self):
    args = super_args('supervised2')
    zdim = 64
    enc = EncodeState4(args.inst.num_spaces, zdim)
    dec = DecodeState4(args.inst.num_spaces, zdim)
    model = GBP(enc, dec, 5, zdim, 12, shared_size=12)
    trainer = GBPTrainer(None, model=model, argobj=args)

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    print(len(dataset))
    trainer.train_vanilla_supervised(dataset, args.train)
def test_debug1(self):
    trainer = self._setup_test_trainer()
    args = trainer.args
    args.train.testing = True

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    trainer.train_vanilla_supervised(dataset, args.train)
    trainer.tr_supervised_episode(dataset, args.train)
    trainer.train_distGBP(dataset, args.gbp)
def test_debug2(self):
    """ TEST ALL TRAINING SEQUENCES """
    params = [
        (PolicyAllLogitsAG, ds.to_disc_disc),
        (PolicySimple, ds.to_vec),
        (PolicyAllLogitsIndependent, ds.to_disc_disc),
        (PolicyDiscContAG, ds.to_disc_cont),
        (PolicyDiscContGA, ds.to_disc_cont),
        (PolicyDiscContIndependant, ds.to_disc_cont),
    ]
    step = 0
    for (p, infn) in params:
        print('\n-------------\n{}\n------------\nstarting {} {} {}'.format(
            step, p.__name__, infn, step))
        args = super_args('na-{}{}')
        args.train.testing = True
        args.train.steps = 5
        args.train.episodes = 2
        args.ds.post_process = infn
        args.gbp.action_steps = 2
        args.gbp.policy_steps = 2
        args.ds.num_problems = 10

        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)

        # pick the action model that matches the dataset post-processing
        am = None
        if infn == ds.to_disc_cont:
            am = ActionReg([OneHot(3), CoordVec(4)], name='dc')
        elif infn == ds.to_cont_cont:
            am = ActionReg([CoordVec(1), CoordVec(4)], name='cc')
        elif infn == ds.to_vec:
            am = ActionReg([CoordVec(5)], name='vec')
        elif infn == ds.to_disc_disc:
            am = ActionReg([OneHot(3), OneHot([4, 20])], name='dd')

        trainer = self._setup_test_trainer(pclas=p, action_model=am, geomtry_fn=Noop())
        trainer.action_model = am

        trainer.train_vanilla_supervised(dataset, args.train)
        print('vanilla .')
        trainer.tr_supervised_episode(dataset, args.train)
        print('tr_supervised .')
        trainer.train_distGBP(dataset, args.gbp)
        print('distGBP .')
        step += 1

    print('*******************\nALL TESTS\n*******************')
def test_gbp_grad(self):
    """ Run a single gbp_step from a saved checkpoint and inspect the resulting actions. """
    trainer = self._setup_test_trainer(
        PolicyDiscContAG, './data/trained/ May-21-2019-12:23AM--tr_sup.pkl')
    args = trainer.args

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    state = trainer.state_to_observation(dataset[1])
    # print(state[0].size(), state[1].size())
    print(trainer._lr)

    best_states, best_actions = trainer.gbp_step(state, args.gbp)
    print(best_actions)
def test_tr_run(self):
    args = super_args('tr_sup')
    zdim = 64
    enc = EncodeState4(args.inst.num_spaces, zdim)
    dec = DecodeState4(args.inst.num_spaces, zdim)
    p = PolicyDiscContAG(zdim, shape=[3, 20, 20], geomtry_fn=Noop)
    model = GBP(enc, dec, 5, zdim, 12, shared_size=12, policy=p)
    trainer = GBPTrainer(None, model=model, argobj=args)

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    print(len(dataset),
          args.train.episodes * args.ds.num_problems,
          args.ds.num_options)
    trainer.train_tr_supervised(dataset, args.train)
def run_model(self, policy, op):
    args = super_args('supervised2-{}{}'.format(op.__name__, policy.__name__))
    zdim = 100
    enc = EncodeState4(args.inst.num_spaces, zdim)
    dec = DecodeState4(args.inst.num_spaces, zdim)
    p = policy(zdim, shape=[3, 20, 20], geomtry_fn=op)
    model = GBP(enc, dec, 5, zdim, 12, shared_size=12, policy=p)
    trainer = GBPTrainer(None, model=model, argobj=args)

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    print('\n-------------\n steps {} {}'.format(
        args.train.episodes * args.ds.num_problems, args.ds.num_options))
    trainer.train_vanilla_supervised(dataset, args.train)
def test_gbp_gstep(self):
    """ Single gradient step through the learned transition/reward models. """
    trainer = self._setup_test_trainer(PolicyDiscContAG)
    args = trainer.args
    model = trainer.model

    dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
    state = trainer.state_to_observation(dataset[1])

    # sample an initial action perturbation and make it a leaf tensor that requires grad
    action_noise = D.Normal(torch.zeros(5),
                            torch.zeros(5).fill_(args.gbp.sigma))
    xs = action_noise.sample().to(trainer.device).requires_grad_(True)

    action = F.softmax(xs.unsqueeze(0), dim=-1)
    states = model.transition(state, xs.unsqueeze(0))
    reward = model.reward(state, action)
    full = model(state)
    print(full[0])
    print(reward)
    print(xs)

    # gradient of the predicted reward w.r.t. the action parameters
    dx = dfdx(reward.sum(), xs)
    print(dx)
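# Note: `dfdx` used in test_gbp_gstep is assumed to be a thin helper around
# torch.autograd.grad; a minimal sketch under that assumption (not necessarily
# the project's actual implementation) would be:
#
#     def dfdx(f, x):
#         """Gradient of scalar `f` with respect to leaf tensor `x`."""
#         return torch.autograd.grad(f, x, retain_graph=True)[0]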