Example #1
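Builds a supervised dataset with `ds.build_dataset` and sanity-checks tensor shapes, action encodings, rewards, and the state/next-state relationships.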
    def test_dataset(self):
        args = super_args('supervised1')
        dataset = ds.build_dataset(args.ds,
                                   ProbStack,
                                   args.inst,
                                   args.obj,
                                   store=True)
        print(len(dataset))
        print(dataset.index[0:4])
        print(dataset.action[0:4])
        # check dimensions
        assert list(dataset.state[0].shape) == [3, 20, 20]
        assert list(dataset.next_state[0].shape) == [3, 20, 20]
        assert list(dataset.action[0].shape) == [5], 'got shape {}'.format(
            dataset.action[0].shape)
        assert all(x[0] < 3 for x in dataset.action)
        assert all(x[0] == i for x, i in zip(dataset.action, dataset.index))

        assert np.sum(dataset.state[0]) < np.sum(
            dataset.next_state[0])  # next is terminal
        assert dataset.reward[0] == 1.
        assert np.allclose(dataset.code[0], dataset.code[1]), \
            'target codes should be same for problem'

        assert dataset.reward[1] < 1., \
            'expected < 1, got {}'.format(dataset.reward[1])
        assert np.allclose(dataset.state[0], dataset.next_state[1])
        assert np.sum(dataset.state[2]) == 0, \
            'expected initial state to be all zeros, got {}'.format(np.sum(dataset.state[2]))
Example #2
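Runs the GBP training loop on a freshly built dataset, with the policy and action step counts shortened for a quick test.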
    def test_gbp_run(self):
        trainer = self._setup_test_trainer(PolicyDiscContAG)
        args = trainer.args
        args.gbp.policy_steps = 2
        args.gbp.action_steps = 2
        # args.ds.post_process =

        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
        trainer.train_distGBP(dataset, args.gbp)
Example #3
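Wires an encoder/decoder pair into a `GBP` model and runs vanilla supervised training end to end.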
    def test_super_run(self):
        args = super_args('supervised2')
        zdim = 64
        enc = EncodeState4(args.inst.num_spaces, zdim)
        dec = DecodeState4(args.inst.num_spaces, zdim)
        model = GBP(enc, dec, 5, zdim, 12, shared_size=12)
        trainer = GBPTrainer(None, model=model, argobj=args)
        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
        print(len(dataset))
        trainer.train_vanilla_supervised(dataset, args.train)
Example #4
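Smoke-tests all three training entry points on one dataset with `testing` mode enabled.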
    def test_debug1(self):
        trainer = self._setup_test_trainer()
        args = trainer.args
        args.train.testing = True

        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)

        trainer.train_vanilla_supervised(dataset, args.train)
        trainer.tr_supervised_episode(dataset, args.train)
        trainer.train_distGBP(dataset, args.gbp)
Example #5
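Sweeps every policy class and dataset post-processing function pairing, matches each to its action model, and runs all training sequences.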
    def test_debug2(self):
        """
        TEST ALL TRAINING SEQUENCES

        """
        params = [
            (PolicyAllLogitsAG, ds.to_disc_disc),
            (PolicySimple, ds.to_vec),
            (PolicyAllLogitsIndependent, ds.to_disc_disc),
            (PolicyDiscContAG, ds.to_disc_cont),
            (PolicyDiscContGA, ds.to_disc_cont),
            (PolicyDiscContIndependant, ds.to_disc_cont),
        ]
        step = 0
        for (p, infn) in params:
            print('\n-------------\nstep {}: starting {} with {}\n-------------'.format(
                step, p.__name__, infn.__name__))

            args = super_args('na-{}{}'.format(p.__name__, step))
            args.train.testing = True
            args.train.steps = 5
            args.train.episodes = 2

            args.ds.post_process = infn

            args.gbp.action_steps = 2
            args.gbp.policy_steps = 2
            args.ds.num_problems = 10

            dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)

            am = None
            if infn == ds.to_disc_cont:
                am = ActionReg([OneHot(3), CoordVec(4)], name='dc')
            elif infn == ds.to_cont_cont:
                am = ActionReg([CoordVec(1), CoordVec(4)], name='cc')
            elif infn == ds.to_vec:
                am = ActionReg([CoordVec(5)], name='vec')
            elif infn == ds.to_disc_disc:
                am = ActionReg([OneHot(3), OneHot([4, 20])], name='dd')

            trainer = self._setup_test_trainer(pclas=p,
                                               action_model=am,
                                               geomtry_fn=Noop())
            trainer.action_model = am
            trainer.train_vanilla_supervised(dataset, args.train)
            print('vanilla .')
            trainer.tr_supervised_episode(dataset, args.train)
            print('tr_supervised .')
            trainer.train_distGBP(dataset, args.gbp)
            print('distGBP .')
            step += 1

        print('*******************\nALL TESTS\n*******************')
Example #6
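Loads a pretrained checkpoint and takes a single `gbp_step`, printing the resulting best actions.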
    def test_gbp_grad(self):
        """Take one GBP step from a pretrained checkpoint."""
        trainer = self._setup_test_trainer(
            PolicyDiscContAG,
            './data/trained/ May-21-2019-12:23AM--tr_sup.pkl')
        args = trainer.args
        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
        state = trainer.state_to_observation(dataset[1])
        # print(state[0].size(), state[1].size())
        print(trainer._lr)
        best_states, best_actions = trainer.gbp_step(state, args.gbp)
        print(best_actions)
Example #7
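Trains the `tr_supervised` objective with a `PolicyDiscContAG` policy attached to the GBP model.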
    def test_tr_run(self):
        args = super_args('tr_sup')
        zdim = 64
        enc = EncodeState4(args.inst.num_spaces, zdim)
        dec = DecodeState4(args.inst.num_spaces, zdim)

        p = PolicyDiscContAG(zdim, shape=[3, 20, 20], geomtry_fn=Noop)

        model = GBP(enc, dec, 5, zdim, 12, shared_size=12, policy=p)
        trainer = GBPTrainer(None, model=model, argobj=args)
        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
        print(len(dataset), args.train.episodes * args.ds.num_problems,
              args.ds.num_options)
        trainer.train_tr_supervised(dataset, args.train)
Example #8
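Parameterized helper: builds a model for a given policy/geometry-function pair and runs vanilla supervised training.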
    def run_model(self, policy, op):
        args = super_args('supervised2-{}{}'.format(op.__name__,
                                                    policy.__name__))
        zdim = 100
        enc = EncodeState4(args.inst.num_spaces, zdim)
        dec = DecodeState4(args.inst.num_spaces, zdim)
        p = policy(zdim, shape=[3, 20, 20], geomtry_fn=op)
        model = GBP(enc, dec, 5, zdim, 12, shared_size=12, policy=p)

        trainer = GBPTrainer(None, model=model, argobj=args)
        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
        print('\n-------------\nsteps {}, options {}'.format(
            args.train.episodes * args.ds.num_problems, args.ds.num_options))

        trainer.train_vanilla_supervised(dataset, args.train)
Example #9
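Manually reproduces one GBP gradient step: sample action noise, roll it through the model, and differentiate the predicted reward with respect to the action.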
    def test_gbp_gstep(self):
        """Differentiate the predicted reward w.r.t. sampled action noise."""
        trainer = self._setup_test_trainer(PolicyDiscContAG)
        args = trainer.args
        model = trainer.model

        dataset = ds.build_dataset(args.ds, ProbStack, args.inst, args.obj)
        state = trainer.state_to_observation(dataset[1])

        # sample Gaussian action noise and make it a leaf tensor so the
        # predicted reward can be differentiated with respect to it
        action_noise = D.Normal(torch.zeros(5),
                                torch.full((5,), args.gbp.sigma))
        xs_ = action_noise.sample().to(trainer.device)
        xs = xs_.clone().requires_grad_(True)

        action = F.softmax(xs.unsqueeze(0), dim=-1)
        states = model.transition(state, xs.unsqueeze(0))
        reward = model.reward(state, action)
        full = model(state)
        print(full[0])
        print(reward)
        print(xs)
        # gradient of the predicted reward with respect to the action noise
        dx = dfdx(reward.sum(), xs)
        print(dx)
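The `dfdx` helper is not defined anywhere in these excerpts. Assuming it is a thin wrapper over `torch.autograd.grad`, a minimal sketch of what it might look like (a hypothetical reconstruction, not the project's actual implementation):

import torch

def dfdx(y, x):
    # Hypothetical helper: returns dy/dx for a scalar y and leaf tensor x,
    # via autograd, without accumulating anything into x.grad.
    return torch.autograd.grad(y, x, retain_graph=True)[0]

This matches the call site above, where `reward.sum()` is a scalar and `xs` is a leaf tensor created with `requires_grad_(True)`.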