def main():
    # hyper parameters
    env = NavigationTask()
    input_size = np.shape(env.getStateRep(True))
    hparams = {'input_size': input_size, 'num_actions': 10, 'episode_length': 7}

    # environment params
    eparams = {'num_batches': 100, 'ep_per_batch': 128}

    numIts = 5

    print("Starting Policy Gradient")
    for i in range(numIts):
        print("######################")
        print("Try Number: ", i)
        with tf.Graph().as_default(), tf.Session() as sess:

            pi = SimplePolicy(hparams['input_size'], hparams['num_actions'])

            sess.run(tf.global_variables_initializer())  # TF1 replacement for the removed initialize_all_variables

            for batch in range(0, eparams['num_batches']):
                num = 0
                total = 0
                for ep in range(0, eparams['ep_per_batch']):  # ep, not i: avoid shadowing the outer loop
                    obs, acts, rews = policyRollout(pi, hparams)
                    num += 1 if 1 in rews else 0
                    total += 1
                    pi.train_step(obs, acts, rews)
                if batch % 50 == 0:
                    print("Success rate at batch", batch, ":", num / total)
def generateTask(px, py, orien, gx, gy):
    direction = NavigationTask.oriens[orien]
    gs = np.array([gx, gy])
    env = NavigationTask(
        agent_start_pos=[np.array([px, py]), direction],
        goal_pos=gs)
    return env
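

# Hedged usage sketch for generateTask (coordinates here are arbitrary, and the
# helper name is illustrative): build a task starting at (0, 1) facing
# oriens[2] with the goal at (3, 2), then confirm the placement via the integer
# state representation.
def demoGenerateTask():
    env = generateTask(0, 1, 2, 3, 2)
    state = env.getStateRep(oneHotOutput=False)
    print('agent:', int(state[0]), int(state[1]),
          'goal:', int(state[-2]), int(state[-1]))
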
def policyRollout(agent, hparams):

    #"Runs one episode"
    episode_length = hparams['epiode_length']
    env = NavigationTask()
    obs, acts, rews = [], [], []

    for i in range(0, episode_length):

        state = env.getStateRep(True)
        obs.append(state)
        actionProb, sampleAction = agent.act_inference(state)

        # Use the sampled action (rather than the argmax of actionProb) so the
        # policy explores; REINFORCE-style training needs on-policy samples.
        action = sampleAction.argmax()

        env.performAction(action)
        reward = env.getReward()

        acts.append(np.squeeze(np.eye(hparams['num_actions'])[[action]]))
        rews.append(reward)

    return obs, acts, rews
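

# Minimal sketch of computing discounted rewards-to-go for the rollouts above.
# The batch loop in main() could apply this to rews before pi.train_step;
# gamma and the function name are illustrative, not part of the original code.
def discountedReturns(rews, gamma=0.99):
    returns, running = [], 0.0
    for rw in reversed(rews):
        running = rw + gamma * running
        returns.insert(0, running)  # prepend so returns aligns with rews
    return returns
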
def mainQ():

    ts = "navigation-data-train-sequence-singularDiscrete.pickle"
    vs = "navigation-data-test-sequence-singularDiscrete.pickle"

    exampleEnv = NavigationTask()
    #trainSeqs = SingDiscSeqData(ts,exampleEnv)
    validSeqs = SingDiscSeqData(vs, exampleEnv)

    f = ForwardModelLSTM_SD(exampleEnv)
def main():
    ts = "navigation-data-state_to_reward-train.pickle"
    vs = "navigation-data-state_to_reward-valid.pickle"
    ############
    print('Reading Data')
    with open(ts, 'rb') as inFile:
        print('\tReading', ts)
        trainSet = pickle.load(inFile)
    with open(vs, 'rb') as inFile:
        print('\tReading', vs)
        validSet = pickle.load(inFile)
    env = NavigationTask()
    greedyvp = GreedyValuePredictor(env)
    greedyvp.train( trainSet, validSet)
    def generateTask(px,py,orien,gx,gy):
        direction = NavigationTask.oriens[orien]
        gs = np.array([gx, gy])
        env = NavigationTask(agent_start_pos=[np.array([px,py]), direction],goal_pos=gs)
        return env
    env = generateTask(0,1,2,3,2)
    state = avar(torch.FloatTensor(env.getStateRep()),
                 requires_grad=False).view(1, -1)
    print(state.shape)
    print(greedyvp.forward(state).data.numpy())  # sanity-check one prediction
    torch.save(greedyvp.state_dict(), "greedy_value_predictor")
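

# Hedged reload sketch for the checkpoint saved above (same file name as the
# torch.save call; the helper name is illustrative), for running the predictor
# without retraining.
def loadGreedyValuePredictor():
    gvp = GreedyValuePredictor(NavigationTask())
    gvp.load_state_dict(torch.load("greedy_value_predictor"))
    return gvp
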
def main():
    f_model_name = 'LSTM_FM_1_99'
    gvp_model_name = "greedy_value_predictor_3"

    numRepeats = 5
    tasks = [[6, generateTask(0, 0, 0, 12, 10)]]

    exampleEnv = NavigationTask()
    ForwardModel = LSTMForwardModel(74, 64)
    ForwardModel.load_state_dict(torch.load(f_model_name))
    GreedyVP = GreedyValuePredictor(exampleEnv)
    GreedyVP.load_state_dict(torch.load(gvp_model_name))

    print("Running the tasks")
    for task in tasks:
        for j in range(numRepeats):
            task_state = task[1].getStateRep(oneHotOutput=False)
            px = int(task_state[0])
            py = int(task_state[1])
            orien = np.argmax(task_state[2:6])
            gx = int(task_state[-2])
            gy = int(task_state[-1])
            print("$$###############################")
            print("Repeat " + str(j) + " for " + str(gx) + " , " + str(gy))
            #print('www',px,py,orien,gx,gy)
            cenv = generateTask(px, py, orien, gx, gy)
            SimPolicy = SimulationPolicy(cenv)
            SimPolicy.trainSad(ForwardModel,
                               GreedyVP,
                               maxDepth=task[0],
                               niters=2000)

            s_0 = torch.unsqueeze(avar(torch.FloatTensor(cenv.getStateRep())),
                                  dim=0)
            tree = Tree(s_0, ForwardModel, SimPolicy, greedy_valueF, cenv,
                        task[0], 2)
            states, actions = tree.getBestPlan()

            for a in actions:
                cenv.performAction(a[0].data.numpy().argmax())
            r = cenv.getReward()
            correct = (r == 1)

            #print('Correct?',correct)
            if correct:
                print('Correct final state', str(gx), str(gy))
                torch.save(
                    SimPolicy.state_dict(), "SimPolicy_solve_" + str(gx) +
                    "_" + str(gy) + "_" + str(j))
def mainOld():

    ####################################################
    useFFANN = True
    trainingFFANN = False  # 1
    manualTest = False  # 2
    autoTest = False  # 3
    henaffHyperSearch = False  # 4
    runHenaffFFANN = False  # 5
    ####################################################

    if len(sys.argv) > 1:
        if sys.argv[1] == '1': trainingFFANN = True
        if sys.argv[1] == '2': manualTest = True
        if sys.argv[1] == '3': autoTest = True
        if sys.argv[1] == '4': henaffHyperSearch = True
        if sys.argv[1] == '5': runHenaffFFANN = True

    if useFFANN:

        f_model_name = 'forward-ffann-singDisc-noisy-2.pt'
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)

        ################################################################################################################
        if trainingFFANN:
            ############
            ts = "navigation-data-train-single-singularDiscrete.pickle"
            vs = "navigation-data-test-single-singularDiscrete.pickle"
            preload_name = None
            saveName = 'forward-ffann-singDisc-noisy-3.pt'
            ############
            print('Reading Data')
            with open(ts, 'rb') as inFile:
                print('\tReading', ts)
                trainSet = pickle.load(inFile)
            with open(vs, 'rb') as inFile:
                print('\tReading', vs)
                validSet = pickle.load(inFile)
            if preload_name is not None:
                print('Loading from', preload_name)
                f.load_state_dict(torch.load(preload_name))
            f.runTraining(trainSet,
                          validSet,
                          maxIters=50000,
                          modelFilenameToSave=saveName,
                          testEvery=100)

        if manualTest:  # 2
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            print('Environment states')
            ###
            start_px = 7
            start_py = 9
            start_orien = 1
            action = [5, 1, 5]
            ###
            cstate = avar(
                torch.FloatTensor(
                    exampleEnv.singularDiscreteStateFromInts(
                        start_px, start_py, start_orien))).unsqueeze(0)
            for act in action:
                action1h = avar(
                    torch.FloatTensor(exampleEnv._intToOneHot(
                        act, 10))).unsqueeze(0)
                inputVal = torch.cat([cstate, action1h], dim=1)
                cstate = f.forward(inputVal)
            print(cstate)
            print("sx,sy,sorien =", start_px, ',', start_py, ',', start_orien)
            print("As =",
                  ",".join([NavigationTask.actions[a] for a in action]))
            print(
                "px,py,orien =",
                f.env.singularDiscreteStateToInts(
                    cstate.squeeze(0).data.numpy()))

        ################################################################################################################
        if runHenaffFFANN:  # 5
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            print('Environment states')
            start_px = 0
            start_py = 0
            start_orien = 0
            start_state = exampleEnv.singularDiscreteStateFromInts(
                start_px, start_py, start_orien)
            goal_state = [0, 2]
            print('Building planner')
            planner = HenaffPlanner(f, maxNumActions=2)
            print('Starting generation')
            actions = planner.generatePlan(
                start_state,         # Starting state of the agent (one-hot singDisc)
                goal_state,          # Goal state of the agent as two ints (e.g. [gx, gy])
                eta=0.01,            # Learning rate given to ADAM
                noiseSigma=None,     # Noise strength on inputs; overrides the init default
                niters=None,         # Number of optimization iterations; overrides the init default
                useCE=False,         # Use the cross-entropy loss over subvectors of the state
                verbose=False,       # Verbosity
                extraVerbose=False,  # Extra verbosity
                useGumbel=False,     # Use Gumbel-Softmax in the action sampling
                temp=0.01,           # Temperature of the Gumbel-Softmax method
                lambda_h=0.0         # Entropy regularization strength (negative encourages entropy)
            )
            print('START STATE:', start_px, start_py, start_orien)
            print(
                'FINAL ACTIONS:', ", ".join([
                    str(a) + ' (' + NavigationTask.actions[a] + ')'
                    for a in actions
                ]))
            print('GOAL STATE:', goal_state)
            newEnv = NavigationTask(agent_start_pos=[
                np.array([start_px, start_py]),
                NavigationTask.oriens[start_orien]
            ],
                                    goal_pos=np.array(goal_state))
            for action in actions:
                newEnv.performAction(action)
            state = newEnv.getStateRep(oneHotOutput=False)
            pred_x = state[0]
            pred_y = state[1]
            pred_orien = NavigationTask.oriens[np.argmax(state[2:6])]
            print('PREDICTED FINAL STATE:', pred_x, pred_y, pred_orien)

        ################################################################################################################
        if henaffHyperSearch:
            print('Loading ', f_model_name)
            f.load_state_dict(torch.load(f_model_name))

            ##################### Hyper-params #####################
            # lambda_hs = [0.0,0.01,-0.01,0.05,-0.05,0.005,-0.005]            # Entropy strength
            # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
            # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
            # temperatures = [0.1,0.01,0.001,1.0]                             # Temperature for Gumbel-softmax
            # noiseSigmas = [0.0,0.01,0.02,0.05,0.1,0.25,0.5,0.75,1.0,1.25]   # Noise strength on input
            ## Init try
            # lambda_hs = [0.0,0.005,-0.005]                                  # Entropy strength
            # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
            # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
            # temperatures = [0.1,0.01,0.001]                             # Temperature for Gumbel-softmax
            # noiseSigmas = [0.0,0.05,0.1,0.5,1.0]   # Noise strength on input
            ## Only use ones with decent results
            lambda_hs = [0.0, -0.005]  # Entropy strength
            etas = [0.5, 0.25, 0.1, 0.005]  # Learning rate
            useGumbels = [True, False]  # Whether to use Gumbel-softmax
            temperatures = [0.1, 0.001]  # Temperature for Gumbel-softmax
            noiseSigmas = [0.5, 1.0]  # Noise strength on input
            ########################################################

            ###### Settings ######
            niters = 75
            verbose = False
            extraVerbose = False
            numRepeats = 10
            fileToWriteTo = 'hyper-param-results.txt'  # Set to None to do no writing
            distType = 1  # 0 = MSE, 1 = CE, 2 = dist

            ######################

            # Build an env with the given INT inputs
            def generateTask(px, py, orien, gx, gy):
                direction = NavigationTask.oriens[orien]
                gs = np.array([gx, gy])
                env = NavigationTask(
                    agent_start_pos=[np.array([px, py]), direction],
                    goal_pos=gs)
                return env

            # Function for running a single suite of tests (on one hyper-param set)
            def runTests(lh, eta, noiseLevel, ug, cnum, temp=None, distType=0):
                # Define tasks
                tasks = [[1, generateTask(0, 0, 0, 0, 2)],
                         [1, generateTask(5, 5, 1, 8, 5)],
                         [1, generateTask(3, 2, 2, 3, 0)],
                         [1, generateTask(9, 9, 3, 7, 9)],
                         [2, generateTask(0, 0, 0, 0, 6)],
                         [2, generateTask(0, 0, 0, 0, 8)],
                         [2, generateTask(2, 3, 0, 2, 8)],
                         [2, generateTask(0, 0, 0, 0, 10)],
                         [3, generateTask(1, 1, 0, 2, 2)]]
                # Choose dist type
                if distType == 0:
                    useCE = False
                    intDist = False
                elif distType == 1:
                    useCE = True
                    intDist = False
                elif distType == 2:
                    useCE = False
                    intDist = True
                # Display status
                wstring = cnum + ',lambda_h=' + str(lh) + ',eta=' + str(
                    eta) + ',sigma=' + str(noiseLevel) + ',dType=' + str(
                        distType) + ',ug=' + str(ug)
                if ug: wstring += ',temp=' + str(temp)
                # For each task, repeated a few times, attempt to solve the problem
                score, tot = 0, 0
                for i, task in enumerate(tasks):
                    #print(i)
                    for _ in range(numRepeats):
                        planner = HenaffPlanner(f, maxNumActions=task[0])
                        cenv = task[1]
                        actions = planner.generatePlan(
                            cenv.getStateRep(oneHotOutput=True),
                            eta=eta,
                            noiseSigma=noiseLevel,
                            niters=niters,
                            goal_state=None,
                            useCE=useCE,  # honor the distType selection above
                            verbose=verbose,
                            extraVerbose=extraVerbose,
                            useGumbel=ug,
                            temp=temp,
                            lambda_h=lh,
                            useIntDistance=intDist)
                        # Check for correctness
                        for a in actions:
                            cenv.performAction(a)
                        r = cenv.getReward()
                        correct = (r == 1)
                        tot += 1
                        if correct: score += 1
                wstring += ' -> Score:' + str(score) + '/' + str(tot)
                print(wstring)
                # Write output
                if fileToWriteTo is not None:
                    with open(fileToWriteTo, 'a') as filehandle:
                        filehandle.write(wstring + '\n')

            # Run tasks over all hyper-parameter settings
            N_p, cp = len(lambda_hs) * len(etas) * len(noiseSigmas) * (
                1 + len(temperatures)), 1
            for lambda_h in lambda_hs:
                for eta in etas:
                    for noiseLevel in noiseSigmas:
                        for ug in useGumbels:
                            if ug:
                                for temp in temperatures:
                                    ps = str(cp) + '/' + str(N_p)
                                    runTests(lambda_h,
                                             eta,
                                             noiseLevel,
                                             ug,
                                             ps,
                                             temp,
                                             distType=distType)
                                    cp += 1
                            else:
                                ps = str(cp) + '/' + str(N_p)
                                runTests(lambda_h,
                                         eta,
                                         noiseLevel,
                                         ug,
                                         ps,
                                         distType=distType)
                                cp += 1
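

# Hedged alternative to the argv flag blocks above: map the mode argument to
# the corresponding flag name once, instead of five separate if statements.
# The function name is illustrative, not part of the original code.
def parseMode(argv):
    modes = {'1': 'trainingFFANN', '2': 'manualTest', '3': 'autoTest',
             '4': 'henaffHyperSearch', '5': 'runHenaffFFANN'}
    return modes.get(argv[1]) if len(argv) > 1 else None
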
def main():

    ####### Settings #######
    preloadModel = True
    runTraining = True  # Task 1
    testHenaff = False  # Task 2
    testFM = False  # Task 3
    ########################

    ############################ External Files ############################
    ts = "navigation-data-train-sequence-singularDiscrete.pickle"
    vs = "navigation-data-test-sequence-singularDiscrete.pickle"
    f_model_name_to_preload = 'forward-lstm-singDisc-TF0-ns-7.pt'
    f_model_name_to_save = 'forward-lstm-singDisc-TF0-ns-9.pt'  # For training
    ########################################################################

    # Define shell environment and empty forward model
    exampleEnv = NavigationTask()
    f = ForwardModelLSTM_SD(exampleEnv)

    # Preload the forward model, if wanted
    if preloadModel and f_model_name_to_preload is not None:
        f.load_state_dict(torch.load(f_model_name_to_preload))

    # Run training if desired
    if runTraining:
        trainSeqs = SingDiscSeqData(ts, exampleEnv)
        validSeqs = SingDiscSeqData(vs, exampleEnv)
        f.runTraining(
            trainSeqs,
            validSeqs,
            modelFilenameToSave=f_model_name_to_save,
            noiseSigma=0.01  # Note: does nothing
        )

    if testFM:
        # Start Location
        start_px = 1
        start_py = 1
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        # Actions
        actions = np.zeros((5, 10))
        actions[0, 8] = 1.0
        actions[1, 7] = 1.0
        actions[2, 2] = 1.0
        actions[3, 7] = 1.0
        actions[4, 8] = 1.0
        actions = avar(torch.FloatTensor(actions))
        #print(actions)
        #print(actions[0])
        # Get Prediction
        start_state = avar(torch.FloatTensor(start_state))
        outputs, hidden = f.runOnActionSequence(start_state.unsqueeze(0),
                                                actions,
                                                hidden=None)
        finalOutput = outputs[-1]
        print(finalOutput.max())
        print('Pred final state',
              f.env.singularDiscreteStateToInts(finalOutput.data.numpy()[0]))

    #
    if testHenaff:
        print('Environment states')
        start_px = 0
        start_py = 0
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        goal_state = [0, 2]
        maxNumActions = 2
        print('Building planner')
        planner = HenaffPlanner(f, maxNumActions=maxNumActions)
        print('Starting generation')
        actions = planner.generatePlan(
            start_state,     # Starting state of the agent (one-hot singDisc)
            goal_state,      # Goal state of the agent as two ints (e.g. [gx, gy])
            eta=0.01,        # Learning rate given to ADAM
            noiseSigma=0.25, # Noise strength on inputs; overrides the init default
            niters=100,      # Number of optimization iterations; overrides the init default
            useGumbel=True,  # Use Gumbel-Softmax in the action sampling
            temp=1.0,        # Temperature of the Gumbel-Softmax method
            lambda_h=0.0     # Entropy regularization strength (negative encourages entropy)
        )
        print('START STATE:', start_px, start_py, start_orien)
        print(
            'FINAL ACTIONS:', ", ".join([
                str(a) + ' (' + NavigationTask.actions[a] + ')'
                for a in actions
            ]))
        print('GOAL STATE:', goal_state)
        newEnv = NavigationTask(agent_start_pos=[
            np.array([start_px, start_py]), NavigationTask.oriens[start_orien]
        ],
                                goal_pos=np.array(goal_state))
        for action in actions:
            newEnv.performAction(action)
        state = newEnv.getStateRep(oneHotOutput=False)
        pred_x = state[0]
        pred_y = state[1]
        pred_orien = NavigationTask.oriens[np.argmax(state[2:6])]
        print('PREDICTED FINAL STATE:', pred_x, pred_y, pred_orien)
def navmain():
    env = NavigationTask()  #(stochasticity=0.2)

    # Get the goal state: copy the state representation and replace the first
    # position vector with the goal position vector.
    state_f = env.getStateRep()
    inds = np.cumsum([0, env.w, env.h, len(env.oriens), env.w, env.h])
    state_f[inds[0]:inds[1]] = env._intToOneHot(env.goal_pos[0], env.w)
    state_f[inds[1]:inds[2]] = env._intToOneHot(env.goal_pos[1], env.h)

    state_i = env.getStateRep()  # get initial state
    with tf.Graph().as_default(), tf.Session() as sess:
        numActions = 2
        nIters = 100
        hp = Henaff_Planning(numActions, 10, 64, nIters,
                             0.0000001)  # initialize Henaff planning method
        init = tf.variables_initializer(hp.trainable_vars)
        sess.run(init)
        print(state_i, state_f)
        action_sequence = hp.optimize(state_i, state_f, env)

    # Convert the action sequence to a [num_actions] vector of action ids.
    action_sequence = np.argmax(action_sequence, 1)

    for action in action_sequence:
        print('\n')
        print('-Initial State-')
        env.display()
        print('-Action Taken-')
        env.performAction(action)
        print(env.actions[action])
        print('-Resultant State-')
        env.display()
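

# Hedged decoder for the flat one-hot state vector navmain() slices above:
# blocks of px (w), py (h), orientation (len(oriens)), gx (w), gy (h). The
# helper name is illustrative.
def decodeState(state, w, h, nOriens):
    inds = np.cumsum([0, w, h, nOriens, w, h])
    blocks = [state[inds[k]:inds[k + 1]] for k in range(5)]
    return [int(np.argmax(b)) for b in blocks]  # [px, py, orien, gx, gy]
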


# Build an env with the given INT inputs
def generateTask(px, py, orien, gx, gy):
    direction = NavigationTask.oriens[orien]
    gs = np.array([gx, gy])
    env = NavigationTask(agent_start_pos=[np.array([px, py]), direction],
                         goal_pos=gs)
    return env


env = NavigationTask()


# Function for running a single suite of tests (on one hyper-param set)
def runTests(lh,
             eta,
             noiseLevel,
             ug,
             cnum,
             temp=None,
             distType=0,
             difficulty='Hard',
             tasks=None,
             verbose=False,
             extraVerbose=False):

    numRepeats = 5
def main():

    ####################################################
    useFFANN = True
    trainingFFANN = False # 1
    manualTest = False # 2
    autoTest = False # 3
    henaffHyperSearch = False # 4
    runHenaffFFANN = False # 5
    ####################################################

    print(sys.argv)

    if len(sys.argv) > 1:
        if sys.argv[1] == '1': trainingFFANN = True
        if sys.argv[1] == '2': manualTest = True
        if sys.argv[1] == '3': autoTest = True
        if sys.argv[1] == '4': henaffHyperSearch = True
        if sys.argv[1] == '5': runHenaffFFANN = True

    if useFFANN:

        f_model_name = 'forward-ffann-noisy-wan-1.pt' # 6 gets 99% on 0.1% noise
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)

        ################################################################################################################
        if trainingFFANN:
            ############
            ts = "navigation-data-train-single-small.pickle"
            vs = "navigation-data-test-single-small.pickle"
            tsx_noisy = "noisier-actNoise-navigation-data-single.pickle"
            preload_name = f_model_name
            saveName = 'forward-ffann-noisy-wan-2.pt'
            ############
            print('Reading Data')
            with open(ts, 'rb') as inFile:
                print('\tReading', ts)
                trainSet = pickle.load(inFile)
            with open(vs, 'rb') as inFile:
                print('\tReading', vs)
                validSet = pickle.load(inFile)
            if preload_name is not None:
                print('Loading from', preload_name)
                f.load_state_dict(torch.load(preload_name))
            f.train(trainSet, validSet, noisyDataSetTxLoc=tsx_noisy, f_model_name=saveName)
            print('Saving to',saveName)
            torch.save(f.state_dict(), saveName)

        ################################################################################################################
        elif manualTest:
            ###
            #f_model_name = 'forward-ffann-noisy6.pt'
            ###
            f.load_state_dict( torch.load(f_model_name) )
            # One-hot layout: px(15) + py(15) + orien(4) + gx(15) + gy(15) + action(10) = 74
            start = np.zeros(74, dtype=np.float32)
            start[0+4] = 1                        # px = 4
            start[15+6] = 1                       # py = 6
            start[15+15+0] = 1                    # orien = 0
            start[15+15+4+8] = 1                  # gx = 8
            start[15+15+4+15+7] = 1               # gy = 7
            start[15+15+4+15+15+4] = 1.0          # action = 4
            f.test(start)
            print('-----\n','Starting manualTest loop')
            for i in range(5):
                width, height = 15, 15
                p_0 = np.array([npr.randint(0,width),npr.randint(0,height)])
                start_pos = [p_0, r.choice(NavigationTask.oriens)]
                goal_pos = np.array([ npr.randint(0,width), npr.randint(0,height) ])
                checkEnv = NavigationTask(
                    width=width, height=height, agent_start_pos=start_pos, goal_pos=goal_pos,
                    track_history=True, stochasticity=0.0, maxSteps=10)
                s_0 = checkEnv.getStateRep()
                #a1, a2 = np.zeros(10), np.zeros(10)
                #a1[ npr.randint(0,10) ] = 1
                #a2[ npr.randint(0,10) ] = 1
                numActions = 3
                currState = avar( torch.FloatTensor(s_0).unsqueeze(0) )
                print('Start State')
                f.printState( currState[0] )
                actionSet = []
                for j in range(numActions):
                    action = np.zeros( 10 )
                    action[ npr.randint(0,10) ] = 1
                    action += npr.randn( 10 )*0.1
                    action = Utils.softmax( action )
                    print('\tSoft Noisy Action ',j,'=',action)
                    #### Apply Gumbel Softmax ####
                    temperature = 0.01
                    logProbAction = torch.log( avar(torch.FloatTensor(action)) ) 
                    actiong = GumbelSoftmax.gumbel_softmax(logProbAction, temperature)
                    ##############################
                    print('\tGumbel Action ',j,'=',actiong.data.numpy())
                    actionSet.append( actiong )
                    checkEnv.performAction( np.argmax(action) )
                    a = actiong  # avar( torch.FloatTensor(actiong) )
                    currState = f.forward( torch.cat([currState[0],a]).unsqueeze(0) )
                    print("Intermediate State",j)
                    f.printState( currState[0] )
                #checkEnv.performAction(np.argmax(a1))
                #checkEnv.performAction(np.argmax(a2))
                s_1 = checkEnv.getStateRep()
                #inval = np.concatenate( (s_0,a1) )
                #outval1 = f.forward( avar(torch.FloatTensor(inval).unsqueeze(0)) )
                #print(outval1.shape)
                #print(a2.shape)
                #inval2 = np.concatenate( (outval1[0].data.numpy(),a2) )
                #outval2 = f.forward( avar(torch.FloatTensor(inval2).unsqueeze(0)) )
                for action in actionSet:
                    f.printAction(action)
                print('Predicted')
                f.printState( currState[0] )
                print('Actual')
                s1 = avar( torch.FloatTensor( s_1 ).unsqueeze(0) )
                f.printState( s1[0] ) 
                print("Rough accuracy", torch.sum( (currState - s1).pow(2) ).data[0] )
                #print('Predicted',currState.data[0].numpy())
                #print('Actual',s_1)
                #outval1 = f.test(inval,s_1)
                print('----\n')
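
        # Reference sketch of the Gumbel-Softmax sample used in the loop above;
        # an assumed re-implementation for illustration, not the project's
        # GumbelSoftmax module: add Gumbel(0, 1) noise to the log-probabilities
        # and take a temperature-scaled, numerically stable softmax.
        def gumbelSoftmaxSketch(logProbs, temperature):
            u = torch.rand(logProbs.size())
            g = avar(-torch.log(-torch.log(u + 1e-20) + 1e-20))  # Gumbel(0, 1) noise
            y = (logProbs + g) / temperature
            z = torch.exp(y - y.max())
            return z / z.sum()
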
        if autoTest:
            print('Loading from',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )
            # TODO

        ################################################################################################################
        if runHenaffFFANN:
            print('Loading from',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15+15] = 1
            start[15+15+4+0] = 1
            start[15+15+4+15+4] = 1
            print(f.env.deconcatenateOneHotStateVector(start))
            print('Building planner')
            planner = HenaffPlanner(f,maxNumActions=2)
            print('Starting generation')
            actions = planner.generatePlan(
                                start,
                                eta=0.1,
                                noiseSigma=0.5,
                                niters=500,
                                goal_state=None,
                                useCE=True,
                                verbose=True,
                                extraVerbose=False,
                                useGumbel=True,
                                temp=0.1,
                                lambda_h=-0.005,
                                useIntDistance=False
                                )
            print('FINAL ACTIONS:', actions)

        ################################################################################################################
        if henaffHyperSearch:
            print('Loading ',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )
            
            ##################### Hyper-params #####################
            # lambda_hs = [0.0,0.01,-0.01,0.05,-0.05,0.005,-0.005]            # Entropy strength
            # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
            # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
            # temperatures = [0.1,0.01,0.001,1.0]                             # Temperature for Gumbel-softmax
            # noiseSigmas = [0.0,0.01,0.02,0.05,0.1,0.25,0.5,0.75,1.0,1.25]   # Noise strength on input
            ## Init try
            # lambda_hs = [0.0,0.005,-0.005]                                  # Entropy strength
            # etas = [0.5,0.25,0.1,0.05,0.025,0.01,0.005,0.001,0.0005]        # Learning rate
            # useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
            # temperatures = [0.1,0.01,0.001]                             # Temperature for Gumbel-softmax
            # noiseSigmas = [0.0,0.05,0.1,0.5,1.0]   # Noise strength on input
            ## Only use ones with decent results
            lambda_hs = [0.0,-0.005]                                  # Entropy strength
            etas = [0.5,0.25,0.1,0.005]        # Learning rate
            useGumbels = [True,False]                                       # Whether to use Gumbel-softmax
            temperatures = [0.1,0.001]                             # Temperature for Gumbel-softmax
            noiseSigmas = [0.5,1.0]   # Noise strength on input
            ########################################################

            ###### Settings ######
            niters = 75
            verbose = False
            extraVerbose = False
            numRepeats = 10
            fileToWriteTo = 'hyper-param-results.txt' # Set to None to do no writing
            distType = 1 # 0 = MSE, 1 = CE, 2 = dist
            ######################

            # Build an env with the given INT inputs
            def generateTask(px,py,orien,gx,gy):
                direction = NavigationTask.oriens[orien]
                gs = np.array([gx, gy])
                env = NavigationTask(agent_start_pos=[np.array([px,py]), direction],goal_pos=gs)
                return env

            # Function for running a single suite of tests (on one hyper-param set)
            def runTests(lh,eta,noiseLevel,ug,cnum,temp=None,distType=0):
                # Define tasks
                tasks = [
                    [1, generateTask(0,0,0,0,2)],
                    [1, generateTask(5,5,1,8,5)],
                    [1, generateTask(3,2,2,3,0)],
                    [1, generateTask(9,9,3,7,9)],
                    [2, generateTask(0,0,0,0,6)],
                    [2, generateTask(0,0,0,0,8)],
                    [2, generateTask(2,3,0,2,8)],
                    [2, generateTask(0,0,0,0,10)],
                    [3, generateTask(1,1,0,2,2)]
                ]
                # Choose dist type
                if distType == 0:   useCE = False; intDist = False
                elif distType == 1: useCE = True;  intDist = False
                elif distType == 2: useCE = False; intDist = True 
                # Display status
                wstring = cnum + ',lambda_h=' + str(lh) + ',eta=' + str(eta) + ',sigma=' + str(noiseLevel) + ',dType=' + str(distType) + ',ug=' + str(ug)
                if ug: wstring += ',temp=' + str(temp) 
                # For each task, repeated a few times, attempt to solve the problem
                score, tot = 0, 0
                for i, task in enumerate(tasks):
                    #print(i)
                    for _ in range(numRepeats):
                        planner = HenaffPlanner(f,maxNumActions=task[0])
                        cenv = task[1]
                        actions = planner.generatePlan(
                                cenv.getStateRep(oneHotOutput=True),
                                eta=eta,
                                noiseSigma=noiseLevel,
                                niters=niters,
                                goal_state=None,
                                useCE=useCE,  # honor the distType selection above
                                verbose=verbose,
                                extraVerbose=extraVerbose,
                                useGumbel=ug,
                                temp=temp,
                                lambda_h=lh,
                                useIntDistance=intDist )
                        # Check for correctness
                        for a in actions: cenv.performAction( a )
                        r = cenv.getReward()
                        correct = (r==1)
                        tot += 1
                        if correct: score += 1
                wstring += ' -> Score:' + str(score) + '/' + str(tot)
                print(wstring)
                # Write output
                if fileToWriteTo is not None:
                    with open(fileToWriteTo,'a') as filehandle:
                        filehandle.write( wstring + '\n' )

            # Run tasks over all hyper-parameter settings
            N_p, cp = len(lambda_hs)*len(etas)*len(noiseSigmas)*(1 + len(temperatures)), 1
            for lambda_h in lambda_hs:
                for eta in etas:
                    for noiseLevel in noiseSigmas:
                        for ug in useGumbels:
                            if ug:
                                for temp in temperatures: 
                                    ps = str(cp) + '/' + str(N_p)
                                    runTests(lambda_h,eta,noiseLevel,ug,ps,temp,distType=distType)
                                    cp += 1
                            else: 
                                ps = str(cp) + '/' + str(N_p)
                                runTests(lambda_h,eta,noiseLevel,ug,ps,distType=distType)
                                cp += 1
def main():
    ####################################################
    trainingLSTM = False
    overwrite = False
    runHenaff = False
    testFM = False
    ###
    useFFANN = True
    trainingFFANN = False
    ####################################################
    if useFFANN:
        f_model_name = 'forward-ffann-stochastic.pt'
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)
        if trainingFFANN:
            ts = "navigation-data-train-single-small.pickle"
            vs = "navigation-data-test-single-small.pickle"
            print('Reading Data')
            with open(ts, 'rb') as inFile:
                print('\tReading', ts)
                trainSet = pickle.load(inFile)
            with open(vs, 'rb') as inFile:
                print('\tReading', vs)
                validSet = pickle.load(inFile)

            f.train(trainSet, validSet)
            print('Saving to', f_model_name)
            torch.save(f.state_dict(), f_model_name)
        else:
            f.load_state_dict(torch.load(f_model_name))
            # One-hot layout: px(15) + py(15) + orien(4) + gx(15) + gy(15) + action(10) = 74
            start = np.zeros(74, dtype=np.float32)
            start[0 + 4] = 1                       # px = 4
            start[15 + 6] = 1                      # py = 6
            start[15 + 15 + 0] = 1                 # orien = 0
            start[15 + 15 + 4 + 8] = 1             # gx = 8
            start[15 + 15 + 4 + 15 + 7] = 1        # gy = 7
            start[15 + 15 + 4 + 15 + 15 + 4] = 1.0 # action = 4

            f.test(start)

            for i in range(10):
                width, height = 15, 15
                p_0 = np.array([npr.randint(0, width), npr.randint(0, height)])
                start_pos = [p_0, r.choice(NavigationTask.oriens)]
                goal_pos = np.array(
                    [npr.randint(0, width),
                     npr.randint(0, height)])
                checkEnv = NavigationTask(width=width,
                                          height=height,
                                          agent_start_pos=start_pos,
                                          goal_pos=goal_pos,
                                          track_history=True,
                                          stochasticity=0.0,
                                          maxSteps=10)
                s_0 = checkEnv.getStateRep()
                a = np.zeros(10)
                a[npr.randint(0, 10)] = 1
                inval = np.concatenate((s_0, a))
                checkEnv.performAction(np.argmax(a))
                s_1 = checkEnv.getStateRep()
                f.test(inval, s_1)
                print('----')

    else:
        f_model_name = 'forward-lstm-stochastic.pt'
        s = 'navigation'  # 'transport'
        trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
        print('Reading Data')
        train, valid = SeqData(trainf), SeqData(validf)
        f = ForwardModelLSTM(train.lenOfInput, train.lenOfState)
        if trainingLSTM:
            if os.path.exists(f_model_name) and not overwrite:
                print('Loading from', f_model_name)
                f.load_state_dict(torch.load(f_model_name))
            else:
                f.train(train, valid)
                print('Saving to', f_model_name)
                torch.save(f.state_dict(), f_model_name)
            print('Q-test')
            bdata, blabels, _ = valid.next(2000, nopad=True)
            acc1, _ = f._accuracyBatch(bdata, blabels, valid.env)
            print(acc1)
        if runHenaff:
            print('Loading from', f_model_name)
            f.load_state_dict(torch.load(f_model_name))
            # seq, label = train.randomTrainingPair()
            # start = seq[0][0:64]
            # start[63] = 0
            # start[63-15] = 0
            # start[15+15+4+5] = 1
            # start[15+15+4+15+5] = 1
            # start
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15 + 15] = 1
            start[15 + 15 + 4 + 0] = 1
            start[15 + 15 + 4 + 15 + 2] = 1
            print(train.env.deconcatenateOneHotStateVector(start))
            #sys.exit(0)
            print('Building planner')
            planner = HenaffPlanner(f)
            print('Starting generation')
            planner.generatePlan(start, train.env, niters=150)
        if testFM:
            f.load_state_dict(torch.load(f_model_name))
            start = np.zeros(64)
            start[0 + 2] = 1
            start[15 + 3] = 1
            start[15 + 15 + 0] = 1
            start[15 + 15 + 4 + 5] = 1
            start[15 + 15 + 4 + 15 + 5] = 1
            action = np.zeros(10)
            deconRes = train.env.deconcatenateOneHotStateVector(start)
            print('Start state')
            print('px', np.argmax(deconRes[0]))
            print('py', np.argmax(deconRes[1]))
            print('orien', np.argmax(deconRes[2]))
            print('gx', np.argmax(deconRes[3]))
            print('gy', np.argmax(deconRes[4]))
            action[5] = 1.0
            stateAction = [
                torch.cat([(torch.FloatTensor(start)),
                           (torch.FloatTensor(action))])
            ]
            #print('SA:',stateAction)
            #print('Start State')
            #printState( stateAction[0][0:-10], train.env )
            print('Action', NavigationTask.actions[np.argmax(action)])
            f.reInitialize()
            seq = avar(torch.cat(stateAction).view(
                len(stateAction), 1, -1))  # [seqlen x batchlen x hidden_size]
            result = f.forward(seq)
            print('PredState')
            printState(result, train.env)
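

# Hedged helper mirroring the manual one-hot indexing used in the tests above
# (blocks of px, py, orien, gx, gy with widths 15, 15, 4, 15, 15 on the default
# 15x15 grid; the name and default arguments are illustrative):
def oneHotState(px, py, orien, gx, gy, w=15, h=15, nOriens=4):
    state = np.zeros(w + h + nOriens + w + h)
    offsets = np.cumsum([0, w, h, nOriens, w])
    for off, val in zip(offsets, [px, py, orien, gx, gy]):
        state[off + val] = 1.0
    return state
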
def main():

    ####### Settings #######
    preloadModel = True
    runTraining = True  # Task 1
    testHenaff = False  # Task 2
    testFM = False  # Task 3
    henaffHyperSearch = False  # Task 4
    ########################

    ############################ External Files ############################
    ts = "navigation-data-train-sequence-singularDiscrete.pickle"
    vs = "navigation-data-test-sequence-singularDiscrete.pickle"
    f_model_name_to_preload = 'forward-lstm-singDisc-scratch-pt0d5-3.pt'
    f_model_name_to_save = 'forward-lstm-singDisc-scratch-pt0d5-4.pt'  # For training
    ########################################################################

    # Define shell environment and empty forward model
    exampleEnv = NavigationTask()
    f = ForwardModelLSTM_SD(exampleEnv)

    # Preload the forward model, if wanted
    if preloadModel and f_model_name_to_preload is not None:
        f.load_state_dict(torch.load(f_model_name_to_preload))

    # Run training if desired
    if runTraining:
        trainSeqs = SingDiscSeqData(ts, exampleEnv)
        validSeqs = SingDiscSeqData(vs, exampleEnv)
        print('Saving to', f_model_name_to_save)
        f.runTraining(trainSeqs,
                      validSeqs,
                      modelFilenameToSave=f_model_name_to_save)

    if testFM:
        # Start Location
        start_px = 1
        start_py = 1
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        # Actions
        actions = np.zeros((5, 10))
        actions[0, 8] = 1.0
        actions[1, 7] = 1.0
        actions[2, 2] = 1.0
        actions[3, 7] = 1.0
        actions[4, 8] = 1.0
        actions = avar(torch.FloatTensor(actions))
        #print(actions)
        #print(actions[0])
        # Get Prediction
        start_state = avar(torch.FloatTensor(start_state))
        outputs, hidden = f.runOnActionSequence(start_state.unsqueeze(0),
                                                actions,
                                                hidden=None)
        finalOutput = outputs[-1]
        print(finalOutput.max())
        print('Pred final state',
              f.env.singularDiscreteStateToInts(finalOutput.data.numpy()[0]))

    #
    if testHenaff:
        print('Environment states')
        start_px = 0
        start_py = 0
        start_orien = 0
        start_state = exampleEnv.singularDiscreteStateFromInts(
            start_px, start_py, start_orien)
        goal_state = [0, 2]
        maxNumActions = 2
        print('Building planner')
        planner = HenaffPlanner(f, maxNumActions=maxNumActions)
        print('Starting generation')
        actions = planner.generatePlan(
            start_state,     # Starting state of the agent (one-hot singDisc)
            goal_state,      # Goal state of the agent as two ints (e.g. [gx, gy])
            eta=0.01,        # Learning rate given to ADAM
            noiseSigma=0.25, # Noise strength on inputs; overrides the init default
            niters=100,      # Number of optimization iterations; overrides the init default
            useGumbel=True,  # Use Gumbel-Softmax in the action sampling
            temp=1.0,        # Temperature of the Gumbel-Softmax method
            lambda_h=0.0     # Entropy regularization strength (negative encourages entropy)
        )
        print('START STATE:', start_px, start_py, start_orien)
        print(
            'FINAL ACTIONS:', ", ".join([
                str(a) + ' (' + NavigationTask.actions[a] + ')'
                for a in actions
            ]))
        print('GOAL STATE:', goal_state)
        newEnv = NavigationTask(agent_start_pos=[
            np.array([start_px, start_py]), NavigationTask.oriens[start_orien]
        ],
                                goal_pos=np.array(goal_state))
        for action in actions:
            newEnv.performAction(action)
        state = newEnv.getStateRep(oneHotOutput=False)
        pred_x = state[0]
        pred_y = state[1]
        pred_orien = NavigationTask.oriens[np.argmax(state[2:6])]
        print('PREDICTED FINAL STATE:', pred_x, pred_y, pred_orien)

    if henaffHyperSearch:
        print('Loading ', f_model_name_to_preload)
        f.load_state_dict(torch.load(f_model_name_to_preload))

        ##################### Hyper-params #####################
        lambda_hs = [0.0, -0.005]  # Entropy strength
        etas = [0.5, 0.25, 0.1, 0.005]  # Learning rate
        useGumbels = [True, False]  # Whether to use Gumbel-softmax
        temperatures = [0.1, 0.001]  # Temperature for Gumbel-softmax
        noiseSigmas = [0.5, 1.0]  # Noise strength on input
        ########################################################

        ###### Settings ######
        niters = 75
        verbose = False
        extraVerbose = False
        numRepeats = 10
        fileToWriteTo = 'hyper-param-results.txt'  # Set to None to do no writing
        distType = 1  # 0 = MSE, 1 = CE, 2 = dist

        ######################

        # Build an env with the given INT inputs
        def generateTask(px, py, orien, gx, gy):
            direction = NavigationTask.oriens[orien]
            gs = np.array([gx, gy])
            env = NavigationTask(
                agent_start_pos=[np.array([px, py]), direction], goal_pos=gs)
            return env

        # Function for running a single suite of tests (on one hyper-param set)
        def runTests(lh, eta, noiseLevel, ug, cnum, temp=None, distType=0):
            # Define tasks
            tasks = [[1, generateTask(0, 0, 0, 0, 2)],
                     [1, generateTask(5, 5, 1, 8, 5)],
                     [1, generateTask(3, 2, 2, 3, 0)],
                     [1, generateTask(9, 9, 3, 7, 9)],
                     [2, generateTask(0, 0, 0, 0, 6)],
                     [2, generateTask(0, 0, 0, 0, 8)],
                     [2, generateTask(2, 3, 0, 2, 8)],
                     [2, generateTask(0, 0, 0, 0, 10)],
                     [3, generateTask(1, 1, 0, 2, 2)]]
            # Choose dist type
            if distType == 0:
                useCE = False
                intDist = False
            elif distType == 1:
                useCE = True
                intDist = False
            elif distType == 2:
                useCE = False
                intDist = True
            # Display status
            wstring = cnum + ',lambda_h=' + str(lh) + ',eta=' + str(
                eta) + ',sigma=' + str(noiseLevel) + ',dType=' + str(
                    distType) + ',ug=' + str(ug)
            if ug: wstring += ',temp=' + str(temp)
            # For each task, repeated a few times, attempt to solve the problem
            score, tot = 0, 0
            for i, task in enumerate(tasks):
                #print(i)
                for _ in range(numRepeats):
                    planner = HenaffPlanner(f, maxNumActions=task[0])
                    cenv = task[1]
                    actions = planner.generatePlan(
                        cenv.getStateRep(oneHotOutput=True),
                        eta=eta,
                        noiseSigma=noiseLevel,
                        niters=niters,
                        goal_state=None,
                        useCE=useCE,  # honor the distType selection above
                        verbose=verbose,
                        extraVerbose=extraVerbose,
                        useGumbel=ug,
                        temp=temp,
                        lambda_h=lh,
                        useIntDistance=intDist)
                    # Check for correctness
                    for a in actions:
                        cenv.performAction(a)
                    r = cenv.getReward()
                    correct = (r == 1)
                    tot += 1
                    if correct: score += 1
            wstring += ' -> Score:' + str(score) + '/' + str(tot)
            print(wstring)
            # Write output
            if fileToWriteTo is not None:
                with open(fileToWriteTo, 'a') as filehandle:
                    filehandle.write(wstring + '\n')

        # Run tasks over all hyper-parameter settings
        N_p, cp = len(lambda_hs) * len(etas) * len(noiseSigmas) * (
            1 + len(temperatures)), 1
        for lambda_h in lambda_hs:
            for eta in etas:
                for noiseLevel in noiseSigmas:
                    for ug in useGumbels:
                        if ug:
                            for temp in temperatures:
                                ps = str(cp) + '/' + str(N_p)
                                runTests(lambda_h,
                                         eta,
                                         noiseLevel,
                                         ug,
                                         ps,
                                         temp,
                                         distType=distType)
                                cp += 1
                        else:
                            ps = str(cp) + '/' + str(N_p)
                            runTests(lambda_h,
                                     eta,
                                     noiseLevel,
                                     ug,
                                     ps,
                                     distType=distType)
                            cp += 1
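

# Equivalent enumeration of the hyper-parameter grid above with
# itertools.product; each (lambda_h, eta, sigma) triple yields one non-Gumbel
# config plus one config per temperature, matching the (1 + len(temperatures))
# factor in N_p. A hedged sketch, not part of the original search code.
import itertools

def iterConfigs(lambda_hs, etas, noiseSigmas, temperatures):
    for lh, eta, ns in itertools.product(lambda_hs, etas, noiseSigmas):
        yield lh, eta, ns, False, None  # ug=False, no temperature
        for temp in temperatures:
            yield lh, eta, ns, True, temp  # ug=True with this temperature
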
def main():
    ####################################################
    trainingLSTM = False
    overwrite = False
    runHenaff = False
    testFM = False
    ###
    useFFANN = True
    trainingFFANN = False
    manualTest = False
    autoTest = False
    henaffHyperSearch = False
    runHenaffFFANN = True
    ####################################################
    if useFFANN:
        f_model_name = 'forward-ffann-noisy-wan-1.pt' # 6 gets 99% on 0.1% noise
        exampleEnv = NavigationTask()
        f = ForwardModelFFANN(exampleEnv)

        if trainingFFANN:
            ############
            ts = "navigation-data-train-single-small.pickle"
            vs = "navigation-data-test-single-small.pickle"
            tsx_noisy = "noisier-actNoise-navigation-data-single.pickle"
            preload_name = f_model_name
            saveName = 'forward-ffann-noisy-wan-2.pt'
            ############
            print('Reading Data')
            with open(ts, 'rb') as inFile:
                print('\tReading', ts)
                trainSet = pickle.load(inFile)
            with open(vs, 'rb') as inFile:
                print('\tReading', vs)
                validSet = pickle.load(inFile)
            if preload_name is not None:
                print('Loading from', preload_name)
                f.load_state_dict(torch.load(preload_name))
            f.train(trainSet, validSet, noisyDataSetTxLoc=tsx_noisy, f_model_name=saveName)
            print('Saving to',saveName)
            torch.save(f.state_dict(), saveName)

        elif manualTest:
            def softmax(x):
                e_x = np.exp(x - np.max(x))
                return e_x / e_x.sum()
            ###
            #f_model_name = 'forward-ffann-noisy6.pt'
            ###
            f.load_state_dict( torch.load(f_model_name) )
            # One-hot layout: px(15) + py(15) + orien(4) + gx(15) + gy(15) + action(10) = 74
            start = np.zeros(74, dtype=np.float32)
            start[0+4] = 1                        # px = 4
            start[15+6] = 1                       # py = 6
            start[15+15+0] = 1                    # orien = 0
            start[15+15+4+8] = 1                  # gx = 8
            start[15+15+4+15+7] = 1               # gy = 7
            start[15+15+4+15+15+4] = 1.0          # action = 4
            f.test(start)
            print('-----\n','Starting manualTest loop')
            for i in range(5):
                width, height = 15, 15
                p_0 = np.array([npr.randint(0,width),npr.randint(0,height)])
                start_pos = [p_0, r.choice(NavigationTask.oriens)]
                goal_pos = np.array([ npr.randint(0,width), npr.randint(0,height) ])
                checkEnv = NavigationTask(
                    width=width, height=height, agent_start_pos=start_pos, goal_pos=goal_pos,
                    track_history=True, stochasticity=0.0, maxSteps=10)
                s_0 = checkEnv.getStateRep()
                #a1, a2 = np.zeros(10), np.zeros(10)
                #a1[ npr.randint(0,10) ] = 1
                #a2[ npr.randint(0,10) ] = 1
                numActions = 3
                currState = avar( torch.FloatTensor(s_0).unsqueeze(0) )
                print('Start State')
                f.printState( currState[0] )
                actionSet = []
                for j in range(numActions):
                    action = np.zeros( 10 )
                    action[ npr.randint(0,10) ] = 1
                    action += npr.randn( 10 )*0.1
                    action = softmax( action )
                    print('\tSoft Noisy Action ',j,'=',action)
                    #### Apply Gumbel Softmax ####
                    temperature = 0.01
                    logProbAction = torch.log( avar(torch.FloatTensor(action)) ) 
                    actiong = gumbel_softmax(logProbAction, temperature)
                    ##############################
                    print('\tGumbel Action ',j,'=',actiong.data.numpy())
                    actionSet.append( actiong )
                    checkEnv.performAction( np.argmax(action) )
                    a = actiong  # avar( torch.FloatTensor(actiong) )
                    currState = f.forward( torch.cat([currState[0],a]).unsqueeze(0) )
                    print("Intermediate State",j)
                    f.printState( currState[0] )
                #checkEnv.performAction(np.argmax(a1))
                #checkEnv.performAction(np.argmax(a2))
                s_1 = checkEnv.getStateRep()
                #inval = np.concatenate( (s_0,a1) )
                #outval1 = f.forward( avar(torch.FloatTensor(inval).unsqueeze(0)) )
                #print(outval1.shape)
                #print(a2.shape)
                #inval2 = np.concatenate( (outval1[0].data.numpy(),a2) )
                #outval2 = f.forward( avar(torch.FloatTensor(inval2).unsqueeze(0)) )
                for action in actionSet:
                    f.printAction(action)
                print('Predicted')
                f.printState( currState[0] )
                print('Actual')
                s1 = avar( torch.FloatTensor( s_1 ).unsqueeze(0) )
                f.printState( s1[0] ) 
                print("Rough accuracy", torch.sum( (currState - s1).pow(2) ).data[0] )
                #print('Predicted',currState.data[0].numpy())
                #print('Actual',s_1)
                #outval1 = f.test(inval,s_1)
                print('----\n')
        if autoTest:
            print('Loading from',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )


        if runHenaffFFANN:
            print('Loading from',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15+15] = 1
            start[15+15+4+0] = 1
            start[15+15+4+15+4] = 1
            print(f.env.deconcatenateOneHotStateVector(start))
            #sys.exit(0)
            print('Building planner')
            planner = HenaffPlanner(f,maxNumActions=2)
            print('Starting generation')
            actions = planner.generatePlan(start, niters=100, extraVerbose=False)

        if henaffHyperSearch:
            print('Loading from',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )
            ### Hyper-params ###
            lambda_h = 0.01  # Entropy strength
            eta = 0.5        # Learning rate
            ###
    else:
        f_model_name = 'forward-lstm-stochastic.pt'    
        s = 'navigation' # 'transport'
        trainf, validf = s + "-data-train-small.pickle", s + "-data-test-small.pickle"
        print('Reading Data')
        train, valid = SeqData(trainf), SeqData(validf)
        f = ForwardModelLSTM(train.lenOfInput,train.lenOfState)
        if trainingLSTM:
            if os.path.exists(f_model_name) and not overwrite:
                print('Loading from',f_model_name)
                f.load_state_dict( torch.load(f_model_name) )
            else:
                f.train(train,valid)
                print('Saving to',f_model_name)
                torch.save(f.state_dict(), f_model_name)
            print('Q-test')
            bdata, blabels, _ = valid.next(2000, nopad=True)
            acc1, _ = f._accuracyBatch(bdata,blabels,valid.env)
            print(acc1)
        if runHenaff:
            print('Loading from',f_model_name)
            f.load_state_dict( torch.load(f_model_name) )
            # seq, label = train.randomTrainingPair()
            # start = seq[0][0:64]
            # start[63] = 0
            # start[63-15] = 0
            # start[15+15+4+5] = 1
            # start[15+15+4+15+5] = 1
            # start
            start = np.zeros(64)
            start[0] = 1
            start[15] = 1
            start[15+15] = 1
            start[15+15+4+0] = 1
            start[15+15+4+15+2] = 1
            print(train.env.deconcatenateOneHotStateVector(start))
            #sys.exit(0)
            print('Building planner')
            planner = HenaffPlanner(f)
            print('Starting generation')
            planner.generatePlan(start,train.env,niters=150)
        if testFM:
            f.load_state_dict( torch.load(f_model_name) )
            start = np.zeros(64)
            start[0+2] = 1
            start[15+3] = 1
            start[15+15+0] = 1
            start[15+15+4+5] = 1
            start[15+15+4+15+5] = 1
            action = np.zeros(10)
            deconRes = train.env.deconcatenateOneHotStateVector(start)
            print('Start state')
            print('px',    np.argmax(deconRes[0]) )
            print('py',    np.argmax(deconRes[1]) )
            print('orien', np.argmax(deconRes[2]) )
            print('gx',    np.argmax(deconRes[3]) )
            print('gy',    np.argmax(deconRes[4]) )
            action[5] = 1.0
            stateAction = [torch.cat([(torch.FloatTensor(start)), (torch.FloatTensor(action))])]
            #print('SA:',stateAction)
            #print('Start State')
            #printState( stateAction[0][0:-10], train.env )
            print('Action',NavigationTask.actions[np.argmax( action )])
            f.reInitialize()
            seq = avar(torch.cat(stateAction).view(len(stateAction), 1, -1)) # [seqlen x batchlen x hidden_size]
            result = f.forward(seq)
            print('PredState')
            printState( result, train.env )
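

# printState is called above but not defined in this excerpt; a hedged sketch
# consistent with deconcatenateOneHotStateVector usage elsewhere in this file
# (the name marks it as a sketch, not the original helper):
def printStateSketch(state, env):
    blocks = env.deconcatenateOneHotStateVector(state.data.numpy().flatten())
    for name, block in zip(['px', 'py', 'orien', 'gx', 'gy'], blocks):
        print(name, np.argmax(block))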