Example #1
    def popBackUntil(self, pop_back_until):
        # Optionally dump the stack before popping, so before/after states
        # can be compared when debugging.
        if DEBUG_MODE and STACK_PRINTING:
            dump_incremental(
                "StackBEFORE" + str(self.t) + str(self.current_feature),
                self.Stack)
        SSAimplementor.popBackUntil(self, pop_back_until)
        self.update_pointers()

        # Dump the stack again after the pop and pointer update.
        if DEBUG_MODE and STACK_PRINTING:
            dump_incremental("Stack" + str(self.t) + str(self.current_feature),
                             self.Stack)
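For context, a minimal sketch of what dump_incremental might do, assuming it pickles an object under a name with an incrementing suffix so repeated dumps do not overwrite each other (only the helper's name comes from the snippet; everything else here is an assumption):

import pickle

_dump_counters = {}  # hypothetical per-key counters

def dump_incremental(key, obj):
    # Pickle obj to '<key>_<n>.pkl', bumping n on every call for the same key.
    n = _dump_counters.get(key, 0)
    _dump_counters[key] = n + 1
    with open("%s_%d.pkl" % (key, n), "wb") as f:
        pickle.dump(obj, f)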
Example #2
        agent.num_episodes = 0
        agent.learner.printDevelopmentAtari(frames=0)
        agent.index = 0
    starttime = time.time()
    walltime_consumed = 0.0  # defined up front so the final print works even if no episode runs

    for i in range(agent.index, len(envs)):
        env = envs[i]
        print("starting mountain car environment")
        if not interrupted:
            agent.taskblock_t = 0
            agent.learner.new_task([i])
        for item in env.__dict__.items():
            print(item)
        while agent.taskblock_t < FRAMES_PER_TASK:
            print("starting new episode at taskblock_t: ", agent.taskblock_t)
            consumed_steps = perform_episode(args.VISUAL, env, agent,
                                             args.run * 100000 + agent.num_episodes,
                                             agent.total_t)
            agent.taskblock_t += consumed_steps
            agent.total_t += consumed_steps  # update here: integers are passed by value, so the callee cannot modify them
            agent.num_episodes += 1
            agent.learner.printDevelopmentAtari(frames=agent.total_t)
            walltime_consumed = time.time() - starttime
            if walltime_consumed >= 0.9 * walltime:
                break  # stop the current task block when near the walltime budget
        agent.learner.end_task()
        if walltime_consumed >= 0.9 * walltime:
            break  # do not start a new task block past the budget either

    agent.learner.save(filename)
    dump_incremental(filename + "_agent", agent)
    print("stopping at time ", walltime_consumed)
    exit(0)
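The loop above relies on perform_episode returning the number of environment steps one episode consumed, which is then added to the task and global counters. A hypothetical stub of that contract (gym-style env API assumed; a real implementation would query the agent for actions instead of sampling randomly):

def perform_episode(visual, env, agent, episode_id, total_t):
    # Run a single episode and return how many steps it consumed.
    obs = env.reset()
    done = False
    steps = 0
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
        steps += 1
    return steps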
Example #3
    def save_stats(self, filename):
        # Persist the stats object, if one was collected, next to the other run artifacts.
        if self.stats is not None:
            dump_incremental(filename + '_stats_object', self.stats)
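A possible call site, assuming self.stats was populated during training (the surrounding class is not shown, so the attribute path is an assumption):

agent.learner.save_stats(filename)  # writes filename + '_stats_object' via dump_incremental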
Example #4
        )  # network has the full action set, but only uses the minimal set for each task
        agent.total_t = 0
        agent.num_episodes = 0
        agent.learner.printDevelopmentAtari(frames=0)
        agent.index = 0
    starttime = time.time()

    if args.experiment_type == "print_diversity":
        data = random_data()
        output_div = []
        performance_diversities = []
        div = agent.learner.get_output_diversity(data, metric_type="totalvar")
        print("div = " + str(div))
        output_div.append(div)
        performance_diversities.append(None)
        dump_incremental(filename + "_outputdiversity_totalvar",
                         (output_div, performance_diversities))
        exit(0)
    elif args.experiment_type == "randomBaseline":
        iterations = 10
        randomBaselines = {}
        total_episodes = 0
        total_t = 0
        for i in indices:
            terminal_file = [
                open("terminal_x_file" + str(i) + ".txt", "w+"),
                open("terminal_theta_file" + str(i) + ".txt", "w+")
            ]
            performances = []
            j = indices[i]
            env = envs[j]
            print("task ", j)