        # A small vertical Jacobian torque makes ground contact ambiguous, so
        # use a soft label of 0.5 rather than a hard 0/1.
        if abs(cpg['jacobianTorques'][2, self.workerID]) < TORQUE_THRES:
            on_ground = 0.5

        if not self.workerID:  # only worker 0 prints debug output
            print(a_dist, a, pred_too_high, too_high, np.var(state))
        return state, a, value, rnn_state, pred_too_high, too_high, pred_on_ground, on_ground

if __name__ == "__main__":
    ## begin TensorFlow initialization:

    # initialize groupLock
    groups = [['main'], []]
    for i in range(N_WORKERS):
        workerName = 'W_%i' % i  # worker name
        groups[1].append(workerName)
    groupLock = GroupLock.GroupLock(groups)
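    # Group 0 contains only the main thread and group 1 every worker, so the
    # lock can alternate control between the trainer and its workers.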

    groupLock.acquire(0, 'main')  #lock here

    SESS = tf.Session()

    # Copy cpg0 as initial cpg value
    cpg = copy(cpg0)

    # initialize optimizers and workers
    with tf.device("/cpu:0"):
        trainer = tf.contrib.opt.NadamOptimizer(LR_AC, use_locking=True)
        #trainer = tf.train.AdadeltaOptimizer(LR_AC)
        GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE)  # global net: we only need its parameters
        workers = []  # create workers
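
GroupLock itself is not shown in these excerpts. The sketch below is a hypothetical re-implementation of the alternating two-group lock the examples appear to rely on; the interface (GroupLock(groups), acquire(group_id, name), release(group_id, name)) is inferred from the calls above, and this is an illustration, not the imported module.

import threading

class SimpleGroupLock:
    def __init__(self, groups):
        self.groups = groups                  # e.g. [['main'], ['W_0', 'W_1']]
        self.cond = threading.Condition()
        self.active = 0                       # index of the group allowed to run
        self.held = [set() for _ in groups]   # names currently holding each group

    def acquire(self, group_id, name):
        with self.cond:
            # Block until it is this group's turn to run.
            while self.active != group_id:
                self.cond.wait()
            self.held[group_id].add(name)

    def release(self, group_id, name):
        with self.cond:
            self.held[group_id].discard(name)
            # The last holder in a group hands control to the other group.
            if not self.held[group_id]:
                self.active = 1 - group_id
                self.cond.notify_all()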
Example #2
    n = 1  # counter of total number of agents (for naming)
    # Assumed initialization: these lists are appended to below but are not
    # defined anywhere in this excerpt.
    gameEnvs, groupLocks, workers = [], [], []
    for ma in range(NUM_META_AGENTS):
        #         num_agents=((ma%4)+1)*2
        #         print(num_agents)
        #         num_workers=num_agents
        num_agents = NUM_THREADS
        num_workers = num_agents  # used below but never assigned in the original excerpt
        gameEnv = mapf_gym.MAPFEnv(num_agents=num_agents,
                                   DIAGONAL_MOVEMENT=DIAG_MVMT,
                                   SIZE=GRID_SIZE,
                                   PROB=OBSTACLE_DENSITY,
                                   FULL_HELP=FULL_HELP)
        gameEnvs.append(gameEnv)

        # Create groupLock
        workerNames = ["worker_" + str(i) for i in range(n, n + num_workers)]
        groupLock = GroupLock.GroupLock([workerNames, workerNames])
        groupLocks.append(groupLock)

        # Create worker classes
        workersTmp = []
        for i in range(ma * num_workers + 1, (ma + 1) * num_workers + 1):
            workersTmp.append(Worker(gameEnv, ma, n, a_size, groupLock))
            n += 1
        workers.append(workersTmp)
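        # n stays globally unique across meta-agents: meta-agent ma owns
        # workers ma*num_workers+1 .. (ma+1)*num_workers.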

    global_summary = tf.summary.FileWriter(train_path)
    saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
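        # The excerpt ends here. A typical continuation (assumed; the original
        # launch loop is not shown, and the signature of w.work is a guess)
        # starts one thread per worker and joins them through the coordinator:
        worker_threads = []
        for ma_workers in workers:
            for w in ma_workers:
                t = threading.Thread(target=lambda w=w: w.work(sess, coord))
                t.start()
                worker_threads.append(t)
        coord.join(worker_threads)  # wait for every worker thread to stop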
Example #3
    def multiThreadedJob(self, episodeNumber):
        workers = []
        worker_threads = []
        workerNames = ["worker_" + str(i + 1) for i in range(NUM_THREADS)]
        groupLock = GroupLock.GroupLock([workerNames, workerNames])  # TODO

        workersPerMetaAgent = NUM_THREADS

        for a in range(NUM_THREADS):
            agentID = a + 1

            workers.append(
                Worker(self.metaAgentID,
                       agentID,
                       workersPerMetaAgent,
                       self.env,
                       self.localNetwork,
                       self.sess,
                       groupLock,
                       learningAgent=True,
                       global_step=self.global_step))

        for w in workers:
            groupLock.acquire(0, w.name)
            # Bind w as a default argument: a bare lambda would late-bind the
            # loop variable and a thread could end up driving the wrong worker.
            worker_work = lambda w=w: w.work(episodeNumber, self.coord, self.saver,
                                             self.weightVars)
            t = threading.Thread(target=worker_work)
            t.start()

            worker_threads.append(t)

        self.coord.join(worker_threads)
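        # (coord.join blocks until every worker thread above has terminated)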

        jobResults = []
        loss_metrics = []
        perf_metrics = []
        is_imitation = None
        for w in workers:
            if w.learningAgent:
                if JOB_TYPE == JOB_OPTIONS.getGradient:
                    jobResults = jobResults + w.allGradients
                elif JOB_TYPE == JOB_OPTIONS.getExperience:
                    jobResults.append(w.experienceBuffer)

            is_imitation = False  # w.is_imitation

            loss_metrics.append(w.loss_metrics)
            perf_metrics.append(w.perf_metrics)

        avg_loss_metrics = list(np.mean(np.array(loss_metrics), axis=0))
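        # (np.mean(..., axis=0) averages each loss term element-wise across
        # workers, e.g. [[0.2, 1.0], [0.4, 3.0]] -> [0.3, 2.0])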

        if not is_imitation:
            # perf_metrics structure:
            #
            # w.perf_metrics = [
            #    episode_step_count,
            #    episode_values,
            #    episode_inv_count,
            #    episode_stop_count,
            #    episode_reward,
            #    targets_done
            # ]

            perf_metrics = np.array(perf_metrics)
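            # Rate-like columns 0-3 (steps, values, invalid moves, stops) are
            # averaged across workers; count-like columns 4-5 (reward, targets
            # done) are summed.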
            avg_perf_metrics = np.mean(perf_metrics[:, :4], axis=0)
            episode_reward = np.sum(perf_metrics[:, 4])
            targets_done = np.sum(perf_metrics[:, 5])
            avg_perf_metrics = list(avg_perf_metrics) + [
                episode_reward, targets_done
            ]
            all_metrics = avg_loss_metrics + avg_perf_metrics
        else:
            all_metrics = avg_loss_metrics

        return jobResults, all_metrics, is_imitation
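
A hedged usage sketch: the driver names below (metaAgent, NUM_EPISODES) are assumptions for illustration, not part of the excerpt.

for ep in range(NUM_EPISODES):
    jobResults, all_metrics, is_imitation = metaAgent.multiThreadedJob(ep)
    # jobResults holds gradients or experience buffers depending on JOB_TYPE;
    # all_metrics is the averaged loss (plus performance) metrics list.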