    if abs(cpg['jacobianTorques'][2, self.workerID]) < TORQUE_THRES:
        on_ground = 0.5
    if not self.workerID:
        print(a_dist, a, pred_too_high, too_high, np.var(state))
    return state, a, value, rnn_state, pred_too_high, too_high, pred_on_ground, on_ground


if __name__ == "__main__":
    ## begin TensorFlow initialization:
    # initialize groupLock: group 0 holds the main thread, group 1 the workers
    groups = [['main'], []]
    for i in range(N_WORKERS):
        workerName = 'W_%i' % i  # worker name
        groups[1].append(workerName)
    groupLock = GroupLock.GroupLock(groups)
    groupLock.acquire(0, 'main')  # lock here so workers wait until main releases

    SESS = tf.Session()

    # copy cpg0 as the initial cpg value
    cpg = copy(cpg0)

    # initialize optimizers and workers
    with tf.device("/cpu:0"):
        trainer = tf.contrib.opt.NadamOptimizer(LR_AC, use_locking=True)
        # trainer = tf.train.AdadeltaOptimizer(LR_AC)
        GLOBAL_AC = ACNet(GLOBAL_NET_SCOPE)  # global net: we only need its parameters
        workers = []
        # create workers
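        # Hedged reading of the GroupLock handshake (GroupLock is an external
        # module not shown in this excerpt): GroupLock([groupA, groupB]) makes
        # the two name-groups take turns; acquire(g, name) blocks until it is
        # group g's turn, and the turn passes once every member of the running
        # group has released. Assuming a matching release(group, name) call
        # exists, the main thread would then alternate with the workers
        # roughly as:
        #     groupLock.release(0, 'main')   # hand control to the workers
        #     groupLock.acquire(0, 'main')   # block until all workers yield back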
        n = 1  # counter of the total number of agents (for naming)
        gameEnvs, groupLocks = [], []  # per-meta-agent environments and locks
        for ma in range(NUM_META_AGENTS):
            # num_agents = ((ma % 4) + 1) * 2
            num_agents = NUM_THREADS
            num_workers = num_agents  # one worker per agent
            gameEnv = mapf_gym.MAPFEnv(num_agents=num_agents,
                                       DIAGONAL_MOVEMENT=DIAG_MVMT,
                                       SIZE=GRID_SIZE,
                                       PROB=OBSTACLE_DENSITY,
                                       FULL_HELP=FULL_HELP)
            gameEnvs.append(gameEnv)

            # Create groupLock
            workerNames = ["worker_" + str(i) for i in range(n, n + num_workers)]
            groupLock = GroupLock.GroupLock([workerNames, workerNames])
            groupLocks.append(groupLock)

            # Create worker classes
            workersTmp = []
            for i in range(ma * num_workers + 1, (ma + 1) * num_workers + 1):
                workersTmp.append(Worker(gameEnv, ma, n, a_size, groupLock))
                n += 1
            workers.append(workersTmp)

        global_summary = tf.summary.FileWriter(train_path)
        saver = tf.train.Saver(max_to_keep=2)

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
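        # Hedged sketch (not in the original excerpt): a typical TF1 restore
        # step at this point, assuming `load_model` and `model_path` flags
        # defined elsewhere in the script, would look like:
        #     if load_model:
        #         ckpt = tf.train.get_checkpoint_state(model_path)
        #         saver.restore(sess, ckpt.model_checkpoint_path)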
def multiThreadedJob(self, episodeNumber):
    workers = []
    worker_threads = []
    workerNames = ["worker_" + str(i + 1) for i in range(NUM_THREADS)]
    groupLock = GroupLock.GroupLock([workerNames, workerNames])  # TODO
    workersPerMetaAgent = NUM_THREADS
    for a in range(NUM_THREADS):
        agentID = a + 1
        workers.append(
            Worker(self.metaAgentID, agentID, workersPerMetaAgent,
                   self.env, self.localNetwork, self.sess, groupLock,
                   learningAgent=True, global_step=self.global_step))

    for w in workers:
        groupLock.acquire(0, w.name)
        # bind `w` through a default argument: a bare `lambda: w.work(...)`
        # captures `w` by reference, so every thread could end up running
        # the last worker
        worker_work = lambda worker=w: worker.work(
            episodeNumber, self.coord, self.saver, self.weightVars)
        t = threading.Thread(target=worker_work)
        t.start()
        worker_threads.append(t)
    self.coord.join(worker_threads)

    jobResults = []
    loss_metrics = []
    perf_metrics = []
    is_imitation = None
    for w in workers:
        if w.learningAgent:
            if JOB_TYPE == JOB_OPTIONS.getGradient:
                jobResults = jobResults + w.allGradients
            elif JOB_TYPE == JOB_OPTIONS.getExperience:
                jobResults.append(w.experienceBuffer)
                is_imitation = False  # w.is_imitation
            loss_metrics.append(w.loss_metrics)
            perf_metrics.append(w.perf_metrics)

    avg_loss_metrics = list(np.mean(np.array(loss_metrics), axis=0))

    if not is_imitation:
        # perf_metrics structure:
        #
        # w.perf_metrics = [
        #     episode_step_count,
        #     episode_values,
        #     episode_inv_count,
        #     episode_stop_count,
        #     episode_reward,
        #     targets_done
        # ]
        perf_metrics = np.array(perf_metrics)
        avg_perf_metrics = np.mean(perf_metrics[:, :4], axis=0)  # averaged across workers
        episode_reward = np.sum(perf_metrics[:, 4])              # summed across workers
        targets_done = np.sum(perf_metrics[:, 5])
        avg_perf_metrics = list(avg_perf_metrics) + [episode_reward, targets_done]
        all_metrics = avg_loss_metrics + avg_perf_metrics
    else:
        all_metrics = avg_loss_metrics

    return jobResults, all_metrics, is_imitation
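# Hedged usage sketch (hypothetical driver code, not from the original):
# the meta-agent's job loop would consume the three return values above
# roughly as follows, with `metaAgent`, `episode`, and the two helper
# functions assumed names:
#     jobResults, all_metrics, is_imitation = metaAgent.multiThreadedJob(episode)
#     if JOB_TYPE == JOB_OPTIONS.getGradient:
#         # jobResults: concatenated per-worker gradients, to be applied
#         # to the global network by the driver
#         applyGradients(jobResults)        # hypothetical helper
#     elif JOB_TYPE == JOB_OPTIONS.getExperience:
#         # jobResults: one experience buffer per learning worker
#         storeExperience(jobResults)       # hypothetical helper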