def main(discount, epochs, learning_rate):
    """Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    Loads CDR trajectories from a fixed slot directory, builds the trajectory
    graph and gridworld, then trains temporal IRL parameters into `param/`.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """
    try:
        starttime = datetime.datetime.now()
        path = "/home/ubuntu/Data/KDDI/#201111.CDR-data/vks2564k/slot/"
        id_traj = load.load_directory_trajectory(path)
        print(len(id_traj))
        trajectories = id_traj.values()
        g = load.load_graph_traj(trajectories)
        # Hard-coded start mesh for this data set.
        g.set_start("53397561")
        gw = gridworld.Gridworld(g, discount)
        feature_matrix = gw.feature_matrix(g)
        # Ensure the output directory for learned parameters exists.
        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")
        maxent.t_irl(g, feature_matrix, trajectories, epochs, learning_rate,
                     path + "param/")
        endtime = datetime.datetime.now()
        print("finished reading files with time of " + str(endtime - starttime))
    except Exception:
        # Log and re-raise so the caller still sees the traceback.
        print("main class wrong")
        raise
def main(epochs, learning_rate, discount, number):
    """Train temporal MaxEnt IRL parameters for each mesh in a Tokyo mesh list.

    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    discount: MDP discount factor. float.
    number: Selector passed to read_list to pick a slice of the mesh list. int.
    """
    try:
        starttime = datetime.datetime.now()
        mesh_list = read_list("/home/ubuntu/Data/Tokyo/MeshCode/Tokyo.csv", number)
        print(len(mesh_list))
        print(mesh_list)
        for mesh_id in mesh_list:
            train_path = ("/home/ubuntu/Data/pflow_data/pflow-csv/"
                          + mesh_id + "/train_irl.csv")
            if os.path.exists(train_path):
                id_traj = load.load_trajectory(train_path)
                # parameter set numbers: only train meshes with enough experts.
                if len(id_traj) > 200:
                    # list(...) is required: random.sample cannot sample a dict view.
                    trajectories = random.sample(list(id_traj.values()), 200)
                    g = load.load_graph_traj(trajectories)
                    g.set_start(mesh_id)
                    print(g.get_start())
                    gw = gridworld.Gridworld(g, discount, "")
                    feature_matrix = gw.feature_matrix(g)
                    # train#
                    maxent.t_irl(
                        g, feature_matrix, trajectories, epochs, learning_rate,
                        "/home/ubuntu/Data/PT_Result/param_15/" + mesh_id
                        + "_" + str(1) + "_")
                    # Record this mesh as finished. Bug fix: the newline was
                    # previously written as the literal two characters "/n".
                    with open("/home/ubuntu//Data/PT_Result/finished_mesh.csv",
                              "a") as fo:
                        fo.write(mesh_id + "\n")
        endtime = datetime.datetime.now()
        print("finished reading files with time of " + str(endtime - starttime))
    except Exception:
        print("main class wrong")
        raise
def main(discount, epochs, learning_rate, target):
    """Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    Repeatedly samples 200 commuter trajectories and trains one parameter set
    per sample under `<target>/param/<i>`.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    target: Result sub-directory name under PT_Result. str.
    """
    try:
        starttime = datetime.datetime.now()
        path = "/home/ubuntu/Data/PT_Result/" + target + "/"
        # Ensure the output directories exist.
        if not os.path.exists(path + "sim/"):
            os.mkdir(path + "sim/")
        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")
        if os.path.exists(path + "training/"):
            id_traj = load.load_trajectory(
                "/home/ubuntu/Data/PT_Result/commuter/training/PT_commuter_irl_revised.csv"
            )
            # parameter set numbers
            for i in range(10000):
                # list(...) is required: random.sample cannot sample a dict view.
                trajectories = random.sample(list(id_traj.values()), 200)
                # Converted from a Python 2 print statement for consistency
                # with the print() calls in this function.
                print(trajectories)
                g = load.load_graph_traj(trajectories)
                gw = gridworld.Gridworld(g, discount)
                feature_matrix = gw.feature_matrix(g)
                # train#
                print("training ", path)
                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, path + "param/" + str(i))
        endtime = datetime.datetime.now()
        print("finished reading files with time of " + str(endtime - starttime))
    except Exception:
        print("main class wrong")
        raise
def main(date, discount, epochs, learning_rate, train=True):
    """Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    Trains 26 parameter sets, each from a random sample of 50 trajectories in
    D:/ClosePFLOW/53393575/training/.

    date: Unused in this variant; kept for interface compatibility.
    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    train: Unused in this variant; kept for interface compatibility.
    """
    try:
        starttime = datetime.datetime.now()
        path = "D:/ClosePFLOW/53393575/"
        # Ensure the output directories exist before moving files in.
        if not os.path.exists(path + "sim/"):
            os.mkdir(path + "sim/")
        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")
        tools.move_files(path)
        if os.path.exists(path + "training/"):
            id_traj = load.load_directory_trajectory(path + "training/")
            # parameter set numbers
            for i in range(26):
                # list(...) is required: random.sample cannot sample a dict view.
                trajectories = random.sample(list(id_traj.values()), 50)
                g = load.load_graph_traj(trajectories)
                gw = gridworld.Gridworld(g, discount)
                feature_matrix = gw.feature_matrix(g)
                # train# (Python 2 print statements converted to print() calls
                # for consistency with the rest of the file)
                print("training ", path)
                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, path + "param/" + str(i))
        endtime = datetime.datetime.now()
        print("finished reading files with time of " + str(endtime - starttime))
    except Exception:
        print("main class wrong")
        raise
def main():
    """Evaluate IRL simulation against markov-chain and expansion baselines.

    For every mesh directory with enough data: train (if needed) and simulate
    the IRL model, generate a markov-chain baseline, then score each method by
    nearest-neighbour trajectory distance to a held-out validation split,
    writing one CSV row per mesh.
    """
    # Root folder of the per-mesh person-flow CSV directories.
    root = "/home/ubuntu/Data/pflow_data/pflow-csv/"
    mesh_list = read_list("/home/ubuntu/Data/Tokyo/MeshCode/Tokyo.csv")
    list_dirs = os.walk(root)
    count = 0
    print mesh_list
    for root, dirs, files in list_dirs:
        # NOTE(review): mode "w" inside the walk loop truncates result.csv on
        # every directory level visited -- confirm this is intended.
        with open("/home/ubuntu/Data/PT_Result/exp1/result.csv", "w") as f:
            for d in dirs:
                if d in mesh_list:
                    file_list = os.listdir(os.path.join(root, d))
                    # Only meshes with enough files and a training CSV are scored.
                    if len(file_list) > 100 and "train_irl.csv" in file_list:
                        count += 1
                        id_traj = load.load_trajectory(os.path.join(root, d) + "/train_irl.csv")
                        # 60/40 train/validation split of the observed trajectories.
                        train, validation = train_test_split(id_traj.values(), test_size=0.4)
                        g = load.load_graph_traj(train)
                        gw = gridworld.Gridworld(g, 0.9)
                        feature_matrix = gw.feature_matrix(g)
                        path = "/home/ubuntu/Data/PT_Result/exp1/"
                        # train: fit IRL parameters only if none are saved yet.
                        if not os.path.exists(path + "parameter/" + d + "param.csv"):
                            maxent.t_irl(g, feature_matrix, train, 200, 0.2, path + "parameter/" + d)
                        # simulation: rebuild the time-dependent reward r[t][edge]
                        # from the first saved parameter file.
                        # NOTE(review): this always reads the first file in
                        # parameter/, not necessarily the one for mesh d -- verify.
                        t_alpha = read_param(path + "parameter/" + os.listdir(path+"parameter/")[0])
                        r = dict()
                        for t in range(12, 48):
                            r[t] = dict().fromkeys(g.get_edges(), 0)
                        for edge in g.get_edges():
                            for t in range(12, 48):
                                if t in t_alpha.keys():
                                    r[t][edge] = feature_matrix[edge].dot(t_alpha[t])
                        if not os.path.exists(path + "sim/" + d + "/"):
                            os.mkdir(path + "sim/" + d + "/")
                        # Generate 80 simulated trajectories for this mesh.
                        for i in range(80):
                            tools.generate_temporal_traj(g, r, d, 0.5, path + "sim/" + d + "/", d + "_" + str(i))
                        # markov chain baseline: 80 trajectories sampled from a
                        # conditional frequency distribution over transition pairs.
                        if not os.path.exists(path + "markov/" + d + "/"):
                            os.mkdir(path + "markov/" + d + "/")
                        for i in range(80):
                            pairs = makepairs(train)
                            cfd = nltk.ConditionalFreqDist(pairs)
                            generate(cfd, str(i), path + "markov/" + d + "/" + str(i) + ".csv", d)
                        # expansion validation: score a random 10% subset of the
                        # training data by nearest-neighbour distance.
                        expansion10_trajecotry = random.sample(train, int(len(train)*0.1))
                        diff_list = []
                        for validation_traj in validation:
                            min_dist = sys.maxint
                            for traj in expansion10_trajecotry:
                                dist = traj_dist((traj, validation_traj))
                                if dist < min_dist:
                                    min_dist = dist
                            diff_list.append(min_dist)
                        expansion10_score = np.average(diff_list)
                        # Same expansion score with a 50% subset.
                        expansion50_trajecotry = random.sample(train, int(len(train) * 0.5))
                        diff_list = []
                        for validation_traj in validation:
                            min_dist = sys.maxint
                            for traj in expansion50_trajecotry:
                                dist = traj_dist((traj, validation_traj))
                                if dist < min_dist:
                                    min_dist = dist
                            diff_list.append(min_dist)
                        expansion50_score = np.average(diff_list)
                        # validation: nearest-neighbour distance of each validation
                        # trajectory to the markov-generated set.
                        markov_id_traj = load.load_directory_trajectory(path + "markov/" + d + "/")
                        diff_list = []
                        print markov_id_traj.keys()
                        for traj in validation:
                            min_dist = sys.maxint
                            for markov_id in markov_id_traj.keys():
                                dist = traj_dist((traj, markov_id_traj[markov_id]))
                                if dist < min_dist:
                                    min_dist = dist
                            diff_list.append(min_dist)
                        markov_score = np.average(diff_list)
                        # Same nearest-neighbour score against the IRL simulations;
                        # distances above 10 are treated as outliers and skipped.
                        sim_id_traj = load.load_directory_trajectory(path + "sim/" + d + "/")
                        diff_list = []
                        for traj in validation:
                            min_dist = sys.maxint
                            for sim_id in sim_id_traj.keys():
                                dist = traj_dist((traj, sim_id_traj[sim_id]))
                                if dist < min_dist:
                                    min_dist = dist
                            if min_dist > 10:
                                continue
                            diff_list.append(min_dist)
                        sim_score = np.average(diff_list)
                        # One CSV row per mesh: sim, markov, expansion10, expansion50.
                        print d+","+str(sim_score)+","+str(markov_score)+","+str(expansion10_score)+","+str(expansion50_score)
                        f.write(d+","+str(sim_score)+","+str(markov_score)+","+str(expansion10_score)+","+str(expansion50_score))
                        f.write("\n")
                    # NOTE(review): f is already managed by the with-statement;
                    # this explicit close after 80+ meshes looks redundant and its
                    # original nesting is uncertain -- confirm intent.
                    if count > 80:
                        f.close()
def main(date, discount, epochs, learning_rate, train=True):
    """Train temporal IRL reward parameters per directory, then train or
    simulate over fixed-size batches of trajectories.

    date: Output prefix passed to maxent.t_irl in training mode.
    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    train: If True run IRL training per batch; otherwise run simulation from
        previously saved parameter files.
    """
    """
    # this part is used for calculate uniform reward parameter
    id_trajectory = load.load_trajectory(10000)
    print tools.motion_model_policy(id_trajectory)
    for i in range(1000):
        graph_trajectories = tools.choose_trajectory(1000, id_trajectory)
        g = load.load_graph_traj(graph_trajectories)
        sample_trajectories = sample(graph_trajectories, 100)
        gw = gridworld.Gridworld(g, 0.9)
        feature_matrix = gw.feature_matrix(g)
        alpha = maxent.irl(g, feature_matrix, sample_trajectories, 40, 0.05)
        path = str("D:/Ubicomp/alpha" + str(i) + ".txt")
        numpy.savetxt(path, alpha)
    """
    """ this part is usedfor temporal reward parameter training """
    try:
        starttime = datetime.datetime.now()
        path = "D:/ClosePFLOW/"
        dirs = os.listdir(path)
        # Pass 1: train one parameter file per directory that has training data.
        for dirname in dirs:
            directory = path + dirname + "/"
            print directory
            if not os.path.exists(directory + "sim/"):
                os.mkdir(directory + "sim/")
            tools.move_files(directory)
            if os.path.exists(directory + "training/"):
                id_traj = load.load_directory_trajectory(directory + "training/")
                # Retrain when there are enough trajectories and no saved params,
                # or when the saved parameter file exceeds the size threshold.
                # NOTE(review): os.path.getsize raises if param.csv is missing;
                # the or-branch ordering looks fragile -- verify.
                if (len(id_traj) >= 40 and not os.path.exists(directory + "param.csv")
                        ) or os.path.getsize(directory + "param.csv") > 2038:
                    trajectories = id_traj.values()
                    g = load.load_graph_traj(trajectories)
                    gw = gridworld.Gridworld(g, discount)
                    feature_matrix = gw.feature_matrix(g)
                    # train#
                    print "training ", directory
                    maxent.t_irl(g, feature_matrix, trajectories, epochs, learning_rate, directory)
                # Pass 2: walk the id list in overlapping windows of 100,
                # writing each batch out and training/simulating on it.
                # NOTE(review): `id_list` is not defined anywhere in this
                # function, and the original indentation of this while-loop was
                # lost -- its nesting here is a best-effort reconstruction;
                # confirm against the original source.
                indicator = 0
                i = 0
                while indicator <= 5000:
                    sample_id = []
                    trajectories = []
                    for k in range(indicator, indicator + 100):
                        sample_id.append(id_list[k])
                    for sid in sample_id:
                        trajectories.append(id_traj.get(sid))
                    # Start state of each agent is its position at time slot 12.
                    start_state = []
                    for traj in trajectories:
                        start_state.append(traj[12][0])
                    training_data = "C:/Users/PangYanbo/Desktop/UbiResult/TrainingTrajectoriesGroup_" + str(
                        i) + ".csv"
                    # Dump this batch as CSV rows: slot, origin, destination, mode.
                    with open(training_data, "wb") as f:
                        for k in range(100):
                            for j in range(12, 47):
                                if j in trajectories[k].keys():
                                    f.write(
                                        str(j) + ',' + trajectories[k][j][1].get_origin() + ',' +
                                        trajectories[k][j][1].get_destination() + ',' +
                                        trajectories[k][j][1].get_mode() + '\n')
                    # initial environment based on trajectories
                    g = load.load_graph_traj(trajectories)
                    gw = gridworld.Gridworld(g, discount)
                    feature_matrix = gw.feature_matrix(g)
                    print g
                    if train:
                        # training the model
                        maxent.t_irl(g, feature_matrix, trajectories, epochs, learning_rate, date)
                    else:
                        # simulation
                        for start in start_state:
                            # read alpha from saved file: pick a random non-directory
                            # parameter file for this agent.
                            root = "C:/Users/PangYanbo/Desktop/UbiResult/param/"
                            para_list = list(
                                os.path.join(root, name) for name in os.listdir(root))
                            # NOTE(review): removing from a list while iterating
                            # it can skip entries -- verify directories are rare.
                            for filename in para_list:
                                if os.path.isdir(filename):
                                    para_list.remove(filename)
                            param_path = random.choice(para_list)
                            # Slice [43:-4] strips the fixed prefix path and the
                            # ".csv" suffix to recover the agent id.
                            agent_id = param_path[43:-4]
                            print agent_id, param_path
                            # Parse one 11-element parameter row per time slot,
                            # starting at slot 12; short rows are zero-padded.
                            t_alpha = {}
                            with open(param_path, 'r') as f:
                                t = 12
                                for line in f:
                                    line = line.strip('\n')
                                    tokens = line.split(",")
                                    param = numpy.zeros(11)
                                    for j in range(11):
                                        if len(tokens) > j:
                                            param[j] = tokens[j]
                                    t_alpha[t] = param.copy()
                                    t += 1
                            # Time-dependent reward r[t][edge] from the features.
                            r = dict()
                            for t in range(12, 48):
                                r[t] = dict().fromkeys(g.get_edges(), 0)
                            for edge in g.get_edges():
                                for t in range(12, 48):
                                    if t in t_alpha.keys():
                                        r[t][edge] = feature_matrix[edge].dot(
                                            t_alpha[t])
                            tools.generate_temporal_traj(g, r, start, 0.5, i, agent_id)
                    i += 1
                    # Windows advance by 50, so consecutive batches overlap by 50.
                    indicator += 50
        endtime = datetime.datetime.now()
        print "finished reading files with time of" + str(endtime - starttime)
    except Exception:
        print "something wrong"
        raise
def main(mesh_id):
    """Train MaxEnt IRL parameter sets for a single mesh from 100-expert samples.

    mesh_id: Mesh code whose train_irl.csv provides the expert trajectories. str.

    Uses fixed hyper-parameters: discount 0.9, 400 epochs, learning rate 3.
    """
    discount = .9
    epochs = 400
    learning_rate = 3
    try:
        starttime = datetime.datetime.now()
        out_dir = "/home/ubuntu/Data/PT_Result/100expert_1agent/" + mesh_id + "/"
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        train_csv = ("/home/ubuntu/Data/pflow_data/pflow-csv/"
                     + mesh_id + "/train_irl.csv")
        if os.path.exists(train_csv):
            id_traj = load.load_trajectory(train_csv)
            # parameter set numbers: three independent samples of 100 experts.
            for i in range(3):
                trajectories = random.sample(list(id_traj.values()), 100)
                # save out expert data
                writeout.write_trajs(trajectories, out_dir + "training_data.csv")
                g = load.load_graph_traj(trajectories)
                g.set_start(mesh_id)
                print(g.get_start())
                gw = gridworld.Gridworld(g, discount)
                feature_matrix = gw.feature_matrix(g)
                # train# -- output files are suffixed i+3 so they do not clobber
                # parameter sets 0-2 from an earlier run.
                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, out_dir + str(i + 3) + "_")
        endtime = datetime.datetime.now()
        print("finished reading files with time of " + str(endtime - starttime))
    except Exception:
        print("main class wrong")
        raise