コード例 #1
0
def main(discount, epochs, learning_rate):
    """
    Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """

    try:
        starttime = datetime.datetime.now()
        path = "/home/ubuntu/Data/KDDI/#201111.CDR-data/vks2564k/slot/"

        id_traj = load.load_directory_trajectory(path)

        print(len(id_traj))

        trajectories = id_traj.values()
        g = load.load_graph_traj(trajectories)
        g.set_start("53397561")
        gw = gridworld.Gridworld(g, discount)
        feature_matrix = gw.feature_matrix(g)

        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")

        maxent.t_irl(g, feature_matrix, trajectories, epochs, learning_rate,
                     path + "param/")

        endtime = datetime.datetime.now()

        print("finished reading files with time of" + str(endtime - starttime))
    except Exception:
        print("mian class wrong")
        raise
コード例 #2
0
def main(epochs, learning_rate, discount, number):
    """
    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """

    try:
        starttime = datetime.datetime.now()

        mesh_list = read_list("/home/ubuntu/Data/Tokyo/MeshCode/Tokyo.csv",
                              number)
        print(len(mesh_list))
        print(mesh_list)

        for mesh_id in mesh_list:
            # if not os.path.exists("/home/ubuntu/Data/PT_Result/commuter/test_sim/" + mesh_id + "/"):
            #     os.mkdir("/home/ubuntu/Data/PT_Result/commuter/test_sim/" + mesh_id + "/")
            #
            # if not os.path.exists("/home/ubuntu/Data/PT_Result/commuter/test_param/" + mesh_id + "/"):
            #     os.mkdir("/home/ubuntu/Data/PT_Result/commuter/test_param/" + mesh_id + "/")

            if os.path.exists("/home/ubuntu/Data/pflow_data/pflow-csv/" +
                              mesh_id + "/train_irl.csv"):
                id_traj = load.load_trajectory(
                    "/home/ubuntu/Data/pflow_data/pflow-csv/" + mesh_id +
                    "/train_irl.csv")

                # parameter set numbers
                if len(id_traj) > 200:
                    # for i in range(len(id_traj)/50):
                    trajectories = random.sample(id_traj.values(), 200)
                    g = load.load_graph_traj(trajectories)
                    g.set_start(mesh_id)
                    print(g.get_start())
                    gw = gridworld.Gridworld(g, discount, "")
                    feature_matrix = gw.feature_matrix(g)

                    # train#

                    maxent.t_irl(
                        g, feature_matrix, trajectories, epochs, learning_rate,
                        "/home/ubuntu/Data/PT_Result/param_15/" + mesh_id +
                        "_" + str(1) + "_")

            fo = open("/home/ubuntu//Data/PT_Result/finished_mesh.csv", "a")
            fo.write(mesh_id + "/n")
            fo.close()

        endtime = datetime.datetime.now()

        print("finished reading files with time of" + str(endtime - starttime))
    except Exception:
        print("mian class wrong")
        raise
コード例 #3
0
def main(discount, epochs, learning_rate, target):
    """
    Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """

    try:
        starttime = datetime.datetime.now()
        path = "/home/ubuntu/Data/PT_Result/" + target + "/"

        if not os.path.exists(path + "sim/"):
            os.mkdir(path + "sim/")

        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")

        if os.path.exists(path + "training/"):
            id_traj = load.load_trajectory(
                "/home/ubuntu/Data/PT_Result/commuter/training/PT_commuter_irl_revised.csv"
            )

            # parameter set numbers
            for i in range(10000):
                trajectories = random.sample(id_traj.values(), 200)
                print trajectories
                g = load.load_graph_traj(trajectories)
                gw = gridworld.Gridworld(g, discount)
                feature_matrix = gw.feature_matrix(g)

                # train#
                print("training ", path)
                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, path + "param/" + str(i))

        endtime = datetime.datetime.now()

        print("finished reading files with time of" + str(endtime - starttime))
    except Exception:
        print("mian class wrong")
        raise
コード例 #4
0
def main(date, discount, epochs, learning_rate, train=True):
    """
    Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """

    try:
        starttime = datetime.datetime.now()
        path = "D:/ClosePFLOW/53393575/"

        if not os.path.exists(path + "sim/"):
            os.mkdir(path + "sim/")

        if not os.path.exists(path + "param/"):
            os.mkdir(path + "param/")

        tools.move_files(path)

        if os.path.exists(path + "training/"):
            id_traj = load.load_directory_trajectory(path + "training/")

            # parameter set numbers
            for i in range(26):
                trajectories = random.sample(id_traj.values(), 50)
                g = load.load_graph_traj(trajectories)
                gw = gridworld.Gridworld(g, discount)
                feature_matrix = gw.feature_matrix(g)

                # train#
                print "training ", path
                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, path + "param/" + str(i))

        endtime = datetime.datetime.now()

        print "finished reading files with time of" + str(endtime - starttime)
    except Exception:
        print "mian class wrong"
        raise
コード例 #5
0
def main():
    root = "/home/ubuntu/Data/pflow_data/pflow-csv/"

    mesh_list = read_list("/home/ubuntu/Data/Tokyo/MeshCode/Tokyo.csv")

    list_dirs = os.walk(root)
    count = 0
    print mesh_list
    for root, dirs, files in list_dirs:
        with open("/home/ubuntu/Data/PT_Result/exp1/result.csv", "w") as f:
            for d in dirs:
                if d in mesh_list:
                    file_list = os.listdir(os.path.join(root, d))
                    if len(file_list) > 100 and "train_irl.csv" in file_list:
                        count += 1
                        id_traj = load.load_trajectory(os.path.join(root, d) + "/train_irl.csv")

                        train, validation = train_test_split(id_traj.values(), test_size=0.4)

                        g = load.load_graph_traj(train)
                        gw = gridworld.Gridworld(g, 0.9)
                        feature_matrix = gw.feature_matrix(g)

                        path = "/home/ubuntu/Data/PT_Result/exp1/"

                        # train
                        if not os.path.exists(path + "parameter/" + d + "param.csv"):
                            maxent.t_irl(g, feature_matrix, train, 200, 0.2, path + "parameter/" + d)

                        # simulation

                        t_alpha = read_param(path + "parameter/" + os.listdir(path+"parameter/")[0])

                        r = dict()
                        for t in range(12, 48):
                            r[t] = dict().fromkeys(g.get_edges(), 0)

                        for edge in g.get_edges():
                            for t in range(12, 48):
                                if t in t_alpha.keys():
                                    r[t][edge] = feature_matrix[edge].dot(t_alpha[t])

                        if not os.path.exists(path + "sim/" + d + "/"):
                            os.mkdir(path + "sim/" + d + "/")

                        for i in range(80):
                            tools.generate_temporal_traj(g, r, d, 0.5, path + "sim/" + d + "/", d + "_" + str(i))

                        # markov chain
                        if not os.path.exists(path + "markov/" + d + "/"):
                            os.mkdir(path + "markov/" + d + "/")

                        for i in range(80):
                            pairs = makepairs(train)

                            cfd = nltk.ConditionalFreqDist(pairs)

                            generate(cfd, str(i), path + "markov/" + d + "/" + str(i) + ".csv", d)

                        # expansion validation
                        expansion10_trajecotry = random.sample(train, int(len(train)*0.1))

                        diff_list = []

                        for validation_traj in validation:
                            min_dist = sys.maxint
                            for traj in expansion10_trajecotry:
                                dist = traj_dist((traj, validation_traj))

                                if dist < min_dist:
                                    min_dist = dist

                            diff_list.append(min_dist)

                        expansion10_score = np.average(diff_list)

                        expansion50_trajecotry = random.sample(train, int(len(train) * 0.5))

                        diff_list = []

                        for validation_traj in validation:
                            min_dist = sys.maxint
                            for traj in expansion50_trajecotry:
                                dist = traj_dist((traj, validation_traj))

                                if dist < min_dist:
                                    min_dist = dist

                            diff_list.append(min_dist)

                        expansion50_score = np.average(diff_list)

                        # validation

                        markov_id_traj = load.load_directory_trajectory(path + "markov/" + d + "/")

                        diff_list = []

                        print markov_id_traj.keys()
                        for traj in validation:
                            min_dist = sys.maxint
                            for markov_id in markov_id_traj.keys():

                                dist = traj_dist((traj, markov_id_traj[markov_id]))

                                if dist < min_dist:
                                    min_dist = dist
                            diff_list.append(min_dist)

                        markov_score = np.average(diff_list)

                        sim_id_traj = load.load_directory_trajectory(path + "sim/" + d + "/")

                        diff_list = []

                        for traj in validation:
                            min_dist = sys.maxint
                            for sim_id in sim_id_traj.keys():
                                dist = traj_dist((traj, sim_id_traj[sim_id]))

                                if dist < min_dist:
                                    min_dist = dist
                            if min_dist > 10:
                                continue
                            diff_list.append(min_dist)

                        sim_score = np.average(diff_list)

                        print d+","+str(sim_score)+","+str(markov_score)+","+str(expansion10_score)+","+str(expansion50_score)
                        f.write(d+","+str(sim_score)+","+str(markov_score)+","+str(expansion10_score)+","+str(expansion50_score))
                        f.write("\n")

                        if count > 80:
                            f.close()
コード例 #6
0
def main(date, discount, epochs, learning_rate, train=True):
    """
    Run maximum entropy inverse reinforcement learning on the gridworld MDP.

    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """
    """
    # this part is used for calculate uniform reward parameter

    id_trajectory = load.load_trajectory(10000)

    print tools.motion_model_policy(id_trajectory)

    for i in range(1000):
        graph_trajectories = tools.choose_trajectory(1000, id_trajectory)

        g = load.load_graph_traj(graph_trajectories)

        sample_trajectories = sample(graph_trajectories, 100)

        gw = gridworld.Gridworld(g, 0.9)

        feature_matrix = gw.feature_matrix(g)

        alpha = maxent.irl(g, feature_matrix, sample_trajectories, 40, 0.05)

        path = str("D:/Ubicomp/alpha" + str(i) + ".txt")

        numpy.savetxt(path, alpha)

    """
    """
    this part is usedfor temporal reward parameter training
    """

    try:
        starttime = datetime.datetime.now()
        path = "D:/ClosePFLOW/"

        dirs = os.listdir(path)

        for dirname in dirs:
            directory = path + dirname + "/"
            print directory

            if not os.path.exists(directory + "sim/"):
                os.mkdir(directory + "sim/")

            tools.move_files(directory)

            if os.path.exists(directory + "training/"):
                id_traj = load.load_directory_trajectory(directory +
                                                         "training/")
                if (len(id_traj) >= 40
                        and not os.path.exists(directory + "param.csv")
                    ) or os.path.getsize(directory + "param.csv") > 2038:
                    trajectories = id_traj.values()
                    g = load.load_graph_traj(trajectories)
                    gw = gridworld.Gridworld(g, discount)
                    feature_matrix = gw.feature_matrix(g)

                    # train#
                    print "training ", directory
                    maxent.t_irl(g, feature_matrix, trajectories, epochs,
                                 learning_rate, directory)

        indicator = 0
        i = 0

        while indicator <= 5000:
            sample_id = []
            trajectories = []
            for k in range(indicator, indicator + 100):
                sample_id.append(id_list[k])

            for sid in sample_id:
                trajectories.append(id_traj.get(sid))

            start_state = []

            for traj in trajectories:
                start_state.append(traj[12][0])

            training_data = "C:/Users/PangYanbo/Desktop/UbiResult/TrainingTrajectoriesGroup_" + str(
                i) + ".csv"

            with open(training_data, "wb") as f:
                for k in range(100):
                    for j in range(12, 47):
                        if j in trajectories[k].keys():
                            f.write(
                                str(j) + ',' +
                                trajectories[k][j][1].get_origin() + ',' +
                                trajectories[k][j][1].get_destination() + ',' +
                                trajectories[k][j][1].get_mode() + '\n')

            # initial environment based on trajectories

            g = load.load_graph_traj(trajectories)
            gw = gridworld.Gridworld(g, discount)
            feature_matrix = gw.feature_matrix(g)

            print g

            if train:

                # training the model

                maxent.t_irl(g, feature_matrix, trajectories, epochs,
                             learning_rate, date)
            else:

                # simulation

                for start in start_state:

                    # read alpha from saved file
                    root = "C:/Users/PangYanbo/Desktop/UbiResult/param/"
                    para_list = list(
                        os.path.join(root, name) for name in os.listdir(root))
                    for filename in para_list:
                        if os.path.isdir(filename):
                            para_list.remove(filename)

                    param_path = random.choice(para_list)

                    agent_id = param_path[43:-4]

                    print agent_id, param_path

                    t_alpha = {}
                    with open(param_path, 'r') as f:
                        t = 12
                        for line in f:
                            line = line.strip('\n')
                            tokens = line.split(",")
                            param = numpy.zeros(11)
                            for j in range(11):
                                if len(tokens) > j:
                                    param[j] = tokens[j]
                            t_alpha[t] = param.copy()
                            t += 1

                    r = dict()
                    for t in range(12, 48):
                        r[t] = dict().fromkeys(g.get_edges(), 0)

                    for edge in g.get_edges():
                        for t in range(12, 48):
                            if t in t_alpha.keys():
                                r[t][edge] = feature_matrix[edge].dot(
                                    t_alpha[t])
                    tools.generate_temporal_traj(g, r, start, 0.5, i, agent_id)

            i += 1
            indicator += 50

        endtime = datetime.datetime.now()

        print "finished reading files with time of" + str(endtime - starttime)
    except Exception:
        print "something wrong"
        raise
コード例 #7
0
def main(mesh_id):
    """
    discount: MDP discount factor. float.
    epochs: Gradient descent iterations. int.
    learning_rate: Gradient descent learning rate. float.
    """

    discount = .9
    epochs = 400
    learning_rate = 3

    try:
        starttime = datetime.datetime.now()

        if not os.path.exists("/home/ubuntu/Data/PT_Result/100expert_1agent/" + mesh_id + "/"):
            os.mkdir("/home/ubuntu/Data/PT_Result/100expert_1agent/" + mesh_id + '/')

        if os.path.exists("/home/ubuntu/Data/pflow_data/pflow-csv/" + mesh_id + "/train_irl.csv"):
            id_traj = load.load_trajectory("/home/ubuntu/Data/pflow_data/pflow-csv/" + mesh_id + "/train_irl.csv")

            # parameter set numbers
            for i in range(3):
                print(type(list(id_traj.values())))
                trajectories = random.sample(list(id_traj.values()), 100)

                # save out expert data
                writeout.write_trajs(trajectories, "/home/ubuntu/Data/PT_Result/100expert_1agent/"
                                     + mesh_id + "/training_data.csv")

                g = load.load_graph_traj(trajectories)
                g.set_start(mesh_id)
                print(g.get_start())
                gw = gridworld.Gridworld(g, discount)
                feature_matrix = gw.feature_matrix(g)

                # train#

                maxent.t_irl(g, feature_matrix, trajectories, epochs, learning_rate,
                             "/home/ubuntu/Data/PT_Result/100expert_1agent/" + mesh_id + "/" + str(i+3)+"_")

                # alpha = load.load_param("/home/ubuntu/Data/PT_Result/100expert_1agent/" + mesh_id + "/" + str(i) +
                #                         "_" + 'param.csv')

                # r = dict()
                # for t in range(12, 48):
                #     r[t] = dict().fromkeys(g.get_edges(), 0)
                #
                # for t in range(12, 48):
                #     for edge in g.get_edges():
                #         if t in alpha.keys():
                #             r[t][edge] = feature_matrix[t][edge].dot(alpha[t])
                #
                # for j in range(20):
                #     tools.simple_trajectory(g, r, mesh_id, "/home/ubuntu/Data/PT_Result/100expert_1agent/" + mesh_id +
                #                             "/", mesh_id + "_" + str(j))

        endtime = datetime.datetime.now()

        print ("finished reading files with time of" + str(endtime - starttime))
    except Exception:
        print("mian class wrong")
        raise