Code example #1
def t_main_1(data_manager, plot_time_reward):
    envManager = EnvironmentManager(data_manager)
    envManager.auto_create_multi_singleprocess_envs()
    plot_data = {
        "time": [],
        "rewards_max": [],
        "rewards_mean": [],
        "reward_min": []
    }
    for i in range(1):
        Env, params = envManager.next_environment()
        agent = LSTM(params)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            baseline_reward = 0
            a = [0, 1]
            # init_input:[[0 1]]
            init_input = np.array(a).reshape(1, 2)
            start_time = time.time()
            summary_writter = tf.summary.FileWriter("Mylog/test1", sess.graph)
            # Train for MainConfig.num_train iterations (1000)
            for j in range(MainConfig.num_train):
                x, agr_params, action, _ = agent.getArgParams(sess, init_input)
                init_input = x[-1]
                # Do not use the prediction network; evaluate the action in the real environment
                rewards = Env.run(action)
                # 5: write the reward summaries to the log stream
                summarize(summary_writter, np.max(rewards), j, 'max_reward')
                summarize(summary_writter, np.mean(rewards), j, 'mean_reward')
                step_time = time.time()
                one_time = step_time - start_time
                plot_data["time"].append(one_time)
                plot_data["rewards_max"].append(np.max(rewards))
                plot_data["rewards_mean"].append(np.mean(rewards))
                plot_data["reward_min"].append(np.min(rewards))
                if j % 100 == 0:
                    plot = pd.DataFrame(data=plot_data)
                    plot.to_csv(plot_time_reward, index=False)
                if j == 0:
                    baseline_reward = np.mean(rewards)

                print("else: normal training, rewards:", rewards)
                loss, ratio = agent.learn(False, sess, x, agr_params, rewards,
                                          baseline_reward, j)
                print("i=", i, " j=", j, "average_reward=", np.mean(rewards),
                      " baseline_reward=", baseline_reward, " loss=", loss,
                      "\n")
                summarize(summary_writter, loss, j, 'loss')
                summarize(summary_writter, ratio, j, 'ratio')
                reward_c = np.mean(rewards)
                baseline_reward = baseline_reward * AgentConfig.dr + (
                    1 - AgentConfig.dr) * reward_c
            plot = pd.DataFrame(data=plot_data)
            plot.to_csv(plot_time_reward, index=False)
    print("---------训练结束!----------")
Code example #2
def t_main_meta_diff():
    # Environment initialization
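    # Prediction network input is 22-dimensional: 5 hyperparameter values + the 17 meta-features built below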
    nnet = NNet(22)
    data_manager_img = DataManager(6)
    data_manager_crowd = DataManager(12)
    data_manager_pr = DataManager(14)
    data_manager_opt = DataManager(9)

    envManagerImg = EnvironmentManager(data_manager_img)
    envManagerCrowd = EnvironmentManager(data_manager_crowd)
    envManagerPr = EnvironmentManager(data_manager_pr)
    envManagerOpt = EnvironmentManager(data_manager_opt)

    envManagerImg.auto_create_multi_singleprocess_envs()
    envManagerCrowd.auto_create_multi_singleprocess_envs()
    envManagerPr.auto_create_multi_singleprocess_envs()
    envManagerOpt.auto_create_multi_singleprocess_envs()

    env_img, _ = envManagerImg.next_environment()
    env_crowd, _ = envManagerCrowd.next_environment()
    env_pr, _ = envManagerPr.next_environment()
    env_opt, _ = envManagerOpt.next_environment()

    # Extract the meta-features of the image-segmentation dataset
    m = MetaFeatureExtractor(0)
    meta_vec_img = [
        m.num_ins, m.log_num_ins, m.num_feature, m.log_num_feature, m.dimen,
        m.log_dimen, m.inv_dimen, m.log_inv_dimen, m.kurtosis_min,
        m.kurtosis_max, m.kurtosis_mean, m.kurtosis_std, m.skewness_min,
        m.skewness_max, m.skewness_mean, m.skewness_std, m.entropy
    ]
    # Standardize the meta-features (zero mean, unit variance)
    meta_vec_img = (meta_vec_img -
                    np.mean(meta_vec_img)) / np.std(meta_vec_img)
    meta_vec_img = [meta_vec_img] * 8
    meta_vec_img = np.array(meta_vec_img)

    # Extract the meta-features of the Crowdsourced dataset
    m1 = MetaFeatureExtractor(2)
    meta_vec_crowd = [
        m1.num_ins, m1.log_num_ins, m1.num_feature, m1.log_num_feature,
        m1.dimen, m1.log_dimen, m1.inv_dimen, m1.log_inv_dimen,
        m1.kurtosis_min, m1.kurtosis_max, m1.kurtosis_mean, m1.kurtosis_std,
        m1.skewness_min, m1.skewness_max, m1.skewness_mean, m1.skewness_std,
        m1.entropy
    ]
    # Standardize the meta-features (zero mean, unit variance)
    meta_vec_crowd = (meta_vec_crowd -
                      np.mean(meta_vec_crowd)) / np.std(meta_vec_crowd)
    meta_vec_crowd = [meta_vec_crowd] * 8
    meta_vec_crowd = np.array(meta_vec_crowd)

    # Extract the meta-features of the pr-handwritten dataset
    m2 = MetaFeatureExtractor(3)
    meta_vec_pr = [
        m2.num_ins, m2.log_num_ins, m2.num_feature, m2.log_num_feature,
        m2.dimen, m2.log_dimen, m2.inv_dimen, m2.log_inv_dimen,
        m2.kurtosis_min, m2.kurtosis_max, m2.kurtosis_mean, m2.kurtosis_std,
        m2.skewness_min, m2.skewness_max, m2.skewness_mean, m2.skewness_std,
        m2.entropy
    ]
    # Standardize the meta-features (zero mean, unit variance)
    meta_vec_pr = (meta_vec_pr - np.mean(meta_vec_pr)) / np.std(meta_vec_pr)
    meta_vec_pr = [meta_vec_pr] * 8
    meta_vec_pr = np.array(meta_vec_pr)

    # Extract the meta-features of the optdigits dataset
    m3 = MetaFeatureExtractor(4)
    meta_vec_opt = [
        m3.num_ins, m3.log_num_ins, m3.num_feature, m3.log_num_feature,
        m3.dimen, m3.log_dimen, m3.inv_dimen, m3.log_inv_dimen,
        m3.kurtosis_min, m3.kurtosis_max, m3.kurtosis_mean, m3.kurtosis_std,
        m3.skewness_min, m3.skewness_max, m3.skewness_mean, m3.skewness_std,
        m3.entropy
    ]
    # Standardize the meta-features (zero mean, unit variance)
    meta_vec_opt = (meta_vec_opt -
                    np.mean(meta_vec_opt)) / np.std(meta_vec_opt)
    meta_vec_opt = [meta_vec_opt] * 8
    meta_vec_opt = np.array(meta_vec_opt)

    img_agr = np.loadtxt("../MyValidate_time/test-meta3/img_sample.csv",
                         delimiter=",")
    crowd_agr = np.loadtxt(
        "../MyValidate_time/test-meta3/crowdsourced_sample.csv", delimiter=",")
    pr_agr = np.loadtxt("../MyValidate_time/test-meta3/pr_sample.csv",
                        delimiter=",")
    opt_agr = np.loadtxt("../MyValidate_time/test-meta3/optdigits_sample.csv",
                         delimiter=",")

    # Train the prediction network (all four datasets trained together)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # summary_writter = tf.summary.FileWriter(path, sess.graph)
        # Datasets 1, 2, 3, 4: train the prediction network (10 steps)
        for i in range(4):
            img_agr_train = np.hstack(
                (img_agr[8 * i:8 * (i + 1), :5], meta_vec_img))
            img_agr_label = img_agr[8 * i:8 * (i + 1), 5]

            crowd_agr_train = np.hstack(
                (crowd_agr[8 * i:8 * (i + 1), :5], meta_vec_crowd))
            crowd_agr_label = crowd_agr[8 * i:8 * (i + 1), 5]

            pr_agr_train = np.hstack(
                (pr_agr[8 * i:8 * (i + 1), :5], meta_vec_pr))
            pr_agr_label = pr_agr[8 * i:8 * (i + 1), 5]

            opt_agr_train = np.hstack(
                (opt_agr[8 * i:8 * (i + 1), :5], meta_vec_opt))
            opt_agr_label = opt_agr[8 * i:8 * (i + 1), 5]

            nnet.store_transition(img_agr_train, img_agr_label)
            nnet.store_transition(crowd_agr_train, crowd_agr_label)
            nnet.store_transition(pr_agr_train, pr_agr_label)
            nnet.store_transition(opt_agr_train, opt_agr_label)
            nnet.train_net(sess, i)
        print('------ Prediction network training finished (datasets 1, 2, 3, 4) ------')

        # Define random actions for the comparison experiment
        # Shuffle the data
        action_sample = np.loadtxt(
            "../MyValidate_time/test-meta3/action_sample.csv", delimiter=",")
        agr_sample = np.loadtxt("../MyValidate_time/test-meta3/agr_sample.csv",
                                delimiter=",")
        random.shuffle(action_sample)
        random.shuffle(agr_sample)

        # Record all rewards predicted by the prediction network
        reward_img_pre_total = []
        reward_crowd_pre_total = []
        reward_pr_pre_total = []
        reward_opt_pre_total = []

        # Number of evaluation rounds
        n_step = 2

        # Predict rewards with the prediction network
        start_time = time.time()
        for i in range(n_step):
            img_ran_action = np.hstack(
                (agr_sample[8 * i:8 * (i + 1), :], meta_vec_img))
            crowd_ran_action = np.hstack(
                (agr_sample[8 * i:8 * (i + 1), :], meta_vec_crowd))
            pr_ran_action = np.hstack(
                (agr_sample[8 * i:8 * (i + 1), :], meta_vec_pr))
            opt_ran_action = np.hstack(
                (agr_sample[8 * i:8 * (i + 1), :], meta_vec_opt))

            reward_img_pre = nnet.get_reward(sess, img_ran_action)
            reward_crowd_pre = nnet.get_reward(sess, crowd_ran_action)
            reward_pr_pre = nnet.get_reward(sess, pr_ran_action)
            reward_opt_pre = nnet.get_reward(sess, opt_ran_action)

            reward_img_pre_total.append(reward_img_pre)
            reward_crowd_pre_total.append(reward_crowd_pre)
            reward_pr_pre_total.append(reward_pr_pre)
            reward_opt_pre_total.append(reward_opt_pre)
        step_time = time.time()
        pre_time = step_time - start_time
        print('Prediction network elapsed time:', pre_time)
        print('------ Prediction network reward prediction finished ------')

        # Record all rewards from the real environment
        reward_img_true_total = []
        reward_crowd_true_total = []
        reward_pr_true_total = []
        reward_opt_true_total = []

        # Evaluate in the real environment
        start_time = time.time()
        for i in range(n_step):
            reward_img_true = env_img.run(action_sample[8 * i:8 * (i + 1), :])
            reward_crowd_true = env_crowd.run(action_sample[8 * i:8 *
                                                            (i + 1), :])
            reward_pr_true = env_pr.run(action_sample[8 * i:8 * (i + 1), :])
            reward_opt_true = env_opt.run(action_sample[8 * i:8 * (i + 1), :])

            reward_img_true_total.append(reward_img_true)
            reward_crowd_true_total.append(reward_crowd_true)
            reward_pr_true_total.append(reward_pr_true)
            reward_opt_true_total.append(reward_opt_true)
        step_time = time.time()
        true_time = step_time - start_time
        print('Real environment elapsed time:', true_time)
        print('------ Real-environment reward collection finished ------')

        reward_img_pre_total = np.array(reward_img_pre_total).reshape(
            n_step, 8)
        reward_img_true_total = np.array(reward_img_true_total).reshape(
            n_step, 8)
        reward_crowd_pre_total = np.array(reward_crowd_pre_total).reshape(
            n_step, 8)
        reward_crowd_true_total = np.array(reward_crowd_true_total).reshape(
            n_step, 8)
        reward_pr_pre_total = np.array(reward_pr_pre_total).reshape(n_step, 8)
        reward_pr_true_total = np.array(reward_pr_true_total).reshape(
            n_step, 8)
        reward_opt_pre_total = np.array(reward_opt_pre_total).reshape(
            n_step, 8)
        reward_opt_true_total = np.array(reward_opt_true_total).reshape(
            n_step, 8)

        # Compute the distance between the true rewards and the rewards predicted by the network
        dis_pre = np.square(
            np.mean(reward_img_pre_total - reward_img_true_total))
        dis_crowd = np.square(
            np.mean(reward_crowd_pre_total - reward_crowd_true_total))
        dis_pr = np.square(np.mean(reward_pr_pre_total - reward_pr_true_total))
        dis_opt = np.square(
            np.mean(reward_opt_pre_total - reward_opt_true_total))
        print('Error on dataset 1:', dis_pre)
        print('Error on dataset 2:', dis_crowd)
        print('Error on dataset 3:', dis_pr)
        print('Error on dataset 4:', dis_opt)
        print('------ Experiment finished ------')
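
The four meta-feature blocks in code example #2 repeat the same extraction, normalization and tiling steps. A sketch of how that repetition could be factored out; build_meta_vec and its batch_size parameter are hypothetical and not part of the original code:

import numpy as np

def build_meta_vec(dataset_id, batch_size=8):
    # Extract the 17 meta-features, standardize them, and tile them across the batch.
    m = MetaFeatureExtractor(dataset_id)
    vec = np.array([
        m.num_ins, m.log_num_ins, m.num_feature, m.log_num_feature, m.dimen,
        m.log_dimen, m.inv_dimen, m.log_inv_dimen, m.kurtosis_min,
        m.kurtosis_max, m.kurtosis_mean, m.kurtosis_std, m.skewness_min,
        m.skewness_max, m.skewness_mean, m.skewness_std, m.entropy
    ], dtype=np.float64)
    vec = (vec - np.mean(vec)) / np.std(vec)  # same normalization as above
    return np.tile(vec, (batch_size, 1))      # shape (batch_size, 17), e.g. (8, 17)

# Usage (equivalent to meta_vec_img ... meta_vec_opt above, under the same dataset indices):
# meta_vec_img, meta_vec_crowd, meta_vec_pr, meta_vec_opt = (build_meta_vec(k) for k in (0, 2, 3, 4))
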
Code example #3
def t_main_pre(data_manager, plot_time_reward):
    m = MetaFeatureExtractor(4)
    meta_vec = [
        m.num_ins, m.log_num_ins, m.num_feature, m.log_num_feature, m.dimen,
        m.log_dimen, m.inv_dimen, m.log_inv_dimen, m.kurtosis_min,
        m.kurtosis_max, m.kurtosis_mean, m.kurtosis_std, m.skewness_min,
        m.skewness_max, m.skewness_mean, m.skewness_std, m.entropy
    ]
    meta_vec = [meta_vec] * 8
    meta_vec = np.array(meta_vec)
    envManager = EnvironmentManager(data_manager)
    envManager.auto_create_multi_singleprocess_envs()
    plot_data = {
        "time": [],
        "rewards_max": [],
        "rewards_mean": [],
        "reward_min": []
    }
    for i in range(1):
        # The current Env is a random forest; params gives the number of candidate values per hyperparameter, e.g. [12 10 21 21 9]
        Env, params = envManager.next_environment()
        agent = LSTM(params)
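        # Prediction-network input size: len(params) hyperparameter values plus the 17 meta-features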
        nnet = NNet(len(params) + 17)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            baseline_reward = 0
            a = [0, 1]
            init_input = np.array(a).reshape(1, 2)
            start_time = time.time()
            summary_writter = tf.summary.FileWriter("Mylog/test-meta2",
                                                    sess.graph)
            # Training loop (1000 iterations in the full run; only 5 here)
            for j in range(5):
                # init_input:[[0 1]]
                # action is the selected action; agr_params are the corresponding values drawn under the normal distribution
                x, agr_params, action, _ = agent.getArgParams(sess, init_input)
                data1 = pd.DataFrame(agr_params)
                data2 = pd.DataFrame(action)
                data1.to_csv('../MyValidate_time/test-meta3/agr_sample.csv',
                             index=False,
                             header=False,
                             mode='a')
                data2.to_csv('../MyValidate_time/test-meta3/action_sample.csv',
                             index=False,
                             header=False,
                             mode='a')

                # print('Action obtained this round:', action)
                # stack_data = np.hstack((agr_params, meta_vec))
                # # Use the neural network
                # # Train the prediction network for the first 25 rounds
                # if j <= MainConfig.t1:
                #     rewards = Env.run(action)
                #     # Append the samples to the file
                #     data1 = pd.DataFrame(np.c_[agr_params, rewards])
                #     # data1.to_csv('../MyValidate_time/test-meta3/pr_sample.csv', index=False, header=False, mode='a')
                #     # data1.to_csv('../MyValidate_time/test-meta3/img_sample.csv', index=False, header=False, mode='a')
                #     # data1.to_csv('../MyValidate_time/test-meta3/crowdsourced_sample.csv', index=False, header=False,
                #     #             mode='a')
                #     data1.to_csv('../MyValidate_time/test-meta3/optdigits_sample.csv', index=False, header=False,mode='a')
                #     summarize(summary_writter, np.max(rewards), j, 'max_reward')
                #     summarize(summary_writter, np.mean(rewards), j, 'mean_reward')
                #     step_time = time.time()
                #     one_time = step_time - start_time
                #     plot_data["time"].append(one_time)
                #     plot_data["rewards_max"].append(np.max(rewards))
                #     plot_data["rewards_mean"].append(np.mean(rewards))
                #     plot_data["reward_min"].append(np.min(rewards))
                #     nnet.store_transition(stack_data, rewards)
                #     nnet.train_net(sess, j)
                #     nnet.train_net(sess, j)
                # if j > MainConfig.t1 and j < MainConfig.t2:
                #     rewards = nnet.get_reward(sess, stack_data)
                #     rewards = np.array(rewards).reshape(AgentConfig.batch_size)
                #     summarize(summary_writter, np.max(rewards), j, 'max_reward')
                #     summarize(summary_writter, np.mean(rewards), j, 'mean_reward')
                # if j >= MainConfig.t2:
                #     rewards = Env.run(action)
                #     step_time = time.time()
                #     one_time = step_time - start_time
                #     plot_data["time"].append(one_time)
                #     plot_data["rewards_max"].append(np.max(rewards))
                #     plot_data["rewards_mean"].append(np.mean(rewards))
                #     plot_data["reward_min"].append(np.min(rewards))
                #     summarize(summary_writter, np.max(rewards), j, 'max_reward')
                #     summarize(summary_writter, np.mean(rewards), j, 'mean_reward')
                # if j % 100 == 0:
                #     plot = pd.DataFrame(data=plot_data)
                #     plot.to_csv(plot_time_reward, index=False)
                # if j == 0:
                #     baseline_reward = np.mean(rewards)
                # print("else: normal training, rewards:", rewards)
                # loss, ratio = agent.learn(False, sess, x, agr_params, rewards, baseline_reward, j)
                # print("i=", i, " j=", j, "average_reward=", np.mean(rewards), " baseline_reward=", baseline_reward,
                #       " loss=", loss, "\n")
                # summarize(summary_writter, loss, j, 'loss')
                # summarize(summary_writter, ratio, j, 'ratio')
                # reward_c = np.mean(rewards)
                # baseline_reward = baseline_reward * AgentConfig.dr + (1 - AgentConfig.dr) * reward_c
            # Save the per-step real-time reward and time (300 steps)
            # plot = pd.DataFrame(data=plot_data)
            # plot.to_csv(plot_time_reward, index=False)
    print("---------训练结束!----------")
Code example #4
def t_main_no_meta_diff():
    # Environment initialization
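    # The prediction network here takes only the 5 hyperparameter values (no meta-features), unlike code example #2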
    nnet = NNet(5)
    data_manager_img = DataManager(6)
    data_manager_crowd = DataManager(12)
    data_manager_pr = DataManager(14)
    data_manager_opt = DataManager(9)

    envManagerImg = EnvironmentManager(data_manager_img)
    envManagerCrowd = EnvironmentManager(data_manager_crowd)
    envManagerPr = EnvironmentManager(data_manager_pr)
    envManagerOpt = EnvironmentManager(data_manager_opt)

    envManagerImg.auto_create_multi_singleprocess_envs()
    envManagerCrowd.auto_create_multi_singleprocess_envs()
    envManagerPr.auto_create_multi_singleprocess_envs()
    envManagerOpt.auto_create_multi_singleprocess_envs()

    env_img, _ = envManagerImg.next_environment()
    env_crowd, _ = envManagerCrowd.next_environment()
    env_pr, _ = envManagerPr.next_environment()
    env_opt, _ = envManagerOpt.next_environment()

    img_agr = np.loadtxt("../MyValidate_time/test-meta3/img_sample.csv",
                         delimiter=",")
    crowd_agr = np.loadtxt(
        "../MyValidate_time/test-meta3/crowdsourced_sample.csv", delimiter=",")
    pr_agr = np.loadtxt("../MyValidate_time/test-meta3/pr_sample.csv",
                        delimiter=",")
    opt_agr = np.loadtxt("../MyValidate_time/test-meta3/optdigits_sample.csv",
                         delimiter=",")

    # Train the prediction network (all four datasets trained together)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # summary_writter = tf.summary.FileWriter(path, sess.graph)
        # Datasets 1, 2, 3, 4: train the prediction network (10 steps)
        for i in range(4):
            img_agr_train = img_agr[8 * i:8 * (i + 1), :5]
            img_agr_label = img_agr[8 * i:8 * (i + 1), 5]

            crowd_agr_train = crowd_agr[8 * i:8 * (i + 1), :5]
            crowd_agr_label = crowd_agr[8 * i:8 * (i + 1), 5]

            pr_agr_train = pr_agr[8 * i:8 * (i + 1), :5]
            pr_agr_label = pr_agr[8 * i:8 * (i + 1), 5]

            opt_agr_train = opt_agr[8 * i:8 * (i + 1), :5]
            opt_agr_label = opt_agr[8 * i:8 * (i + 1), 5]

            nnet.store_transition(img_agr_train, img_agr_label)
            nnet.store_transition(crowd_agr_train, crowd_agr_label)
            nnet.store_transition(pr_agr_train, pr_agr_label)
            nnet.store_transition(opt_agr_train, opt_agr_label)
            nnet.train_net(sess, i)
        print('------ Prediction network training finished (datasets 1, 2, 3, 4) ------')

        # Define random actions for the comparison experiment
        # Shuffle the data
        action_sample = np.loadtxt(
            "../MyValidate_time/test-meta3/action_sample.csv", delimiter=",")
        agr_sample = np.loadtxt("../MyValidate_time/test-meta3/agr_sample.csv",
                                delimiter=",")
        random.shuffle(action_sample)
        random.shuffle(agr_sample)

        # Record all rewards predicted by the prediction network
        reward_img_pre_total = []
        reward_crowd_pre_total = []
        reward_pr_pre_total = []
        reward_opt_pre_total = []

        # Number of evaluation rounds
        n_step = 2

        # Predict rewards with the prediction network
        start_time = time.time()
        for i in range(n_step):
            img_ran_action = agr_sample[8 * i:8 * (i + 1), :]
            crowd_ran_action = agr_sample[8 * i:8 * (i + 1), :]
            pr_ran_action = agr_sample[8 * i:8 * (i + 1), :]
            opt_ran_action = agr_sample[8 * i:8 * (i + 1), :]

            reward_img_pre = nnet.get_reward(sess, img_ran_action)
            reward_crowd_pre = nnet.get_reward(sess, crowd_ran_action)
            reward_pr_pre = nnet.get_reward(sess, pr_ran_action)
            reward_opt_pre = nnet.get_reward(sess, opt_ran_action)

            reward_img_pre_total.append(reward_img_pre)
            reward_crowd_pre_total.append(reward_crowd_pre)
            reward_pr_pre_total.append(reward_pr_pre)
            reward_opt_pre_total.append(reward_opt_pre)
        step_time = time.time()
        pre_time = step_time - start_time
        print('Prediction network elapsed time:', pre_time)
        print('------ Prediction network reward prediction finished ------')

        # Record all rewards from the real environment
        reward_img_true_total = []
        reward_crowd_true_total = []
        reward_pr_true_total = []
        reward_opt_true_total = []

        # Evaluate in the real environment
        start_time = time.time()
        for i in range(n_step):
            reward_img_true = env_img.run(action_sample[8 * i:8 * (i + 1), :])
            reward_crowd_true = env_crowd.run(action_sample[8 * i:8 *
                                                            (i + 1), :])
            reward_pr_true = env_pr.run(action_sample[8 * i:8 * (i + 1), :])
            reward_opt_true = env_opt.run(action_sample[8 * i:8 * (i + 1), :])

            reward_img_true_total.append(reward_img_true)
            reward_crowd_true_total.append(reward_crowd_true)
            reward_pr_true_total.append(reward_pr_true)
            reward_opt_true_total.append(reward_opt_true)
        step_time = time.time()
        true_time = step_time - start_time
        print('Real environment elapsed time:', true_time)
        print('------ Real-environment reward collection finished ------')

        reward_img_pre_total = np.array(reward_img_pre_total).reshape(
            n_step, 8)
        reward_img_true_total = np.array(reward_img_true_total).reshape(
            n_step, 8)
        reward_crowd_pre_total = np.array(reward_crowd_pre_total).reshape(
            n_step, 8)
        reward_crowd_true_total = np.array(reward_crowd_true_total).reshape(
            n_step, 8)
        reward_pr_pre_total = np.array(reward_pr_pre_total).reshape(n_step, 8)
        reward_pr_true_total = np.array(reward_pr_true_total).reshape(
            n_step, 8)
        reward_opt_pre_total = np.array(reward_opt_pre_total).reshape(
            n_step, 8)
        reward_opt_true_total = np.array(reward_opt_true_total).reshape(
            n_step, 8)

        # Compute the distance between the true rewards and the rewards predicted by the network
        dis_pre = np.square(
            np.mean(reward_img_pre_total - reward_img_true_total))
        dis_crowd = np.square(
            np.mean(reward_crowd_pre_total - reward_crowd_true_total))
        dis_pr = np.square(np.mean(reward_pr_pre_total - reward_pr_true_total))
        dis_opt = np.square(
            np.mean(reward_opt_pre_total - reward_opt_true_total))
        print('Error on dataset 1:', dis_pre)
        print('Error on dataset 2:', dis_crowd)
        print('Error on dataset 3:', dis_pr)
        print('Error on dataset 4:', dis_opt)
        print('------ Experiment finished ------')
Code example #5
def t_main_bp_pre(data_manager, plot_time_reward):
    envManager = EnvironmentManager(data_manager)
    envManager.auto_create_multi_singleprocess_envs()
    plot_data = {
        "time": [],
        "rewards_max": [],
        "rewards_mean": [],
        "reward_min": []
    }
    real_data = {"reward": [], "action": []}
    pre_data = {"reward": [], "action": []}
    for i in range(1):
        Env, params = envManager.next_environment()
        agent = LSTM(params)
        nnet = NNet(len(params))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            baseline_reward = 0
            a = [0, 1]
            init_input = np.array(a).reshape(1, 2)
            start_time = time.time()
            for i in range(1):
                nnet._init_net(sess, len(params))
                for j in range(MainConfig.num_train):
                    x, agr_params, action, _ = agent.getArgParams(
                        sess, init_input)
                    np.set_printoptions(suppress=True)

                    # Real-environment phase: run the action and use the result to train the prediction network below
                    rewards = Env.run(action)
                    for ii in range(8):
                        real_data["action"].append(agr_params[ii])
                        real_data["reward"].append(rewards[ii])
                    np.savetxt(
                        "../validate_time/params_data_agent(chen)/real_data_action.csv",
                        real_data["action"],
                        delimiter=',')
                    np.savetxt(
                        "../validate_time/params_data_agent(chen)/real_data_reward.csv",
                        real_data["reward"],
                        delimiter=',')
                    step_time = time.time()
                    one_time = step_time - start_time
                    plot_data["time"].append(one_time)
                    plot_data["rewards_max"].append(np.max(rewards))
                    plot_data["rewards_mean"].append(np.mean(rewards))
                    plot_data["reward_min"].append(np.min(rewards))
                    nnet.store_transition(agr_params, rewards)
                    nnet.train_net(sess, j)
                    nnet.train_net(sess, j)
                    # Periodically write out the time/reward curves
                    if j % (MainConfig.num_train - 1) == 0:
                        plot = pd.DataFrame(data=plot_data)
                        plot.to_csv(plot_time_reward, index=False)
                    agent.check_topData(x, agr_params, rewards)
                    if j == 0:
                        baseline_reward = np.mean(rewards)
                    # Every ten steps, use the guided data pool (the hyperparameter values with the best rewards) to reduce variance
                    if (j + 1) % 10 == 0:
                        x, agr_params, rewards = agent.getInput()
                        print("if: algorithm rectify, rewards:",
                              np.array(rewards).flatten())
                        loss, ratio = agent.learn(True, sess, x, agr_params,
                                                  rewards, baseline_reward, j)
                        print("i=", i, " j=", j, "average_reward=",
                              np.mean(rewards), " baseline_reward=",
                              baseline_reward, " loss=", loss, "\n")
                    else:
                        print("else: normal training, rewards:", rewards)
                        loss, ratio = agent.learn(False, sess, x, agr_params,
                                                  rewards, baseline_reward, j)
                        print("i=", i, " j=", j, "average_reward=",
                              np.mean(rewards), " baseline_reward=",
                              baseline_reward, " loss=", loss, "\n")
                    reward_c = np.mean(rewards)
                    baseline_reward = baseline_reward * AgentConfig.dr + (
                        1 - AgentConfig.dr) * reward_c
                for j in range(MainConfig.pre_num):
                    x, agr_params, action, _ = agent.getArgParams(
                        sess, init_input)
                    np.set_printoptions(suppress=True)
                    # Use the prediction model instead of the real environment
                    rewards = nnet.get_reward(sess, agr_params)
                    for ii in range(8):
                        pre_data["action"].append(agr_params[ii])
                        pre_data["reward"].append(rewards[ii])
                    # if j % (MainConfig.pre_num-1) == 0:
                    np.savetxt(
                        "../validate_time/params_data_agent(chen)/pre_data_action.csv",
                        pre_data["action"],
                        delimiter=',')
                    np.savetxt(
                        "../validate_time/params_data_agent(chen)/pre_data_reward.csv",
                        pre_data["reward"],
                        delimiter=',')
                    rewards = np.array(rewards).reshape(AgentConfig.batch_size)
                    loss, ratio = agent.learn(False, sess, x, agr_params,
                                              rewards, baseline_reward, j)
                    reward_c = np.mean(rewards)
                    baseline_reward = baseline_reward * AgentConfig.dr + (
                        1 - AgentConfig.dr) * reward_c

            # Save the per-step real-time reward and time (300 steps)
            plot = pd.DataFrame(data=plot_data)
            plot.to_csv(plot_time_reward, index=False)
    print("---------训练结束!----------")
Code example #6
def t_main_bp_pre_test(data_manager, plot_time_reward):
    envManager = EnvironmentManager(data_manager)
    envManager.auto_create_multi_singleprocess_envs()
    plot_data = {"time": [], "rewards_max": [], "rewards_mean": [], "reward_min": []}
    for i in range(1):
        Env, params = envManager.next_environment()
        agent = LSTM(params)
        nnet = NNet(len(params))
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            baseline_reward = 0
            a = [0, 1]
            init_input = np.array(a).reshape(1, 2)
            start_time = time.time()
            summary_writter_reward = tf.summary.FileWriter("log/reward", sess.graph)
            summary_writter_reward_real = tf.summary.FileWriter("log/reward_real", sess.graph)
            summary_writter_baseline = tf.summary.FileWriter("log/baseline", sess.graph)
            summary_writter_baseline_real = tf.summary.FileWriter("log/baseline_real", sess.graph)
            summary_writter_err = tf.summary.FileWriter("log/err", sess.graph)
            for j in range(MainConfig.num_train):
                x, agr_params, action, _ = agent.getArgParams(sess, init_input)
                # First 25 steps: real environment (also trains the prediction network); afterwards: predicted rewards
                if j <= 25:
                    rewards = Env.run(action)
                    nnet.store_transition(agr_params, rewards)
                    nnet.train_net(sess, j)
                    nnet.train_net(sess, j)
                    rewards_real = rewards
                if j > 25 :
                    rewards = nnet.get_reward(sess, agr_params)
                    rewards = np.array(rewards).reshape(AgentConfig.batch_size)
                    rewards_real = Env.run(action)
                summarize(summary_writter_reward, np.mean(rewards), j, "reward")
                summarize(summary_writter_reward_real, np.mean(rewards_real), j, "reward")
                # if j >= 150:
                #     rewards = Env.run(action)
                # Record the top-ranked data and feed it back into model training at a suitable time;
                # if j <= 25 or j >= 150:
                #     agent.check_topData(x, agr_params, rewards)
                if j == 0:
                    baseline_reward = np.mean(rewards)
                    baseline_reward_real = np.mean(rewards_real)
                # # Every ten steps, use the guided data pool (the hyperparameter values with the best rewards) to reduce variance
                # if (j + 1) % 10 == 0:
                #     x, agr_params, rewards = agent.getInput()
                #     print("if: algorithm rectify, rewards:", np.array(rewards).flatten())
                #     loss, ratio = agent.learn(True, sess, x, agr_params, rewards, baseline_reward, j)
                #     print("i=", i, " j=", j, "average_reward=", np.mean(rewards), " baseline_reward=",
                #           baseline_reward,
                #           " loss=", loss, "\n")
                # else:
                print("else: normal training, rewards:", rewards)
                loss, ratio = agent.learn(False, sess, x, agr_params, rewards, baseline_reward, j)
                print("i=", i, " j=", j, "average_reward=", np.mean(rewards), " baseline_reward=",
                      baseline_reward,
                      " loss=", loss, "\n")
                reward_c = np.mean(rewards)
                reward_c_real = np.mean(rewards_real)
                baseline_reward = baseline_reward * AgentConfig.dr + (1-AgentConfig.dr) * reward_c
                baseline_reward_real = baseline_reward_real * AgentConfig.dr + (1 - AgentConfig.dr) * reward_c_real
                err_all = (reward_c - np.mean(baseline_reward)) - (reward_c_real - np.mean(baseline_reward_real))
                summarize(summary_writter_baseline, np.mean(baseline_reward), j, "baseline")
                summarize(summary_writter_baseline_real, np.mean(baseline_reward_real), j, "baseline")
                summarize(summary_writter_err, np.mean(err_all), j, "err")
            # Save the per-step real-time reward and time (300 steps)
            plot = pd.DataFrame(data=plot_data)
            plot.to_csv(plot_time_reward, index=False)
    print("---------训练结束!----------")