# NOTE(review): this span is a truncated fragment — it begins mid-scope (the
# enclosing block is not visible above) and the inner evaluation loop is cut
# off below; indentation of the first line does not match the rest.
rnn_agent.summarize(ep_r, global_counter, 'reward_900')

    ## Start training
    for ep in range(Epoch):

        # if Inflow_rate < 600:
        #     Inflow_rate += 20

        # Re-build the network and environment at the start of every epoch
        # so each epoch runs against a fresh simulation.
        myTrafficNet = TrafficLightGridNetwork(
            name='grid',
            vehicles=vehicles,
            net_params=get_myNetParams(),
        )
        env = MyMultiTrafficLightGridPOEnv(env_params=flow_params['env'],
                                           sim_params=flow_params['sim'],
                                           network=myTrafficNet)

        # test phase (the guard `ep >= 0` is always true, so every epoch
        # runs the evaluation below)
        if ep >= 0:
            print('测试阶段:')
            print(ep)
            record_line(each_line_path, "*** Epoch: {} ***\n".format(ep))
            queue, speed, ret = [], [], []
            # Average over 3 evaluation rollouts.
            for i in range(3):
                ep_r, ep_q, ep_v = [], [], []
                state = env.reset()
                # normalize_formation / Agent_NUM are defined elsewhere in
                # the project — presumably this pads the per-agent
                # observations to a fixed layout; TODO confirm.
                state = normalize_formation(state, Agent_NUM)
                # Flatten the per-agent observation lists into one vector,
                # then shape it as (1, num_state * Agent_NUM) for the agent.
                _state = [n for a in state for n in a]
                _state = np.array(_state).reshape([-1, num_state * Agent_NUM])
def record_line(log_path, line):
    """Append *line* followed by a newline to the file at *log_path*.

    The file is opened in append mode, so existing log content is
    preserved and the file is created on first use.

    Args:
        log_path: Path of the log file to append to.
        line: Text to append; a trailing newline is added automatically.

    Returns:
        True once the write has completed.
    """
    with open(log_path, 'a') as fp:
        # write() is the correct call for a single string; the original
        # writelines() merely iterated over it character by character.
        fp.write(line)
        fp.write("\n")
    return True


if __name__ == "__main__":

		# Build the grid network and the multi-agent traffic-light
		# environment. `vehicles`, `myNetParams`, `flow_params`,
		# `TrafficLightGridNetwork` and `MyMultiTrafficLightGridPOEnv`
		# are defined elsewhere in the project — TODO confirm imports.
		myTrafficNet = TrafficLightGridNetwork(
			name = 'grid',
			vehicles =  vehicles,
			net_params = myNetParams,
		)
		env = MyMultiTrafficLightGridPOEnv(
			env_params=flow_params['env'], sim_params=flow_params['sim'], network=myTrafficNet
		)

		# Preparations for the agents
		from flow.core.ppo_agent import *
		# used for plotting
		import matplotlib.pyplot as plt
		import numpy as np

		Reward_num = 0	# 0 = multiple per-agent rewards, 1 = a single global reward
		NAME = '2x2_600_PPO_Hierarchy_SOFT_try0'

		#*********************
		# Training-schedule constants (trailing values look like alternative
		# quick-test settings — TODO confirm).
		Epoch = 100	# number of training epochs (alt: 10)
		sub_train_epi = 5	# sub-training episodes per epoch (alt: 20 / 1)
		steps = 210	# simulation steps per episode (alt: 10)

def record_line(log_path, line):
    """Append the given text plus a trailing newline to the log file.

    The file at ``log_path`` is opened in append mode (created on first
    use). Returns True after the write has finished.
    """
    with open(log_path, 'a') as log_file:
        for piece in (line, "\n"):
            log_file.writelines(piece)
    return True


if __name__ == "__main__":
    # Build the grid network and the multi-agent traffic-light environment.
    # `vehicles`, `myNetParams`, `flow_params`, `sumoparams` and the two
    # classes are defined elsewhere in the project — TODO confirm imports.
    myTrafficNet = TrafficLightGridNetwork(
        name = 'grid',
        vehicles =  vehicles,
        net_params = myNetParams,
    )
    env = MyMultiTrafficLightGridPOEnv(
        env_params=flow_params['env'], sim_params=flow_params['sim'], network=myTrafficNet)
    # Enable the SUMO GUI and restart the simulation with rendering on.
    env.sim_params.render = True
    env.restart_simulation(sim_params=sumoparams, render=True)
    #   print(env.scenario.get_edge_list())
    # Prepare agent.
    from flow.core.ppo_rnn_discrete import *
############################################################################
############################################################################
    # One agent per intersection in the grid.
    Agent_NUM = N_ROWS * N_COLUMNS
    Reward_num = 1    # 0 = multiple per-agent rewards, 1 = a single global reward
    write = 1   # whether to write simulation results to file: 0 = no, 1 = yes
    NAME = '3x3_400_discreteRNN_ALL_SOFT_try1_numcar15_aRL5'
    # Size of a single agent's observation vector — TODO confirm against env.
    num_state = 198
    # PPO agent over the concatenated observations of all agents, with one
    # action per agent; restore checkpoint 10 saved under NAME.
    rnn_agent = PPO(s_dim=num_state*Agent_NUM,a_dim=Agent_NUM,name=NAME)
    rnn_agent.restore_params(NAME,10)
############################################################################