def test(args, shared_model):
    ptitle('Test Agent')
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger('{}_log'.format(
        args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    env = create_env(args.env)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.model = A3C_MLP(player.env.observation_space.shape[0],
                           player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.model.eval()
    max_score = 0
    while True:
        if player.done:
            player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if args.save_max and reward_sum >= max_score:
                max_score = reward_sum
                state_to_save = player.model.state_dict()
                torch.save(state_to_save,
                           '{0}{1}.dat'.format(args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
def test(self, iteration, show='none', save_max=False):
    env = create_env(self.args)
    player = Agent(None, env, self.args, None)
    player.gpu_id = self.gpu_id
    if self.args.model == 'MLP':
        player.model = A3C_MLP(
            player.env.observation_space.shape[0], player.env.action_space,
            self.args.stack_frames)
    if self.args.model == 'CONV':
        player.model = A3C_CONV(self.args.stack_frames,
                                player.env.action_space)

    # Load the input model.
    if self.gpu_id >= 0:
        with torch.cuda.device(self.gpu_id):
            player.model.load_state_dict(self.shared_model.state_dict())
    else:
        player.model.load_state_dict(self.shared_model.state_dict())

    player.state = player.env.reset(self.args)
    player.state = torch.from_numpy(player.state).float()
    if self.gpu_id >= 0:
        with torch.cuda.device(self.gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    while True:
        player.action_test()
        if self.args.show != 'none' or show != 'none':
            player.env.render()
        self.reward_sum += player.reward

        if player.done:
            self.num_tests += 1
            self.reward_total_sum += self.reward_sum
            reward_mean = self.reward_total_sum / self.num_tests
            self.reward_sum = 0
            player.eps_len = 0
            state = player.env.reset(self.args)
            player.state = torch.from_numpy(state).float()
            if self.gpu_id >= 0:
                with torch.cuda.device(self.gpu_id):
                    player.state = player.state.cuda()
            if self.args.show != 'none' or show != 'none':
                player.env.close()
            break

    return self.reward_total_sum
def _init_model(self, model_type, env, stack_frames=0, load=False,
                load_file="./model.bin"):
    if model_type == 'MLP':
        model = A3C_MLP(env.observation_space.shape[0], env.action_space,
                        stack_frames)
    if model_type == 'CONV':
        model = A3C_CONV(stack_frames, env.action_space)
    if load:
        saved_state = torch.load(load_file,
                                 map_location=lambda storage, loc: storage)
        model.load_state_dict(saved_state)
    return model
def train(rank, args, shared_model, optimizer):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)
    env = create_env(args.env, args)
    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam(shared_model.parameters(), lr=args.lr)
    env.seed(args.seed + rank)
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if args.model == 'MLP':
        player.model = A3C_MLP(player.env.observation_space.shape[0],
                               player.env.action_space, args.stack_frames)
    if args.model == 'CONV':
        player.model = A3C_CONV(args.stack_frames, player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
            player.model = player.model.cuda()
    player.model.train()

    while True:
        # Synchronize the local model with the shared parameters.
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.model.load_state_dict(shared_model.state_dict())
        else:
            player.model.load_state_dict(shared_model.state_dict())

        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.cx = Variable(torch.zeros(1, 128).cuda())
                    player.hx = Variable(torch.zeros(1, 128).cuda())
            else:
                player.cx = Variable(torch.zeros(1, 128))
                player.hx = Variable(torch.zeros(1, 128))
        else:
            player.cx = Variable(player.cx.data)
            player.hx = Variable(player.hx.data)

        for step in range(args.num_steps):
            player.action_train()
            if player.done:
                break

        if player.done:
            player.eps_len = 0
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = torch.zeros(1, 1).cuda()
        else:
            R = torch.zeros(1, 1)
        if not player.done:
            state = player.state
            if args.model == 'CONV':
                state = state.unsqueeze(0)
            value, _, _, _ = player.model(
                (Variable(state), (player.hx, player.cx)))
            R = value.data

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = torch.zeros(1, 1).cuda()
        else:
            gae = torch.zeros(1, 1)
        for i in reversed(range(len(player.rewards))):
            R = args.gamma * R + player.rewards[i]
            advantage = R - player.values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # Generalized Advantage Estimation
            delta_t = player.rewards[i] + args.gamma * \
                player.values[i + 1].data - player.values[i].data
            gae = gae * args.gamma * args.tau + delta_t
            policy_loss = policy_loss - \
                (player.log_probs[i].sum() * Variable(gae)) - \
                (0.01 * player.entropies[i].sum())

        player.model.zero_grad()
        (policy_loss + 0.5 * value_loss).backward()
        ensure_shared_grads(player.model, shared_model, gpu=gpu_id >= 0)
        optimizer.step()
        player.clear_actions()
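# train() above relies on ensure_shared_grads(), which is not shown in this
# section. The sketch below is a minimal, assumed implementation of the usual
# Hogwild hand-off: it points the shared model's gradients at the local
# worker's freshly computed gradients (copying them to the CPU when the worker
# ran on a GPU). It may differ in detail from the repository's own helper.
def ensure_shared_grads(model, shared_model, gpu=False):
    for param, shared_param in zip(model.parameters(),
                                   shared_model.parameters()):
        if shared_param.grad is not None and not gpu:
            # CPU workers share gradient storage with the shared model, so
            # the hand-off only needs to happen once.
            return
        if not gpu:
            shared_param._grad = param.grad
        else:
            # GPU gradients must be copied to the CPU before the shared
            # (CPU-resident) optimizer can consume them.
            shared_param._grad = param.grad.cpu()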
# https://github.com/pytorch/examples/tree/master/mnist_hogwild
# Training settings
# Implemented multiprocessing using locks, but it was not beneficial. Hogwild
# training was far superior.
if __name__ == '__main__':
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    if args.gpu_ids == -1:
        args.gpu_ids = [-1]
    else:
        torch.cuda.manual_seed(args.seed)
        mp.set_start_method('spawn')
    env = create_env(args.env, args)
    if args.model == 'MLP':
        shared_model = A3C_MLP(env.observation_space.shape[0],
                               env.action_space, args.stack_frames)
    if args.model == 'CONV':
        shared_model = A3C_CONV(args.stack_frames, env.action_space)
    if args.load:
        saved_state = torch.load(
            '{0}{1}.dat'.format(args.load_model_dir, args.env),
            map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        if args.optimizer == 'RMSprop':
            optimizer = SharedRMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = SharedAdam(shared_model.parameters(), lr=args.lr,
torch.set_default_tensor_type('torch.FloatTensor')

print("begin loading models")
saved_state = torch.load(
    '{0}{1}.dat'.format(args.load_model_dir, args.env),
    map_location=lambda storage, loc: storage)
print("finished loading models")

torch.manual_seed(args.seed)
env = create_env(args.env, -1)
num_tests = 0
reward_total_sum = 0
player = Agent(None, env, args, None, -1)
player.model = A3C_MLP(env.observation_space, env.action_space,
                       args.stack_frames)
if args.new_gym_eval:
    player.env = gym.wrappers.Monitor(
        player.env, "{}_monitor".format(args.env), force=True)
player.model.load_state_dict(saved_state)
player.model.eval()

for i_episode in range(1):
    speed = []
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.eps_len = 0
    reward_sum = 0
    while True:
def test(args, shared_model):
    ptitle('Test Agent')
    gpu_id = args.gpu_ids[-1]
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    log['{}_log'.format(args.env)] = logging.getLogger(
        '{}_log'.format(args.env))
    d_args = vars(args)
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

    torch.manual_seed(args.seed)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed)
    env = create_env(args.env, args)
    reward_sum = 0
    start_time = time.time()
    num_tests = 0
    reward_total_sum = 0
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if args.model == 'MLP':
        player.model = A3C_MLP(
            player.env.observation_space.shape[0], player.env.action_space,
            args.stack_frames)
    if args.model == 'CONV':
        player.model = A3C_CONV(args.stack_frames, player.env.action_space)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.model = player.model.cuda()
            player.state = player.state.cuda()
    player.model.eval()

    while True:
        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.model.load_state_dict(shared_model.state_dict())
            else:
                player.model.load_state_dict(shared_model.state_dict())

        player.action_test()
        reward_sum += player.reward

        if player.done:
            num_tests += 1
            reward_total_sum += reward_sum
            reward_mean = reward_total_sum / num_tests
            log['{}_log'.format(args.env)].info(
                "Time {0}, episode reward {1}, episode length {2}, reward mean {3:.4f}".format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, player.eps_len, reward_mean))

            if reward_sum > args.save_score_level:
                player.model.load_state_dict(shared_model.state_dict())
                state_to_save = player.model.state_dict()
                torch.save(state_to_save, '{0}{1}.dat'.format(
                    args.save_model_dir, args.env))

            reward_sum = 0
            player.eps_len = 0
            state = player.env.reset()
            time.sleep(60)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()
def test(rank, args, shared_model):
    writer = SummaryWriter('8_27_test')
    model_buffer = Model_Buffer(args)
    test_episodes = args.test_episodes
    ptitle('Test Agent')
    log = {}
    setup_logger('{}_log'.format(args.env),
                 r'{0}{1}_log'.format(args.log_dir, args.env))
    print("logfile check", r'{0} {1}_log'.format(args.log_dir, args.env))
    print("logs in test", args.log_dir)
    log['{}_log'.format(args.env)] = logging.getLogger(  # keep the logger in a dict
        '{}_log'.format(args.env))
    d_args = vars(args)  # vars() returns the object's attributes as a dict
    for k in d_args.keys():
        log['{}_log'.format(args.env)].info('{0}: {1}'.format(
            k, d_args[k]))  # log the run arguments

    torch.manual_seed(args.seed)
    env = create_env(args.env, args.seed)
    start_time = time.time()
    num_tests = 0  # number of test episodes played so far
    player = Agent(None, env, args, None, rank)
    player.model = A3C_MLP(player.env.observation_space,
                           player.env.action_space,
                           args.stack_frames)  # build the model
    player.state = player.env.reset()  # initial state
    player.state = torch.from_numpy(player.state).float()
    player.done = True
    player.model.eval()  # evaluation mode
    is_model_empty = True
    is_testing = False
    while True:
        model_buffer.put(shared_model)
        # A full evaluation round has finished; re-initialize.
        if player.done and np.mod(num_tests, test_episodes) == 0 and not is_testing:
            reward_episode = 0
            success_rate = 0
            load_model = model_buffer.get()  # fetch the shared model
            model_queue_size = model_buffer.qsize()
            if load_model:
                is_testing = True
                is_model_empty = False
                training_steps = load_model[1]
                training_episodes = load_model[2]
                # Load the shared parameters (stored in load_model[0]) into
                # the player's model.
                player.model.load_state_dict(load_model[0])
            else:
                is_model_empty = True  # no model available yet
                time.sleep(10)

        if not is_model_empty:
            player.action_test()
            reward_episode += player.reward
            if 'is_success' in player.info.keys():  # the episode ended in success
                success_rate += 1

            if player.done:
                # done when the goal is reached, the agent crashes, or it
                # strays too far; the episode is over.
                num_tests += 1  # one more test episode finished
                player.eps_len = 0  # reset the per-episode step counter
                state = player.env.reset()
                player.state = torch.from_numpy(state).float()

                if np.mod(num_tests, test_episodes) == 0:
                    # A full evaluation round is done; aggregate the statistics.
                    is_testing = False
                    reward_episode = reward_episode / test_episodes
                    writer.add_scalar('success_num/Test', success_rate,
                                      training_steps)
                    success_rate = success_rate / test_episodes
                    log['{}_log'.format(args.env)].info(
                        "Time {0}, training episodes {1}, training steps {2}, reward episode {3}, success_rate {4}, "
                        "model cached {5}".format(
                            time.strftime(
                                "%Hh %Mm %Ss",
                                time.gmtime(time.time() - start_time)),
                            training_episodes, training_steps, reward_episode,
                            success_rate, model_queue_size))
                    writer.add_scalar('success_rate/Test', success_rate,
                                      training_steps)
                    # Save the model.
                    state_to_save = player.model.state_dict()
                    torch.save(state_to_save,
                               '{0}{1}.dat'.format(args.log_dir, args.env))
                    torch.save(state_to_save,
                               '{0}{1}_pre.dat'.format(args.log_dir, args.env))
                    if training_steps > args.training_steps:
                        break
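# Model_Buffer is not defined in this section. From its use in test() above
# (put(shared_model), get() returning a (state_dict, training_steps,
# training_episodes) triple, and qsize()), it behaves like a small queue of
# model snapshots. The class below is a hypothetical sketch consistent with
# that interface, assuming the counter layers hold a single element each; the
# real implementation may differ.
import queue


class Model_Buffer(object):
    def __init__(self, args, maxsize=0):
        self.args = args
        self._queue = queue.Queue(maxsize)

    def put(self, shared_model):
        # Snapshot the shared parameters together with the global step and
        # episode counters the training workers keep on the shared model.
        snapshot = (
            {k: v.clone() for k, v in shared_model.state_dict().items()},
            int(shared_model.training_steps.weight.data.item()),
            int(shared_model.training_steps.bias.data.item()),
        )
        self._queue.put(snapshot)

    def get(self):
        try:
            return self._queue.get_nowait()
        except queue.Empty:
            return None

    def qsize(self):
        return self._queue.qsize()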
def train(rank, args, input_model=None, max_iter=100000, step_test=-1,
          log=False):
    if rank >= 0:
        torch.manual_seed(args.seed + rank)
    gpu_id = args.gpu_ids[rank % len(args.gpu_ids)]
    torch.manual_seed(args.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(args.seed + rank)
    env = create_env(args)
    env.seed(args.seed + rank)
    if log:
        log = setup_logger("{0}_{1}_log".format(args.scale_legs, rank),
                           "logs/{0}_{1}_log".format(args.scale_legs, rank))

    # Player initialization.
    player = Agent(None, env, args, None)
    player.gpu_id = gpu_id
    if args.model == 'MLP':
        player.model = A3C_MLP(player.env.observation_space.shape[0],
                               player.env.action_space, args.stack_frames)
    if args.model == 'CONV':
        player.model = A3C_CONV(args.stack_frames, player.env.action_space)

    # Load the input model into the player.
    if input_model is not None:
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                player.model.load_state_dict(input_model.state_dict())
        else:
            player.model.load_state_dict(input_model.state_dict())

    # Initialize the player optimizer (elif keeps a chosen RMSprop from being
    # overwritten by the SGD fallback).
    if args.optimizer == 'RMSprop':
        optimizer = optim.RMSprop(player.model.dictForOptimizer(), lr=args.lr)
    elif args.optimizer == 'Adam':
        optimizer = optim.Adam(player.model.dictForOptimizer(), lr=args.lr)
    else:
        optimizer = optim.SGD(player.model.dictForOptimizer(), lr=args.lr)

    # Reset the environment and initialize the player state.
    player.state = player.env.reset(args)
    player.state = torch.from_numpy(player.state).float()

    # Move the model and state to the GPU if one is available.
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            player.state = player.state.cuda()
            player.model = player.model.cuda()
    player.model.train()

    last_iter = 0
    mean_buf = Buffer(5)

    # Start looping over episodes.
    for iteration in range(max_iter):
        last_iter += iteration
        # Reset cx and hx if the environment episode is over.
        if player.done:
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.cx = Variable(torch.zeros(1, 128).cuda())
                    player.hx = Variable(torch.zeros(1, 128).cuda())
            else:
                player.cx = Variable(torch.zeros(1, 128))
                player.hx = Variable(torch.zeros(1, 128))
        else:
            player.cx = Variable(player.cx.data)
            player.hx = Variable(player.hx.data)

        # Roll out actions and collect rewards for one episode.
        for step in range(args.num_steps):
            player.action_train()
            if player.done:
                break

        if player.done:
            player.eps_len = 0
            # Reset the state.
            state = player.env.reset(args)
            player.state = torch.from_numpy(state).float()
            if gpu_id >= 0:
                with torch.cuda.device(gpu_id):
                    player.state = player.state.cuda()

        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                R = torch.zeros(1, 1).cuda()
        else:
            R = torch.zeros(1, 1)
        if not player.done:
            state = player.state
            if args.model == 'CONV':
                state = state.unsqueeze(0)
            value, _, _, _ = player.model(
                (Variable(state), (player.hx, player.cx)))
            R = value.data

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        if gpu_id >= 0:
            with torch.cuda.device(gpu_id):
                gae = torch.zeros(1, 1).cuda()
        else:
            gae = torch.zeros(1, 1)
        for i in reversed(range(len(player.rewards))):
            R = args.gamma * R + player.rewards[i]
            advantage = R - player.values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            # Generalized Advantage Estimation
            delta_t = player.rewards[i] + args.gamma * \
                player.values[i + 1].data - player.values[i].data
            gae = gae * args.gamma * args.tau + delta_t
            policy_loss = policy_loss - \
                (player.log_probs[i].sum() * Variable(gae)) - \
                (0.01 * player.entropies[i].sum())

        player.model.zero_grad()
        (policy_loss + 0.5 * value_loss).backward()
        optimizer.step()
        player.clear_actions()

        if step_test > 0 and iteration % step_test == 0:
            tester = Tester(args, player.model)
            score = tester.test(last_iter)
            mean_buf.push(score)
            recent_mean = sum(mean_buf.bf) / mean_buf.current_size
            text = "Iteration {0}, episode reward {1}, recent reward mean {2}".format(
                iteration, score, recent_mean)
            log.info(text)

    tester = Tester(args, player.model)
    fitness = tester.test(last_iter)
    return fitness
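# The backward loop above (and the identical loops in the other train()
# variants) computes n-step returns plus Generalized Advantage Estimation.
# The function below is a self-contained restatement of that arithmetic on
# plain Python floats, with hypothetical argument names, intended only to
# make the recursion easier to follow.
def a3c_losses(rewards, values, log_prob_sums, entropy_sums,
               gamma, tau, entropy_beta=0.01):
    """`values` must hold len(rewards) + 1 entries; the last entry is the
    bootstrap value (0.0 if the episode terminated)."""
    policy_loss, value_loss = 0.0, 0.0
    R = values[-1]
    gae = 0.0
    for i in reversed(range(len(rewards))):
        R = gamma * R + rewards[i]                       # n-step return
        advantage = R - values[i]
        value_loss += 0.5 * advantage ** 2               # critic loss
        delta_t = rewards[i] + gamma * values[i + 1] - values[i]
        gae = gae * gamma * tau + delta_t                # GAE recursion
        policy_loss -= log_prob_sums[i] * gae + entropy_beta * entropy_sums[i]
    return policy_loss, value_loss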
parser.add_argument('--amsgrad',
                    default=True,
                    metavar='AM',
                    help='Adam optimizer amsgrad parameter')

# Based on
# https://github.com/pytorch/examples/tree/master/mnist_hogwild
# Training settings
# Implemented multiprocessing using locks, but it was not beneficial. Hogwild
# training was far superior.
if __name__ == '__main__':
    args = parser.parse_args()
    torch.manual_seed(args.seed)
    env = create_env(args.env)
    shared_model = A3C_MLP(env.observation_space.shape[0], env.action_space)
    if args.load:
        saved_state = torch.load(
            '{0}{1}.dat'.format(args.load_model_dir, args.env),
            map_location=lambda storage, loc: storage)
        shared_model.load_state_dict(saved_state)
    shared_model.share_memory()

    if args.shared_optimizer:
        optimizer = SharedAdam(shared_model.parameters(), lr=args.lr,
                               amsgrad=args.amsgrad)
        optimizer.share_memory()
    else:
        optimizer = None
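# The __main__ blocks above call shared_model.share_memory(), which is what
# makes Hogwild-style training possible: the parameter storage is moved into
# shared memory, so every worker process updates the same underlying tensors.
# The function below is a minimal, self-contained illustration of that
# behaviour; it is not part of this repository and assumes the default 'fork'
# start method on Linux.
def _shared_memory_demo():
    import torch
    import torch.multiprocessing as mp
    import torch.nn as nn

    def bump(model):
        # In-place updates made in the child process are visible to the
        # parent, because the parameter storage is shared.
        with torch.no_grad():
            for p in model.parameters():
                p.add_(1.0)

    demo = nn.Linear(2, 2)
    demo.share_memory()
    before = demo.weight.clone()
    proc = mp.Process(target=bump, args=(demo,))
    proc.start()
    proc.join()
    assert not torch.equal(before, demo.weight)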
setup_logger('{}_mon_log'.format(args.env),
             r'{0}{1}_mon_log'.format(args.log_dir, args.env))
log['{}_mon_log'.format(args.env)] = logging.getLogger('{}_mon_log'.format(
    args.env))

torch.manual_seed(args.seed)

d_args = vars(args)
for k in d_args.keys():
    log['{}_mon_log'.format(args.env)].info('{0}: {1}'.format(k, d_args[k]))

env = create_env("{}".format(args.env))
num_tests = 0
reward_total_sum = 0
player = Agent(None, env, args, None)
player.model = A3C_MLP(env.observation_space.shape[0], env.action_space)
if args.new_gym_eval:
    player.env = gym.wrappers.Monitor(
        player.env, "{}_monitor".format(args.env), force=True)
player.model.load_state_dict(saved_state)
player.model.eval()

best_reward = 0
best_speed = 0
for i_episode in range(args.num_episodes):
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.eps_len = 0
def train(rank, args, shared_model, optimizer):
    # The optimizer passed in belongs to shared_model.
    init = True
    ptitle('Training Agent: {}'.format(rank))
    torch.manual_seed(args.seed + rank)
    env = create_env(args.env, args.seed + rank)
    if optimizer is None:
        if args.optimizer == 'RMSprop':
            optimizer = optim.RMSprop(shared_model.parameters(), lr=args.lr)
        if args.optimizer == 'Adam':
            optimizer = optim.Adam(shared_model.parameters(), lr=args.lr)

    player = Agent(None, env, args, None, rank)
    player.model = A3C_MLP(player.env.observation_space,
                           player.env.action_space, args.stack_frames)
    player.state = player.env.reset()
    player.state = torch.from_numpy(player.state).float()
    player.model.train()  # the local model is only ever used for training

    if rank == 1:
        writer = SummaryWriter('8_27_train')

    local_step_counter = 0
    while True:
        if init:
            # Initialize the shared counters (step count in the weight,
            # episode count in the bias of the training_steps layer).
            shared_model.training_steps.weight.data \
                .copy_(torch.Tensor([0]))
            shared_model.training_steps.bias.data \
                .copy_(torch.Tensor([0]))
            init = False

        player.model.load_state_dict(
            shared_model.state_dict())  # synchronize parameters

        for step in range(args.num_steps):
            local_step_counter += 1
            # Increment the global step counter T (the sum of the steps taken
            # by all workers) by one.
            shared_model.training_steps.weight.data \
                .copy_(torch.Tensor([1]) + shared_model.training_steps.weight.data)
            player.action_train()  # core rollout step
            if player.done:
                break

        terminal = False
        if player.done or player.eps_len >= args.max_episode_length:
            # The agent finished the episode or exceeded the maximum length.
            terminal = True
            shared_model.done_nums += 1
            if 'is_success' in player.info.keys():
                shared_model.success_num += 1

        R = torch.zeros(1)
        if not player.done:
            # Bootstrap the return; in A3C the value and policy heads share
            # the same network.
            state = player.state
            value, _, _ = player.model(Variable(state))
            R = value.data

        if terminal:
            # Reset: increment the shared episode counter (stored in the bias).
            shared_model.training_steps.bias.data \
                .copy_(torch.Tensor([1]) + shared_model.training_steps.bias.data)
            player.eps_len = 0
            state = player.env.reset()
            player.state = torch.from_numpy(state).float()
            player.reset_flag = True

        player.values.append(Variable(R))
        policy_loss = 0
        value_loss = 0
        R = Variable(R)
        gae = torch.zeros(1, 1)
        for i in reversed(range(len(player.rewards))):
            R = args.gamma * R + float(player.rewards[i])  # discounted return
            advantage = R - player.values[i]  # advantage estimate
            value_loss = value_loss + 0.5 * advantage.pow(2)  # critic update, Eq. (10)
            if rank == 1:
                writer.add_scalar(
                    'TD-error/train',
                    advantage.pow(2).data.cpu().numpy()[0],
                    shared_model.training_steps.weight.data.cpu().numpy()[0])
            player.values[i] = player.values[i].float()
            player.values[i + 1] = player.values[i + 1].float()
            delta_t = player.rewards[i] + args.gamma * \
                player.values[i + 1].data - \
                player.values[i].data  # one-step TD error
            # Generalized Advantage Estimation
            gae = gae * args.gamma * args.tau + delta_t
            policy_loss = policy_loss - \
                (player.log_probs[i].sum() * Variable(gae)) - \
                (0.01 * player.entropies[i].sum())  # actor (theta) update, Eq. (9)

        """
        After interacting with the environment for a fixed number of steps,
        each worker computes the gradient of its own network's loss, but does
        not apply it to its local network; instead it updates the shared
        network. In other words, the n workers independently apply their
        accumulated gradients to the shared model's parameters. Periodically,
        each worker copies the shared parameters back into its local network
        to guide subsequent interaction with the environment.
        """
        player.model.zero_grad()
        # The total loss is policy_loss + 0.5 * value_loss.
        if rank == 1:
            writer.add_scalar(
                'VLoss/train', value_loss,
                shared_model.training_steps.weight.data.cpu().numpy()[0])
            writer.add_scalar(
                'PLoss/train', policy_loss,
                shared_model.training_steps.weight.data.cpu().numpy()[0])
        (policy_loss + 0.5 * value_loss).backward()  # compute this worker's gradients
        ensure_shared_grads(player.model,
                            shared_model)  # copy the worker's gradients to the shared model
        optimizer.step()  # the optimizer belongs to shared_model, so step() applies the update to its parameters
        player.clear_actions()
        if shared_model.training_steps.weight.data.cpu().numpy() > args.training_steps:
            print('num of success={0},training episodes={1},success_rate={2}'.format(
                shared_model.success_num, shared_model.done_nums,
                shared_model.success_num / shared_model.done_nums))
            break