def train_A2C(start_time_tests=[31*24*3600, 304*24*3600],
              episode_length_test=14*24*3600, load=False):
    '''Method to train (or load a pre-trained) A2C agent. Testing periods
    have to be introduced already here so that they are not used during
    training.

    Parameters
    ----------
    start_time_tests : list of integers
        Time in seconds from the beginning of the year that will be used
        for testing. These periods should be excluded in the training
        process. By default the first day of February and the first day
        of November are used.
    episode_length_test : integer
        Number of seconds indicating the length of the testing periods.
        By default two weeks are reserved for testing.
    load : boolean
        Boolean indicating whether the algorithm is loaded (True) or
        needs to be trained (False)

    '''

    excluding_periods = []
    for start_time_test in start_time_tests:
        excluding_periods.append((start_time_test, start_time_test + episode_length_test))
    # Summer period (from June 21st till September 22nd).
    # Excluded since no heating during this period (nothing to learn).
    excluding_periods.append((173*24*3600, 266*24*3600))

    env = BoptestGymEnvRewardWeightCost(url=url,
                                        actions=['oveHeaPumY_u'],
                                        observations={'reaTZon_y': (280., 310.)},
                                        random_start_time=True,
                                        excluding_periods=excluding_periods,
                                        max_episode_length=1*24*3600,
                                        warmup_period=3*3600,
                                        Ts=900)
    env = NormalizedObservationWrapper(env)
    env = NormalizedActionWrapper(env)

    model = A2C('MlpPolicy', env, verbose=1, gamma=0.99, seed=seed,
                tensorboard_log=os.path.join('results'))

    if not load:
        model.learn(total_timesteps=int(1e5))
        # Save the agent
        model.save(os.path.join(utilities.get_root_path(), 'examples',
                                'agents', 'a2c_bestest_hydronic_heatpump'))
    else:
        # Load the trained agent
        model = A2C.load(os.path.join(utilities.get_root_path(), 'examples',
                                      'agents', 'a2c_bestest_hydronic_heatpump'))

    return env, model, start_time_tests
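# A minimal sketch (not part of the original module) of how the reserved
# windows in `excluding_periods` keep test data out of training: a candidate
# training episode is rejected whenever its [start, end) interval overlaps any
# excluded (t_start, t_end) window. The helper name is an illustrative assumption.
def overlaps_excluded(start_time, episode_length, excluding_periods):
    '''Return True if the candidate episode overlaps an excluded period.'''
    end_time = start_time + episode_length
    return any(start_time < t_end and t_start < end_time
               for (t_start, t_end) in excluding_periods)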
def evaluate(modelname, env):
    n_cores = 4
    obs = env.reset()
    model = A2C.load(modelname)
    wr = 0
    win = 0
    total_health_diff = 0
    loss = 0
    episodes = 0
    total_episodes = 100
    while episodes < total_episodes:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        # print(rewards[0])
        time.sleep(.04)
        env.render(mode="human")
        for i in range(n_cores):
            if dones[i]:
                if info[i]["p1_health"] < info[i]["p2_health"]:
                    loss += 1
                else:
                    win += 1
                total_health_diff += info[i]["p1_health"] - info[i]["p2_health"]
                wr = win / (win + loss)
                episodes += 1
    return wr, total_health_diff / total_episodes
def _train(env_id, agent, model_params, total_steps, is_evaluation=False):
    if is_evaluation:
        # evaluate_policy() must only take one environment
        envs = SubprocVecEnv([make_env(env_id)])
    else:
        envs = SubprocVecEnv([make_env(env_id) for _ in range(NUM_CPU)])
    # normalize the envs during training and evaluation
    envs = VecNormalize(envs)

    # Load a pretrained model during training if one exists. The env is passed
    # to load() so that the subsequent learn() call has an environment to train on.
    if not is_evaluation and os.path.exists(agent + '_' + env_id):
        if agent == 'ppo2':
            model = PPO2.load(agent + '_' + env_id, env=envs)
        elif agent == 'a2c':
            model = A2C.load(agent + '_' + env_id, env=envs)
    else:
        if agent == 'ppo2':
            model = PPO2(MlpLstmPolicy, envs, nminibatches=1, verbose=1,
                         **model_params)
        elif agent == 'a2c':
            model = A2C(MlpLstmPolicy, envs, verbose=1, **model_params)

    model.learn(total_timesteps=total_steps)
    return envs, model
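# `_train()` above assumes a `make_env` factory. A plausible minimal version is
# sketched below: SubprocVecEnv expects a list of zero-argument callables, each
# constructing a fresh environment in its own worker process. The body is an
# assumption, not the original implementation.
import gym

def make_env(env_id):
    def _init():
        return gym.make(env_id)
    return _init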
def main(mode="train"): env = gym.make("snakebot-v0") if mode == "train": model = ac(policy=MlpLnLstmPolicy, env=env, verbose=0, tensorboard_log="a2c_snakebot_tensorboard") model.learn(total_timesteps=2000, callback=callback) print("Saving model to snake_dqn.pkl...") model.save("snake_a2c.pkl") print("done.") del model # remove to demonstrate saving and loading if mode == "test": model = ac.load("snake_a2c.pkl") obs = env.reset() done = False env.set_done(5000) while not done: action, _states = model.predict(obs) obs, rewards, done, info = env.step(action) # env.render() print(obs)
def train_a2c(seed):
    """
    test A2C on the uav_env (cartesian, discrete)
    :param seed: (int) random seed for A2C
    """
    """
    A2C(policy, env, gamma=0.99, n_steps=5, vf_coef=0.25, ent_coef=0.01,
        max_grad_norm=0.5, learning_rate=0.0007, alpha=0.99, epsilon=1e-05,
        lr_schedule='linear', verbose=0, tensorboard_log=None,
        _init_setup_model=True)
    """
    algo = 'A2C'
    num_timesteps = 3000000

    env = set_up_env(seed)

    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0

    model = A2C(policy=MlpPolicy, env=env, gamma=0.99, n_steps=5,
                vf_coef=0.25, ent_coef=0.01, max_grad_norm=0.5,
                learning_rate=0.0007, alpha=0.99, epsilon=1e-05,
                lr_schedule='linear', verbose=0,
                tensorboard_log="./logs/{}/tensorboard/{}/".format(EXPERIMENT_NATURE, algo))

    model.learn(total_timesteps=num_timesteps, callback=callback, seed=seed,
                log_interval=500, tb_log_name="seed_{}".format(seed))

    model = A2C.load(log_dir + 'best_model.pkl')

    evaluation = evaluate_model(env, model, 100)

    os.makedirs('./logs/{}/csv/{}/'.format(EXPERIMENT_NATURE, algo), exist_ok=True)
    os.rename('/tmp/gym/monitor.csv',
              "./logs/{}/csv/{}/seed_{}.csv".format(EXPERIMENT_NATURE, algo, seed))

    env.close()
    del model, env
    gc.collect()
    return evaluation
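# Sketch of the stable-baselines-2-style callback assumed by train_a2c() above
# (an assumption inferred from the best_mean_reward/n_steps globals and the
# 'best_model.pkl' path): SB2 callbacks are plain functions taking
# (_locals, _globals) and returning True to continue training.
from stable_baselines.results_plotter import load_results, ts2xy

def callback(_locals, _globals):
    global best_mean_reward, n_steps
    if (n_steps + 1) % 1000 == 0:
        # Read the monitor file and save the model when the mean reward improves
        x, y = ts2xy(load_results(log_dir), 'timesteps')
        if len(x) > 0:
            mean_reward = np.mean(y[-100:])
            if mean_reward > best_mean_reward:
                best_mean_reward = mean_reward
                _locals['self'].save(log_dir + 'best_model.pkl')
    n_steps += 1
    return True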
def main():
    alg_input = input("Select algorithm (PPO2 or A2C only):")
    while alg_input.lower() not in ("ppo2", "a2c"):
        print("Not an option (PPO2 or A2C only) !")
        alg_input = input("Select algorithm (PPO2 or A2C only):")
    model_input = "trained_agents\\" + input(
        "Select model to test (input filename, eg. a2c_wf_2):")

    env = gym.make("WARFLEET-v0")
    # The algorithms require a vectorized environment to run
    env = DummyVecEnv([lambda: env])
    log_dir = "./logs/"
    done = False
    stage_reward = 0
    turns = 0

    if alg_input.lower() == "ppo2":
        model = PPO2.load(model_input, env=env, tensorboard_log=log_dir)
    elif alg_input.lower() == "a2c":
        model = A2C.load(model_input, env=env, tensorboard_log=log_dir)

    obs = env.reset()
    while not done:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        stage_reward += reward
        turns = turns + 1
        # env.render()

    print("Reward: {} /42".format(stage_reward))
    print("Turns: {}".format(turns))
    env.close()
def load_a2c():
    loaded_model = A2C.load(save_dir + "/A2C_tutorial")
    print("loaded", loaded_model.predict(obs, deterministic=True))
    print("load gamma=", loaded_model.gamma, ", n_steps=", loaded_model.n_steps)

    # Saving a model stores its hyperparameters and network weights, but not
    # the environment; after loading, the environment must be set again.
    loaded_model.set_env(DummyVecEnv([lambda: gym.make("Pendulum-v0")]))
    loaded_model.learn(8000)
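# Save-side counterpart that load_a2c() above presupposes: a minimal sketch,
# assuming `save_dir` and the "A2C_tutorial" file name match the loader.
# A2C.save() stores hyperparameters and network weights only, which is why the
# loader has to call set_env() afterwards.
from stable_baselines import A2C
from stable_baselines.common.vec_env import DummyVecEnv
import gym

def save_a2c():
    env = DummyVecEnv([lambda: gym.make("Pendulum-v0")])
    model = A2C("MlpPolicy", env, gamma=0.99, n_steps=5, verbose=0)
    model.learn(8000)
    # save_dir is assumed to be defined at module level, as in load_a2c()
    model.save(save_dir + "/A2C_tutorial")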
def load_model(tickers):
    '''Load the pretrained model from the trained-models folder.
    '''
    # model = run_model(tickers, start="2020-01-01T09:30:00-04:00", end="2020-12-31T09:30:00-04:00")
    model = A2C.load(
        "trained_models/2021-03-22 18:25:09.528982/A2C_30k_dow_120.zip")
    return model
def train_agent_with_a2c(load=False):
    from stable_baselines.common.policies import MlpPolicy
    from stable_baselines.common.vec_env import SubprocVecEnv
    from stable_baselines import A2C

    # multiprocess environment (commented out: the single environment below
    # would immediately override it and leak the worker processes)
    # n_cpu = 4
    # env = SubprocVecEnv([lambda: gym.make('F16GCAS-v0') for i in range(n_cpu)])
    env = gym.make("F16GCAS-v0")

    class CustomPolicy(MlpPolicy):
        def __init__(self, *args, **kwargs):
            super(CustomPolicy, self).__init__(*args, **kwargs, layers=[128, 128])

    if not load:
        model = A2C(env=env, verbose=1, policy=CustomPolicy)
        # model.learn(total_timesteps=1000000)
        ExpData = ExpertDataset("./lqr_export.npz")
        model.pretrain(ExpData, n_epochs=100)
    else:
        model = A2C.load(ROOT + "/trained_models/TDRL/f16/a2c/128_128", env=env)
        with model.graph.as_default():
            for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='model/pi/'):
                print(i)

    return model
def NewPotential(current_window, algorithm='PPO'):
    # Determine the pretrained agent
    if algorithm == 'A2C':
        model = A2C.load("pretrained_A2C")
    elif algorithm == 'PPO':
        model = PPO2.load("pretrained_PPO")
    elif algorithm == 'ACKTR':
        model = ACKTR.load("pretrained_ACKTR")
    elif algorithm == 'ACER':
        model = ACER.load("pretrained_ACER")
    else:
        raise ValueError("%s is not a valid algorithm." % algorithm)

    if len(current_window) != model.observation_space.shape[0]:
        raise ValueError("%s does not match the model's window size."
                         % len(current_window))

    action, _states = model.predict(current_window, deterministic=False)
    voltages = np.linspace(0, 1, num=model.action_space.n)
    if 0 <= action <= model.action_space.n - 1:
        voltage = voltages[action]
    else:
        raise ValueError(
            "Received invalid action={} which is not part of the action space"
            .format(action))
    return voltage
def test_save_callback(self):
    '''
    Test that the model performance can be monitored and results can be
    checked and saved as the model improves. This test trains an agent
    for a short period of time, without loading a pre-trained model.
    Therefore, this test also checks that an RL agent from
    stable-baselines can be trained.

    '''
    # Define logging directory. Monitoring data and agent model will be stored here
    log_dir = os.path.join(utilities.get_root_path(), 'examples',
                           'agents', 'monitored_A2C')

    # Perform a short training example with callback
    env, _, _ = run_save_callback.train_A2C_with_callback(
        log_dir=log_dir, tensorboard_log=None)

    # Load the trained agent
    model = A2C.load(os.path.join(log_dir, 'best_model'))

    # Test one step with the trained model
    obs = env.reset()
    df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
    df.index.name = 'keys'
    ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                'references', 'save_callback.csv')
    self.compare_ref_values_df(df, ref_filepath)

    # Remove model to allow further testing
    shutil.rmtree(log_dir, ignore_errors=True)
def run_agent(envs, parameters):
    '''Run a trained agent.'''
    path = Path(parameters['path'])
    dummy_env = OptVecEnv(envs)
    set_global_seeds(parameters.setdefault('seed'))
    save_path = str(path / 'model.pkl')
    alg = parameters['alg']
    if alg == 'PPO':
        with open(save_path, 'rb') as pkl:
            model = PPO2.load(pkl, env=dummy_env)
    elif alg == 'A2C':
        with open(save_path, 'rb') as pkl:
            model = A2C.load(pkl, env=dummy_env)
    else:
        raise ValueError('Unknown algorithm: {}'.format(alg))
    try:
        done = False
        observations = dummy_env.reset()
        while not done:
            action = model.predict(observations)
            print(action[0].ravel().tolist())
            observations, rewards, dones, infos = dummy_env.step(action[0])
            done = any(dones)
            info = infos[0]
            yield info['weights']
    finally:
        dummy_env.close()
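# Usage sketch for the generator above (a hypothetical caller; `envs` and
# `parameters` are assumed to be prepared elsewhere in this module). Because
# run_agent() is a generator, the weights stream out one step at a time and the
# finally block closes the environment when iteration ends.
def replay_weights(envs, parameters):
    for weights in run_agent(envs, parameters):
        print(weights)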
def train(game, num_timesteps, num_envs, dir_name, model_name, prev_model_name):
    dir_name = get_valid_filename(dir_name)
    model_name = get_valid_filename(model_name)
    log_dir = f"logs/{dir_name}/{model_name}-training"
    model_dir = f"models/{dir_name}"
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    env = make_vec_envs(game, False, num_envs)
    prev_model_path = f"{model_dir}/{prev_model_name}.zip"
    if prev_model_name is not None and os.path.exists(prev_model_path):
        model = A2C.load(prev_model_path, env=env)
        model.tensorboard_log = log_dir
    else:
        model = A2C(policy="MlpPolicy", env=env, gamma=0.8, n_steps=64,
                    learning_rate=0.00025, verbose=1, tensorboard_log=log_dir)
    model.learn(num_timesteps)
    model.save(f"{model_dir}/{model_name}.zip")
    env.close()
def train_agent(train, pickle_file, agent_type, env_kwargs, parms):
    bin_path = "bin/" + pickle_file

    if path.exists(bin_path):
        if agent_type == "a2c":
            print("Loading A2C Agent")
            RL_model = A2C.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ddpg":
            print("Loading DDPG Agent")
            RL_model = DDPG.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        elif agent_type == "ppo":
            print("Loading PPO2 Agent")
            RL_model = PPO2.load(
                bin_path,
                tensorboard_log=f"{config.TENSORBOARD_LOG_DIR}/{agent_type}")
        else:
            raise ValueError(f"Unknown agent type: {agent_type}")
    else:
        e_train_gym = ipenv.PortfolioAllocEnv(df=train, **env_kwargs)
        env_train, _ = e_train_gym.get_sb_env()
        agent = ipagent.IPRLAgent(env=env_train)
        model = agent.get_model(model_name=agent_type, model_kwargs=parms)
        RL_model = agent.train_model(model=model,
                                     tb_log_name=agent_type,
                                     total_timesteps=1000000)
        RL_model.save(bin_path)

    return RL_model
def test_variable_episode(self):
    '''
    Test that a model can be trained using a variable episode length.
    The method used to determine whether the episode is terminated or
    not is defined by the user. This test trains an agent for a short
    period of time, without loading a pre-trained model. Therefore,
    this test also checks that an RL agent from stable-baselines can be
    trained. This test also uses the save callback to check that the
    variable episode length is being effectively used. Notice that this
    test also checks that child classes can be nested since the example
    redefines the `compute_reward` and the `compute_done` methods.

    '''
    # Define logging directory. Monitoring data and agent model will be stored here
    log_dir = os.path.join(utilities.get_root_path(), 'examples',
                           'agents', 'variable_episode_A2C')

    # Perform a short training example with callback
    env, _, _ = run_variable_episode.train_A2C_with_variable_episode(
        log_dir=log_dir, tensorboard_log=None)

    # Load the trained agent
    model = A2C.load(os.path.join(log_dir, 'best_model'))

    # Test one step with the trained model
    obs = env.reset()
    df = pd.DataFrame([model.predict(obs)[0][0]], columns=['value'])
    df.index.name = 'keys'
    ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                'references', 'variable_episode_step.csv')
    self.compare_ref_values_df(df, ref_filepath)

    # Check variable lengths
    monitor = pd.read_csv(os.path.join(log_dir, 'monitor.csv'), index_col=None)
    monitor = monitor.iloc[1:]
    monitor.reset_index(inplace=True)
    monitor.columns = ['reward', 'episode_length', 'time']
    # Time may vary from one computer to another
    monitor.drop(labels='time', axis=1, inplace=True)
    # Utilities require the index to be named 'time' (even though this is not the case here)
    monitor.index.name = 'time'
    # Transform to numeric
    monitor = monitor.apply(
        lambda col: pd.to_numeric(col, errors='coerce'))
    # Check that we always obtain the same monitoring parameters
    ref_filepath = os.path.join(utilities.get_root_path(), 'testing',
                                'references', 'variable_episode_monitoring.csv')
    self.compare_ref_timeseries_df(monitor, ref_filepath)

    # Remove model to allow further testing
    shutil.rmtree(log_dir, ignore_errors=True)
def __init__(self, env, mode="random", agent=''): super().__init__(env) self.mode = mode if self.mode == "agent": self.agent = A2C.load(agent) self.facings = [1, 1, 1, 1] # put into place to not overcomplicate the first turn. both players will just pass until that self.first_turn = 0 self.last_infos = {}
def load_agent():
    model = A2C.load("a2c_agent.zip")
    obs = env.reset()
    for _ in range(10000):
        action, _states = model.predict(obs)
        # Gym's step() returns (obs, reward, done, info), in that order
        obs, reward, done, info = env.step(action)
        if done:
            obs = env.reset()
        env.render()
    env.close()
def _setup(self):
    # Game parameters
    self.env = gym.make(self.ENV_NAME)
    self.env.play_type = PLAY_TYPE.MACHINE
    self.env.render_mode = 'human'
    self.env.MAX_TURNS = self.max_turns
    self.model = A2C.load(self.MODEL_FILENAME)
    self.env.reset()
    # Report success
    print('Created new environment {0} with GameID: {1}'.format(self.ENV_NAME, self.GAME_ID))
def loader(algo, env_name):
    if algo == 'dqn':
        return DQN.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'ppo2':
        return PPO2.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'a2c':
        return A2C.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'acer':
        return ACER.load("trained_agents/" + algo + "/" + env_name + ".pkl")
    elif algo == 'trpo':
        return TRPO.load("trained_agents/" + algo + "/" + env_name + ".pkl")
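# A table-driven variant of loader() (a sketch; the same file layout is
# assumed). It behaves identically for known algorithms but raises on unknown
# ones instead of silently returning None.
ALGO_CLASSES = {'dqn': DQN, 'ppo2': PPO2, 'a2c': A2C, 'acer': ACER, 'trpo': TRPO}

def loader_v2(algo, env_name):
    if algo not in ALGO_CLASSES:
        raise ValueError("Unknown algorithm: " + algo)
    return ALGO_CLASSES[algo].load("trained_agents/" + algo + "/" + env_name + ".pkl")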
def run_illegal_move_training(exp_name, exp_path,
                              basicdate,
                              model_type='PPO2',
                              n_eval_episodes=10,
                              training_intervals=100,
                              max_steps=10000,
                              reward_margin=10,
                              log_to_tb=False,
                              pelican_agent_filepath=False):
    # set up logging
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        tb_log_name = 'Illegal_move_prevention_training'
    else:
        writer = None
        tb_log_name = None

    if pelican_agent_filepath:
        logger.info('Loading agent from file: ' + pelican_agent_filepath)
        # env = plark_env_illegal_move.PlarkEnvIllegalMove(
        #     config_file_path='/Components/plark-game/plark_game/game_config/10x10/balanced.json')
        env = gym.make('plark-env-illegal-move-v0')

        if model_type.lower() == 'dqn':
            model = DQN.load(pelican_agent_filepath)
            model.set_env(env)
        elif model_type.lower() == 'ppo2':
            model = PPO2.load(pelican_agent_filepath)
            model.set_env(DummyVecEnv([lambda: env]))
        elif model_type.lower() == 'a2c':
            model = A2C.load(pelican_agent_filepath)
            model.set_env(env)
        elif model_type.lower() == 'acktr':
            model = ACKTR.load(pelican_agent_filepath)
            model.set_env(env)
    else:
        # Instantiate the env and model
        env = gym.make('plark-env-illegal-move-v0')
        model = PPO2('CnnPolicy', env)

    # Start training
    train_agent(exp_path, model, env, training_intervals, max_steps,
                model_type, basicdate, writer, tb_log_name, reward_margin)

    # Evaluate
    mean_reward, n_steps = evaluate_policy(model, env,
                                           n_eval_episodes=n_eval_episodes,
                                           deterministic=False, render=False,
                                           callback=None, reward_threshold=None,
                                           return_episode_rewards=False)
    logger.info('Evaluation finished')
    logger.info('Mean Reward is ' + str(mean_reward))
    logger.info('Number of steps is ' + str(n_steps))
def read_model(model_type):
    if model_type == "A2C":
        model = A2C.load(
            "./model_saved/Selected/A2C_ModelMar-05-2021_0815/A2C_ModelMar-05-2021_0815"
        )
    elif model_type == "TD3":
        model = TD3.load(
            "./model_saved/Selected/TD3_ModelMar-05-2021_1442/TD3_ModelMar-05-2021_1442"
        )
    else:
        raise ValueError("Unknown model type: {}".format(model_type))
    return model
def load_model(config):
    model = None
    if config["algo_name"] == "TD3":
        model = TD3.load("agents/{}".format(args["test_agent_path"]))
    elif config["algo_name"] == "A2C":
        model = A2C.load("agents/{}".format(args["test_agent_path"]))
    elif config["algo_name"] == "SAC":
        model = SAC.load("agents/{}".format(args["test_agent_path"]))
    elif config["algo_name"] == "PPO2":
        model = PPO2.load("agents/{}".format(args["test_agent_path"]))
    assert model is not None, "Alg name not found, cannot load model, exiting."
    return model
def main():
    model_dir = './models'
    model_name = model_dir + '/' + MODEL_NAME

    """ Generate & Check environment """
    env_name = 'myenv-v2'
    env = gym.make(env_name)
    # env = gym.wrappers.Monitor(env, "./videos", force=True)  # For video making

    """ Vectorize environment """
    # Unnecessary to vectorize environment
    # env = DummyVecEnv([lambda: env])

    """ Load model and set environment """
    if ALGORITHM == 'ppo2':
        model = PPO2.load(model_name)
    elif ALGORITHM == 'a2c':
        model = A2C.load(model_name)
    else:
        raise Exception('Load error. Specify proper name')

    for idx in range(NUM_TRIALS):
        """ Initialization """
        observation = env.reset()
        frames = []

        """ Save some initial values """
        fighter_0 = env.fighter.ingress
        jammer_0 = env.jammer.ingress

        while True:
            action_index, _ = model.predict(observation)

            # Step the environment once
            observation, reward, done, _ = env.step(action_index)

            # Render the environment and record the frame for the video
            # shot = env.render(mode=args.render_mode)
            frames.append(env.render(mode=args.render_mode))

            # Pause with the space key (for debugging)
            pause_for_debug()

            # Slow down rendering
            pygame.time.wait(10)

            # End-of-episode handling
            if done:
                status_print(env, observation, reward, done, fighter_0, jammer_0)
                video_name = ALGORITHM + '_' + env.mission_condition + '-' + str(idx)
                make_video(video_name, frames)
                make_jason(env, video_name, fighter_0, jammer_0, reward)
                break
def run_sonobuoy_training(exp_name, exp_path,
                          basicdate,
                          model_type='PPO2',
                          n_eval_episodes=10,
                          training_intervals=100,
                          max_steps=10000,
                          reward_margin=10,
                          log_to_tb=False,
                          pelican_agent_filepath=False):
    # set up logging
    if log_to_tb:
        writer = SummaryWriter(exp_path)
        tb_log_name = 'sonobuoy_training'
    else:
        writer = None
        tb_log_name = None

    env = gym.make('plark-env-v0',
                   panther_agent_filepath='/data/agents/models/PPO2_20200429_073132_panther/')

    if pelican_agent_filepath:
        logger.info('Loading agent from file: ' + pelican_agent_filepath)

        if model_type.lower() == 'dqn':
            model = DQN.load(pelican_agent_filepath)
            model.set_env(env)
        elif model_type.lower() == 'ppo2':
            model = PPO2.load(pelican_agent_filepath)
            model.set_env(DummyVecEnv([lambda: env]))
        elif model_type.lower() == 'a2c':
            model = A2C.load(pelican_agent_filepath)
            model.set_env(env)
        elif model_type.lower() == 'acktr':
            model = ACKTR.load(pelican_agent_filepath)
            model.set_env(env)
    else:
        # Instantiate the model
        model = PPO2('CnnPolicy', env)

    # Start training
    train_agent(exp_path, model, env, training_intervals, max_steps,
                model_type, basicdate, writer, tb_log_name, reward_margin)

    # Evaluate
    mean_reward, n_steps = evaluate_policy(model, env,
                                           n_eval_episodes=n_eval_episodes,
                                           deterministic=False, render=False,
                                           callback=None, reward_threshold=None,
                                           return_episode_rewards=False)
    logger.info('Evaluation finished')
    logger.info('Mean Reward is ' + str(mean_reward))
    logger.info('Number of steps is ' + str(n_steps))
def get_pretrained_agents():
    """
    Get the agents from the saved_agents/ directory
    :return: list of [weights, agent] pairs, with the weights parsed
        from each agent's filename
    """
    agents = []
    dir_name = "saved_agents"
    for filename in os.listdir(dir_name):
        agent = A2C.load(dir_name + '/' + filename)
        weights = np.array([float(w) for w in filename.split('_')])
        agents.append([weights, agent])
    return agents
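# Usage sketch (an assumption: filenames such as "0.3_0.7" encode the reward
# weights, as implied by the float parsing above). Each stored agent can then
# be queried against an observation from a matching environment.
def predict_all(obs):
    for weights, agent in get_pretrained_agents():
        action, _states = agent.predict(obs)
        print(weights, action)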
def run_baseline(params, LOAD_POLICY, VARIABLE_EVAL):
    # Evaluate the agent
    env = env_fun(animate=params["animate"],
                  max_steps=params["max_steps"],
                  action_input=False,
                  latent_input=False,
                  is_variable=VARIABLE_EVAL)
    policy = A2C('MlpPolicy', env)
    if LOAD_POLICY:
        policy_dir = "agents/xxx.zip"
        policy = A2C.load(policy_dir)
    regressor = PyTorchMlpCst(env.obs_dim + env.act_dim, 24, env.obs_dim)
    return evaluate_model(params, env, policy, regressor)
def loadAgent(self, filepath, algorithm_type):
    try:
        if algorithm_type.lower() == 'dqn':
            self.model = DQN.load(filepath)
        elif algorithm_type.lower() == 'ppo2':
            self.model = PPO2.load(filepath)
        elif algorithm_type.lower() == 'a2c':
            self.model = A2C.load(filepath)
        elif algorithm_type.lower() == 'acktr':
            self.model = ACKTR.load(filepath)
    except Exception:
        raise ValueError('Error loading pelican agent. File : "' + filepath + '" does not exist')
def __init__(self, config: Dict[str, Any]) -> None:
    """
    Initialize agent.

    Args:
        config (Dict[str, Any]): Agent configuration.
    """
    from stable_baselines import A2C

    self.model = A2C.load(config["weights"])
    self.state = None
    # Number of environments used to train the model,
    # to which the stable-baselines input tensor size is fixed
    self.n_train_envs = self.model.n_envs
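# Hedged sketch of why n_train_envs is stored: stable-baselines recurrent
# policies expect a batch of n_envs observations per predict() call, so a
# single observation is tiled to that size. This method is an illustrative
# assumption, not part of the original class.
def act(self, single_obs):
    import numpy as np
    obs_batch = np.tile(single_obs[None, :], (self.n_train_envs, 1))
    actions, self.state = self.model.predict(obs_batch, state=self.state)
    return actions[0]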
def load_model(env, model_dir, log_dir):
    model_name = model_dir + '/' + LOAD_MODEL_NAME + '.zip'
    print(f'----- model will be loaded from {model_name} \n')

    """ Load trained model, then continue training """
    if ALGORITHM == 'ppo2':
        model = PPO2.load(model_name, verbose=0, tensorboard_log=log_dir)
    elif ALGORITHM == 'a2c':
        model = A2C.load(model_name, verbose=0, tensorboard_log=log_dir)
    else:
        raise Exception('Specify Algorithm')

    model.set_env(env)
    return model
def load_a2c_model(env, learning_rate, batch_size, algorithm):
    from stable_baselines.common.policies import MlpPolicy

    model = None
    existing_pickle_files = get_files_with_pattern(pickle_dir, 'ppo2_recent_model.pkl')
    for file_name in existing_pickle_files:
        search = re.search('ppo2_recent_model.pkl', file_name)
        if search:
            model = A2C.load(file_name, env=env, verbose=0, tensorboard_log=log_dir)
            logger.info("Loading existing pickle file for environment {} with algorithm {} "
                        "and policy '{}'.".format(env, algorithm, model.policy))
            return model
    logger.debug("No pickle was found for environment {}. Creating new model with "
                 "algorithm {} and policy 'MlpPolicy'...".format(env, algorithm))
    model = A2C(policy='MlpPolicy', env=env, verbose=0, tensorboard_log=log_dir,
                learning_rate=learning_rate, n_steps=batch_size)
    return model