def main():
    a2c_config = A2CConfig()
    set_seed(a2c_config.seed)

    # initialize environment
    env = football_env.create_environment(
        env_name=a2c_config.env_name,
        representation="simple115",
        number_of_left_players_agent_controls=1,
        stacked=False,
        logdir="/tmp/football",
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False)

    # state and action space
    state_space_size = env.observation_space.shape[0]  # we are using the simple115 representation
    if a2c_config.forbid_actions:
        action_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 15]  # forbid some actions
    else:
        action_list = list(range(env.action_space.n))  # default action space
    action_space_size = len(action_list)

    # initialize model
    model_config = FFNModelConfig(state_space_size=state_space_size,
                                  action_space_size=action_space_size)
    model = FFN(model_config)

    # TODO: multiprocessing env
    a2c = A2C(env=env,
              model=model,
              a2c_config=a2c_config,
              action_list=action_list)
    a2c.learn()
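FFN and FFNModelConfig are imported from elsewhere in the repo; other snippets in this section construct FFN with different signatures, so the repo evidently has several FFN variants. A minimal sketch of what the RL variant might look like, inferred only from how it is called here (FFN(model_config) mapping a simple115 state vector to per-action logits); the hidden size and layer layout are assumptions, not the repo's actual code:

import torch.nn as nn


class FFNModelConfig:
    # Hypothetical config container; the real class lives elsewhere in the repo.
    def __init__(self, state_space_size, action_space_size, hidden_size=256):
        self.state_space_size = state_space_size
        self.action_space_size = action_space_size
        self.hidden_size = hidden_size


class FFN(nn.Module):
    # Minimal feed-forward network consistent with FFN(model_config) above:
    # maps a simple115 observation to one logit per action.
    def __init__(self, model_config):
        super().__init__()
        self.model_config = model_config
        self.net = nn.Sequential(
            nn.Linear(model_config.state_space_size, model_config.hidden_size),
            nn.ReLU(),
            nn.Linear(model_config.hidden_size,
                      model_config.action_space_size))

    def forward(self, x):
        return self.net(x)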
print('vocab size:', word_char_embedding.size)
print('word embedding shape:', word_char_embedding.shape[0],
      word_char_embedding.shape[1])

pos2id = get_pos2id()
act2id = get_act2id()
pos_vocab_size = len(pos2id)
output_class = len(act2id)
meta_data = {'act2id': act2id, 'output_class': output_class}
print('meta_data', meta_data)

######################## building models #########################
print(f'{dt.now()} Building model')
hidden_size = 2048
model = FFN(word_char_embedding, hidden_size, output_class,
            dropout_rate=0.4).cuda()
model.load_state_dict(torch.load(args.model_path))


def NERdecoder(sentence, debug=True):
    """Run transition-based NER over a raw input sentence (a string) and return the result as a list of dicts."""
    NERresult = transition_forIII(model=model,
                                  input_is=sentence.split(),
                                  debug=debug)  # forward the debug flag instead of hard-coding True
    NERresult_json = [{
        "position": e[0],
        "named_entity": e[1],
        "entity_type": e[2],
        "probability": e[3]
    } for e in NERresult]
    return NERresult_json
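NERdecoder is the public entry point of this script. A purely illustrative call with placeholder values, showing only the shape of the returned list (nothing here is real model output):

# Hypothetical usage (illustrative only):
# entities = NERdecoder("some input sentence", debug=False)
# entities -> [{"position": ..., "named_entity": ..., "entity_type": ..., "probability": ...}, ...]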
def quantization():
    if args.quant_mode != 'test' and args.deploy:
        args.deploy = False
        warnings.warn(
            'Exporting xmodel needs to be done in quantization test mode, turn off it in this running!',
            UserWarning)
    if args.quant_mode == 'test' and (args.batch_size != 1 or
                                      args.subset_len != 1):
        warnings.warn(
            'Exporting xmodel needs batch size to be 1 and only 1 iteration of inference, they\'ll be changed automatically!',
            UserWarning)
        args.batch_size = 1
        args.subset_len = 1

    p = Path(args.checkpoint_dir) / args.model_name
    model = FFN(args.input_size)
    model = preprocessors.load_from_state_dict(model, p)

    if args.quant_mode == 'float':
        quant_model = deepcopy(model)
    else:
        rand_input = torch.randn([args.batch_size, args.input_size])
        quantizer = torch_quantizer(args.quant_mode,
                                    module=deepcopy(model),
                                    input_args=rand_input,
                                    bitwidth=8,
                                    mix_bit=False,
                                    qat_proc=False,
                                    device=set_seed.DEVICE)
        quant_model = quantizer.quant_model

        if args.fast_finetune:
            ft_loader = preprocessors.make_dataloader(
                data_dir=args.data_dir,
                data_file=args.calib_data,
                subset_len=args.subset_len)
            if args.quant_mode == 'calib':
                loss_fn = MSE().to(set_seed.DEVICE)
                quantizer.fast_finetune(eval_loss,
                                        (quant_model, ft_loader, loss_fn))
            elif args.quant_mode == 'test':
                quantizer.load_ft_param()

    if args.evaluate:
        valid_loader = preprocessors.make_dataloader(
            data_dir=args.data_dir,
            data_file=args.calib_data,
            batch_size=args.batch_size)
        cr1 = CustomRunner(model=model,
                           device=set_seed.DEVICE,
                           input_key='features',
                           input_target_key='targets',
                           evaluate=True,
                           loaders={'test': valid_loader})
        print('Evaluation completed!')
        print('Initial model results:')
        pprint.pprint(cr1.logs, width=5)
        if args.quant_mode != 'float':
            cr2 = CustomRunner(model=quant_model,
                               device=set_seed.DEVICE,
                               input_key='features',
                               input_target_key='targets',
                               evaluate=True,
                               loaders={'test': valid_loader})
            print('Quantized model results:')
            pprint.pprint(cr2.logs, width=5)

    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.deploy:
        quantizer.export_xmodel(deploy_check=True)
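eval_loss is handed to quantizer.fast_finetune above but defined elsewhere in the repo. A minimal sketch of the kind of evaluation callback it presumably is, reusing the original's set_seed.DEVICE convention; this is an assumption, and the real signature and reduction may differ:

def eval_loss(model, loader, loss_fn):
    # Hypothetical calibration callback: average loss_fn over one pass of the loader.
    model.eval()
    total, batches = 0.0, 0
    with torch.no_grad():
        for features, targets in loader:
            features = features.to(set_seed.DEVICE)
            targets = targets.to(set_seed.DEVICE)
            total += loss_fn(model(features), targets).item()
            batches += 1
    return total / max(batches, 1)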
                    default=1,
                    type=int,
                    help='number of training epochs')
args, _ = parser.parse_known_args()

try:
    os.mkdir(args.checkpoint_dir)
except FileExistsError:
    pass

optimizer = None
criterion = None
p = Path(args.checkpoint_dir)
model = FFN(args.input_size)
if args.model_name:
    model = preprocessors.load_from_state_dict(model, p / args.model_name)

if args.train:
    print(f'Running model in train mode for {args.n_epochs} epochs...')
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = losses.MSE()
    loaders = preprocessors.make_dataloaders(args.data_dir,
                                             data_ext='.npy',
                                             batch_size=args.batch_size)

if args.evaluate and not args.train:
    print('Running model evaluation...')
    loaders = {
        'test':
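preprocessors.load_from_state_dict is used both here and in quantization() above but is defined elsewhere. A sketch of its likely behaviour, under the assumption that it simply loads a saved state dict from disk; the real helper may also strip prefixes or remap keys:

import torch


def load_from_state_dict(model, checkpoint_path):
    # Hypothetical helper: load a checkpoint's state dict into the model and return it.
    state_dict = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(state_dict)
    return model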
class DNNNetwork:
    """
    A DNN network capable of solving the respective tasks.
    """

    def __init__(self, task, experiment_name):
        if task.lower() not in ['regression', 'classification']:
            raise ValueError(
                "Task must be one of the following: regression, classification"
            )
        self.task = task.lower()
        self.epochs = 100
        self.model = None
        self.model_is_built = False
        self.experiment_name = experiment_name

    def build_network(self):
        """
        Builds the network, based on whether attention is used and on the target task.
        """
        self.model = FFN()
        self.lr = 1e-3
        self.batch_size = 256
        self.optimizer = tf.keras.optimizers.Adam(self.lr)
        self.model_is_built = True

    def fit(self, x_data, y_data, x_data_val=None, y_data_val=None):
        """
        Trains the network.

        :param x_data: Corrupted Input Data
        :param y_data: Input Label/Target
        :param x_data_val: Validation Input Data
        :param y_data_val: Validation Label/Target Data
        :return:
        """
        if not self.model_is_built:
            raise ValueError("Build model first!")

        x_dataset = tf.data.Dataset.from_tensor_slices(
            (x_data.astype('float32'),
             y_data.astype('float32').reshape(-1, 1))).shuffle(600000).batch(
                 self.batch_size)
        x_dataset_val = tf.data.Dataset.from_tensor_slices(
            (x_data_val.astype('float32'),
             y_data_val.astype('float32').reshape(-1, 1))).batch(
                 x_data_val.shape[0])

        if self.task == 'regression':
            train_metric = tf.keras.metrics.MeanSquaredError(name='train_loss')
            val_metric = tf.keras.metrics.MeanSquaredError(name='val_loss')
            train_loss = tf.keras.losses.MSE
        elif self.task == 'classification':
            train_metric = tf.keras.metrics.BinaryAccuracy(name='train_loss')
            val_metric = tf.keras.metrics.BinaryAccuracy(name='val_loss')
            train_loss = wbce

        if self.task == 'regression':

            @tf.function
            def train_step(model, x, y):
                with tf.GradientTape() as tape:
                    predictions = model(q=x)
                    loss = train_loss(y_true=y, y_pred=predictions)
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, self.model.trainable_variables))
                train_metric(y, predictions)

            @tf.function
            def validation_step(model, x, y):
                predictions = model(q=x)
                val_metric(y, predictions)

            previous_val_loss = 100
            count = 0
            for epoch in range(self.epochs):
                train_metric.reset_states()
                val_metric.reset_states()
                for (batch, (x, y)) in enumerate(x_dataset):
                    train_step(self.model, x, y)
                for x, y in x_dataset_val:
                    validation_step(self.model, x, y)
                print('Epoch {} Training Loss {:.4f} / Validation Loss {:.4f}'.
                      format(epoch + 1, train_metric.result(),
                             val_metric.result()))
                # simple early stopping: keep the best weights, stop after 50
                # epochs without improvement
                if val_metric.result() > previous_val_loss:
                    count += 1
                else:
                    previous_val_loss = val_metric.result()
                    self.model.save_weights(
                        '../training_checkpoints/best_model.tf')
                    count = 0
                if count == 50:
                    self.model.load_weights(
                        '../training_checkpoints/best_model.tf')
                    break

            self.model.load_weights('../training_checkpoints/best_model.tf')
            predictions, _ = self.model(q=x_data_val.astype('float32'))
            result_loss_mse = tf.keras.metrics.MeanSquaredError()(
                y_data_val, predictions).numpy()
            result_loss_mae = tf.keras.metrics.MeanAbsoluteError()(
                y_data_val, predictions).numpy()
            result = (result_loss_mse, result_loss_mae)

        elif self.task == 'classification':
            print(f"Class 0 count: {y_data[y_data == 0].shape[0]}")
            print(f"Class 1 count: {y_data[y_data == 1].shape[0]}")
            class_weights = class_weight.compute_class_weight(
                class_weight='balanced', classes=np.unique(y_data), y=y_data)

            @tf.function
            def train_step(model, x, y):
                with tf.GradientTape() as tape:
                    predictions = model(q=x, training=True)
                    loss = train_loss(y_true=y,
                                      y_pred=predictions,
                                      weight0=class_weights[0],
                                      weight1=class_weights[1])
                gradients = tape.gradient(loss, model.trainable_variables)
                self.optimizer.apply_gradients(
                    zip(gradients, self.model.trainable_variables))
                train_metric(y, predictions)

            @tf.function
            def validation_step(model, x, y):
                predictions = model(q=x, training=False)
                val_metric(y, predictions)

            previous_val_loss = 0
            count = 0
            for epoch in range(self.epochs):
                train_metric.reset_states()
                val_metric.reset_states()
                for (batch, (x, y)) in enumerate(x_dataset):
                    train_step(self.model, x, y)
                for x, y in x_dataset_val:
                    validation_step(self.model, x, y)
                print('Epoch {} Training ACC {:.4f} / Validation ACC {:.4f}'.
                      format(epoch + 1, train_metric.result(),
                             val_metric.result()))
                # simple early stopping: keep the best weights, stop after 50
                # epochs without improvement
                if val_metric.result() < previous_val_loss:
                    count += 1
                else:
                    previous_val_loss = val_metric.result()
                    self.model.save_weights(
                        '../training_checkpoints/best_model.tf')
                    count = 0
                if count == 50:
                    self.model.load_weights(
                        '../training_checkpoints/best_model.tf')
                    break

            self.model.load_weights('../training_checkpoints/best_model.tf')
            predictions = self.model(q=x_data_val.astype('float32'),
                                     training=False)
            predictions = np.squeeze(predictions)
            result_acc = tf.keras.metrics.BinaryAccuracy()(y_data_val,
                                                           predictions).numpy()
            result_roc = tf.keras.metrics.AUC()(y_true=y_data_val,
                                                y_pred=predictions).numpy()
            result = (result_acc, result_roc)

        self.__store_run_results(result=result)

    def __store_run_results(self, result):
        file = 'results/{}_results.csv'.format(self.experiment_name)
        if self.task == 'regression':
            metric_1_name = "Val_MSE"
            metric_2_name = "Val_MAE"  # second regression metric is MAE, not MSE
        else:
            metric_1_name = "Val_Acc"
            metric_2_name = "Val_ROC"
        new_row = pd.DataFrame([[result[0], result[1]]],
                               columns=[metric_1_name, metric_2_name])
        if path.exists(file):
            data = pd.read_csv(file)
            # DataFrame.append is deprecated/removed in recent pandas; use concat
            data = pd.concat([data, new_row], ignore_index=True)
            data.to_csv(file, index=False)
        else:
            new_row.to_csv(file, index=False)
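wbce, the weighted binary cross-entropy used as train_loss in the classification branch, is imported from elsewhere in the repo. A minimal sketch consistent with how it is called above (weight0/weight1 keyword arguments, labels shaped (batch, 1)); this is an assumption, not the repo's actual implementation:

import tensorflow as tf


def wbce(y_true, y_pred, weight0=1.0, weight1=1.0):
    # Hypothetical weighted binary cross-entropy: weight each sample by its class.
    y_true = tf.cast(y_true, tf.float32)
    per_sample = tf.keras.losses.binary_crossentropy(y_true, y_pred)  # shape (batch,)
    weights = tf.squeeze(y_true * weight1 + (1.0 - y_true) * weight0, axis=-1)
    return tf.reduce_mean(weights * per_sample)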
class DQN:

    def __init__(self, env, model, dqn_config, action_list):
        self.env = env
        self.action_list = action_list
        self.action_space_size = len(action_list)
        self.model = model
        self.old_model = FFN(self.model.model_config)
        self.old_model.load_state_dict(self.model.state_dict())
        self.gamma = dqn_config.gamma
        self.epsilon_func = DecayEpsilon(
            max_epsilon=dqn_config.max_epsilon,
            min_epsilon=dqn_config.min_epsilon,
            decay_episodes=dqn_config.decay_episodes)
        self.target_update_episodes = dqn_config.target_update_episodes
        self.replay_memory = ReplayMemory(dqn_config.max_memory_size)
        self.replay_memory_warmup = dqn_config.replay_memory_warmup

        # training config
        self.device = torch.device(
            dqn_config.device) if torch.cuda.is_available() else torch.device(
                'cpu')
        self.model.to(self.device)
        self.old_model.to(self.device)
        self.max_episodes = dqn_config.max_episodes
        self.batch_size = dqn_config.batch_size
        self.optimizer = Adam(self.model.parameters(),
                              lr=dqn_config.lr,
                              weight_decay=0.01)
        self.criterion = nn.MSELoss()
        self.save_dir = dqn_config.save_dir
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        self.save_interval = dqn_config.save_interval
        self.update_nums = dqn_config.update_nums
        self.eval_interval = dqn_config.eval_interval

        # log
        if not os.path.exists(dqn_config.log_dir):
            os.makedirs(dqn_config.log_dir)
        self.writer = SummaryWriter(dqn_config.log_dir)

    def select_action(self, state, epsilon=0):
        """With probability epsilon select a random action, otherwise select the action with the max Q-value."""
        if np.random.random_sample() < epsilon:
            action = np.random.randint(0, self.action_space_size)
        else:
            state_tensor = torch.tensor([state], device=self.device)
            logits = self.model(state_tensor)
            action = logits.argmax(-1).item()
        return action

    def update(self):
        batch_states, batch_actions, batch_rewards, batch_new_states = \
            self.replay_memory.sample_mini_batch(self.batch_size)
        predict_logits = self.model(
            batch_states.to(self.device))  # bsz, action_space_size
        predict_values = predict_logits.gather(
            1, batch_actions.to(self.device).unsqueeze(1)).squeeze(1)
        with torch.no_grad():
            target_logits = self.old_model(batch_new_states.to(self.device))
            target_values = target_logits.max(1)[0] * self.gamma
            target_values += batch_rewards.to(self.device)
        loss = self.criterion(target_values, predict_values)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()

    def train(self):
        self.model.train()
        self.old_model.eval()
        # running averages over all episodes
        mean_reward = 0
        mean_loss = 0
        mean_steps = 0  # running average of steps per episode

        print("collecting experience")
        state = self.env.reset()
        count = 1
        while len(self.replay_memory) < self.replay_memory_warmup:
            # bias towards actions that score quickly so non-zero rewards are collected fast
            action = np.random.choice([4, 5, 6, 12])
            obs, rew, done, info = self.env.step(action)
            if rew == 1 or rew == -1:
                count += 1
            self.replay_memory.add_experience(state=state,
                                              action=action,
                                              reward=rew,
                                              new_state=obs)
            state = obs
            if done:
                state = self.env.reset()

        print("training")
        tqdm_episodes = tqdm(range(1, self.max_episodes + 1))  # episodes are counted from 1
        for episode in tqdm_episodes:
            state = self.env.reset()
            done = False
            steps = 0  # step count of the current episode
            while not done:
                epsilon = self.epsilon_func.get_epsilon(episode)
                action = self.select_action(state, epsilon)
                obs, rew, done, info = self.env.step(self.action_list[action])
                steps += 1
                self.replay_memory.add_experience(state=state,
                                                  action=action,
                                                  reward=rew,
                                                  new_state=obs)
                state = obs
            mean_reward = (mean_reward * (episode - 1) + rew) / episode
            mean_steps = (mean_steps * (episode - 1) + steps) / episode
            # update model parameters with Adam
            loss = 0
            self.model.train()
            # after each episode, run update_nums gradient updates
            for _ in range(self.update_nums):
                loss += self.update()
            loss /= self.update_nums
            mean_loss = (mean_loss * (episode - 1) + loss) / episode

            tqdm_episodes.set_postfix({
                'episode': episode,
                'epsilon': epsilon,
                'mean_steps': mean_steps,
                'mean_reward': mean_reward,
                'mean_loss': mean_loss
            })
            self.writer.add_scalar('Train/epsilon', epsilon, episode)
            self.writer.add_scalar('Train/steps', steps, episode)
            self.writer.add_scalar('Train/mean_steps', mean_steps, episode)
            self.writer.add_scalar('Train/reward', rew, episode)
            self.writer.add_scalar('Train/mean_reward', mean_reward, episode)
            self.writer.add_scalar('Train/loss', loss, episode)
            self.writer.add_scalar('Train/mean_loss', mean_loss, episode)

            # reset the target network
            if episode % self.target_update_episodes == 0:
                self.old_model.load_state_dict(self.model.state_dict())
            if episode % self.eval_interval == 0:
                self.eval(episode)
            # save checkpoint
            if episode % self.save_interval == 0:
                torch.save(
                    self.model.state_dict(),
                    os.path.join(self.save_dir, f"checkpoint{episode}.pt"))

    def eval(self, episode):
        """Play 100 episodes with the current model parameters, choosing actions
        epsilon-greedily, and use the mean reward and mean steps over those
        episodes as an evaluation reference for the current training episode."""
        self.model.eval()
        mean_steps = 0
        mean_reward = 0
        epsilon = 0.05
        n_episodes = 100
        for idx in range(n_episodes):
            steps = 0
            state = self.env.reset()
            # record the action sequence of the last episode
            if idx == n_episodes - 1:
                actions = []
            done = False
            while not done:
                action = self.select_action(state, epsilon=epsilon)
                if idx == n_episodes - 1:
                    actions.append(
                        football_action_set.named_action_from_action_set(
                            self.env.unwrapped._env._action_set,
                            self.action_list[action]))
                steps += 1
                obs, rew, done, info = self.env.step(self.action_list[action])
                # print(obs[94:97])
                state = obs
            # print(rew)
            mean_steps = (mean_steps * idx + steps) / (idx + 1)
            mean_reward = (mean_reward * idx + rew) / (idx + 1)
        print(
            f"\nepisode {episode}, mean steps {mean_steps}, mean reward {mean_reward}"
        )
        # print the action sequence of the last episode
        print("last episode action sequence:")
        print(' '.join([f"{action_i}" for action_i in actions]))
        self.writer.add_scalar('Eval/mean_steps', mean_steps, episode)
        self.writer.add_scalar('Eval/mean_reward', mean_reward, episode)
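DecayEpsilon and ReplayMemory are imported from elsewhere in the repo. A minimal sketch consistent with how DQN uses them above (get_epsilon(episode), add_experience(...), sample_mini_batch(batch_size), len(...)); the decay schedule and sampling details are assumptions and may differ from the real classes:

import random
from collections import deque

import numpy as np
import torch


class DecayEpsilon:
    # Hypothetical linear decay from max_epsilon to min_epsilon over decay_episodes.
    def __init__(self, max_epsilon, min_epsilon, decay_episodes):
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.decay_episodes = decay_episodes

    def get_epsilon(self, episode):
        frac = min(episode / self.decay_episodes, 1.0)
        return self.max_epsilon + frac * (self.min_epsilon - self.max_epsilon)


class ReplayMemory:
    # Hypothetical fixed-size buffer of (state, action, reward, new_state) tuples.
    def __init__(self, max_memory_size):
        self.memory = deque(maxlen=max_memory_size)

    def __len__(self):
        return len(self.memory)

    def add_experience(self, state, action, reward, new_state):
        self.memory.append((state, action, reward, new_state))

    def sample_mini_batch(self, batch_size):
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, new_states = zip(*batch)
        return (torch.tensor(np.array(states), dtype=torch.float32),
                torch.tensor(actions, dtype=torch.long),
                torch.tensor(rewards, dtype=torch.float32),
                torch.tensor(np.array(new_states), dtype=torch.float32))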