Example #1
 def build_network(self):
     """
      Builds the network based on the use of attention and the target task.
     """
     self.model = FFN()
     self.lr = 1e-3
     self.batch_size = 256
     self.optimizer = tf.keras.optimizers.Adam(self.lr)
     self.model_is_built = True
     return
Example #2
    def __init__(self, env, model, dqn_config, action_list):
        self.env = env
        self.action_list = action_list
        self.action_space_size = len(action_list)

        self.model = model
        # target network: a frozen copy of the online model, refreshed every
        # target_update_episodes episodes to stabilize the Q-learning targets
        self.old_model = FFN(self.model.model_config)
        self.old_model.load_state_dict(self.model.state_dict())

        self.gamma = dqn_config.gamma
        self.epsilon_func = DecayEpsilon(
            max_epsilon=dqn_config.max_epsilon,
            min_epsilon=dqn_config.min_epsilon,
            decay_episodes=dqn_config.decay_episodes)
        self.target_update_episodes = dqn_config.target_update_episodes

        self.replay_memory = ReplayMemory(dqn_config.max_memory_size)
        self.replay_memory_warmup = dqn_config.replay_memory_warmup

        # training config

        self.device = torch.device(
            dqn_config.device) if torch.cuda.is_available() else torch.device(
                'cpu')
        self.model.to(self.device)
        self.old_model.to(self.device)
        self.max_episodes = dqn_config.max_episodes
        self.batch_size = dqn_config.batch_size
        self.optimizer = Adam(self.model.parameters(),
                              lr=dqn_config.lr,
                              weight_decay=0.01)
        self.criterion = nn.MSELoss()
        self.save_dir = dqn_config.save_dir
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        self.save_interval = dqn_config.save_interval
        self.update_nums = dqn_config.update_nums
        self.eval_interval = dqn_config.eval_interval

        # log
        self.writer = SummaryWriter(dqn_config.log_dir)
        if not os.path.exists(dqn_config.log_dir):
            os.makedirs(dqn_config.log_dir)
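The DecayEpsilon schedule constructed above is not defined in these examples. A minimal sketch, assuming a simple linear decay from max_epsilon to min_epsilon over decay_episodes episodes; the real implementation may differ:

# Hypothetical sketch of the DecayEpsilon helper used above. Assumes a
# linear decay from max_epsilon to min_epsilon over decay_episodes
# episodes, then a constant min_epsilon afterwards.
class DecayEpsilon:
    def __init__(self, max_epsilon, min_epsilon, decay_episodes):
        self.max_epsilon = max_epsilon
        self.min_epsilon = min_epsilon
        self.decay_episodes = decay_episodes

    def get_epsilon(self, episode):
        # fraction of the decay schedule completed, clipped to [0, 1]
        frac = min(max(episode / self.decay_episodes, 0.0), 1.0)
        return self.max_epsilon + frac * (self.min_epsilon - self.max_epsilon)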
Example #3
def main():
    a2c_config = A2CConfig()
    set_seed(a2c_config.seed)

    # initialize environment
    env = football_env.create_environment(
        env_name=a2c_config.env_name,
        representation="simple115",
        number_of_left_players_agent_controls=1,
        stacked=False,
        logdir="/tmp/football",
        write_goal_dumps=False,
        write_full_episode_dumps=False,
        render=False)

    # state and action space
    # simple115 representation: the observation is a flat feature vector
    state_space_size = env.observation_space.shape[0]
    if a2c_config.forbid_actions:
        # restrict the agent to a subset of the action set
        action_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 12, 13, 15]
        action_space_size = len(action_list)
    else:
        action_list = list(range(env.action_space.n))  # default action space
        action_space_size = len(action_list)

    # initialize model
    model_config = FFNModelConfig(state_space_size=state_space_size,
                                  action_space_size=action_space_size)
    model = FFN(model_config)

    # TODO multiprocessing env
    a2c = A2C(env=env,
              model=model,
              a2c_config=a2c_config,
              action_list=action_list)
    a2c.learn()
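The FFN model passed to A2C (and to the DQN example below) is not shown here. A minimal sketch, assuming a plain fully connected network that maps a state vector to one output per action and keeps its config on the module (the DQN example constructs FFN(self.model.model_config)); the real class is likely richer:

# Hypothetical sketch of the PyTorch FFN used by the A2C/DQN examples.
# Assumes FFNModelConfig exposes state_space_size and action_space_size.
import torch.nn as nn


class FFN(nn.Module):
    def __init__(self, model_config):
        super().__init__()
        self.model_config = model_config
        self.net = nn.Sequential(
            nn.Linear(model_config.state_space_size, 256),
            nn.ReLU(),
            nn.Linear(256, model_config.action_space_size))

    def forward(self, x):
        return self.net(x)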
Example #4
print('vocab size:', word_char_embedding.size)
print('word embedding shape:', word_char_embedding.shape[0],
      word_char_embedding.shape[1])

pos2id = get_pos2id()
act2id = get_act2id()
pos_vocab_size = len(pos2id)
output_class = len(act2id)
meta_data = {'act2id': act2id, 'output_class': output_class}
print('meta_data', meta_data)

######################## building models #########################

print(f'{dt.now()} Building model')
hidden_size = 2048
model = FFN(word_char_embedding, hidden_size, output_class,
            dropout_rate=0.4).cuda()
model.load_state_dict(torch.load(args.model_path))


def NERdecoder(sentence, debug=True):
    # input: sentence as a string
    NERresult = transition_forIII(model=model,
                                  input_is=sentence.split(),
                                  debug=debug)
    NERresult_json = [{
        "position": e[0],
        "named_entity": e[1],
        "entity_type": e[2],
        "probability": e[3]
    } for e in NERresult]
    return NERresult_json
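A hypothetical call to NERdecoder, only to illustrate the shape of the returned records (position, named_entity, entity_type, probability); the sentence and field values are made up:

# Illustrative usage; assumes the model and transition_forIII are already loaded.
result = NERdecoder('Barack Obama visited Berlin', debug=False)
for record in result:
    print(record['position'], record['named_entity'],
          record['entity_type'], record['probability'])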
Example #5
def quantization():
    if args.quant_mode != 'test' and args.deploy:
        args.deploy = False
        warnings.warn(
            'Exporting an xmodel must be done in quantization test mode; deploy has been turned off for this run.',
            UserWarning)

    if args.quant_mode == 'test' and (args.batch_size != 1
                                      or args.subset_len != 1):
        warnings.warn(
            'Exporting an xmodel requires batch size 1 and a single inference iteration; they will be adjusted automatically.',
            UserWarning)
        args.batch_size = 1
        args.subset_len = 1

    p = Path(args.checkpoint_dir) / args.model_name
    model = FFN(args.input_size)
    model = preprocessors.load_from_state_dict(model, p)

    if args.quant_mode == 'float':
        quant_model = deepcopy(model)
    else:
        rand_input = torch.randn([args.batch_size, args.input_size])
        quantizer = torch_quantizer(args.quant_mode,
                                    module=deepcopy(model),
                                    input_args=rand_input,
                                    bitwidth=8,
                                    mix_bit=False,
                                    qat_proc=False,
                                    device=set_seed.DEVICE)

        quant_model = quantizer.quant_model

    if args.fast_finetune:
        ft_loader = preprocessors.make_dataloader(data_dir=args.data_dir,
                                                  data_file=args.calib_data,
                                                  subset_len=args.subset_len)
        if args.quant_mode == 'calib':
            loss_fn = MSE().to(set_seed.DEVICE)
            quantizer.fast_finetune(eval_loss,
                                    (quant_model, ft_loader, loss_fn))
        elif args.quant_mode == 'test':
            quantizer.load_ft_param()

    if args.evaluate:
        valid_loader = preprocessors.make_dataloader(
            data_dir=args.data_dir,
            data_file=args.calib_data,
            batch_size=args.batch_size)
        cr1 = CustomRunner(model=model,
                           device=set_seed.DEVICE,
                           input_key='features',
                           input_target_key='targets',
                           evaluate=True,
                           loaders={'test': valid_loader})
        print('Evaluation completed!')
        print('Initial model results:')
        pprint.pprint(cr1.logs, width=5)

        if args.quant_mode != 'float':
            cr2 = CustomRunner(model=quant_model,
                               device=set_seed.DEVICE,
                               input_key='features',
                               input_target_key='targets',
                               evaluate=True,
                               loaders={'test': valid_loader})
            print('Quantized model results:')
            pprint.pprint(cr2.logs, width=5)

    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.deploy:
        quantizer.export_xmodel(deploy_check=True)
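The eval_loss callable handed to quantizer.fast_finetune is not defined in this example. A plausible sketch, assuming it matches the (quant_model, ft_loader, loss_fn) argument tuple above, iterates the loader as (features, targets) pairs, and returns the mean loss:

# Hypothetical sketch of the eval_loss function used for fast finetuning.
# Assumes batches are (features, targets) pairs and set_seed.DEVICE is the
# same device constant used elsewhere in this example.
import torch

def eval_loss(model, dataloader, loss_fn):
    model.eval()
    total, batches = 0.0, 0
    with torch.no_grad():
        for features, targets in dataloader:
            features = features.to(set_seed.DEVICE)
            targets = targets.to(set_seed.DEVICE)
            total += loss_fn(model(features), targets).item()
            batches += 1
    return total / max(batches, 1)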
Example #6
                    default=1,
                    type=int,
                    help='number of training epochs')

args, _ = parser.parse_known_args()

try:
    os.mkdir(args.checkpoint_dir)
except FileExistsError:
    pass

optimizer = None
criterion = None
p = Path(args.checkpoint_dir)

model = FFN(args.input_size)
if args.model_name:
    model = preprocessors.load_from_state_dict(model, p / args.model_name)

if args.train:
    print(f'Running model in train mode for {args.n_epochs} epochs...')
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    criterion = losses.MSE()
    loaders = preprocessors.make_dataloaders(args.data_dir,
                                             data_ext='.npy',
                                             batch_size=args.batch_size)

if args.evaluate and not args.train:
    print('Running model evaluation...')
    loaders = {
        'test':
Example #7
class DNNNetwork:
    """
        A DNN network for solving the supported tasks (regression or classification).
    """
    def __init__(self, task, experiment_name):
        if task.lower() not in ['regression', 'classification']:
            raise ValueError(
                "Task must be one of the following: regression, classification"
            )
        self.task = task.lower()
        self.epochs = 100
        self.model = None
        self.model_is_built = False
        self.experiment_name = experiment_name
        return

    def build_network(self):
        """
         Builds the network based on the use of attention and the target task.
        """
        self.model = FFN()
        self.lr = 1e-3
        self.batch_size = 256
        self.optimizer = tf.keras.optimizers.Adam(self.lr)
        self.model_is_built = True
        return

    def fit(self, x_data, y_data, x_data_val=None, y_data_val=None):
        """
        Trains the Network.
        :param x_data: Corrupted Input Data
        :param y_data: Input Label/Target
        :param x_data_val: Validation Input Data
        :param y_data_val: Validation Label/Target Data
        :return:
        """

        if self.model_is_built:
            x_dataset = tf.data.Dataset.from_tensor_slices(
                (x_data.astype('float32'), y_data.astype('float32').reshape(
                    -1, 1))).shuffle(600000).batch(self.batch_size)

            x_dataset_val = tf.data.Dataset.from_tensor_slices(
                (x_data_val.astype('float32'),
                 y_data_val.astype('float32').reshape(-1, 1))).batch(
                     x_data_val.shape[0])

            if self.task == 'regression':
                train_metric = tf.keras.metrics.MeanSquaredError(
                    name='train_loss')
                val_metric = tf.keras.metrics.MeanSquaredError(name='val_loss')
                train_loss = tf.keras.losses.MSE
            elif self.task == 'classification':
                train_metric = tf.keras.metrics.BinaryAccuracy(
                    name='train_loss')
                val_metric = tf.keras.metrics.BinaryAccuracy(name='val_loss')
                train_loss = wbce

            if self.task == 'regression':

                @tf.function
                def train_step(model, x, y):
                    with tf.GradientTape() as tape:
                        predictions = model(q=x)
                        loss = train_loss(y_true=y, y_pred=predictions)
                    gradients = tape.gradient(loss, model.trainable_variables)
                    self.optimizer.apply_gradients(
                        zip(gradients, self.model.trainable_variables))
                    train_metric(y, predictions)

                @tf.function
                def validation_step(model, x, y):
                    predictions = model(q=x)
                    val_metric(y, predictions)

                previous_val_loss = float('inf')  # best validation loss seen so far
                count = 0
                for epoch in range(self.epochs):
                    train_metric.reset_states()
                    val_metric.reset_states()

                    for (batch, (x, y)) in enumerate(x_dataset):
                        train_step(self.model, x, y)

                    for x, y in x_dataset_val:
                        validation_step(self.model, x, y)
                    print(
                        'Epoch {} Training Loss {:.4f} / Validation Loss {:.4f}'
                        .format(epoch + 1, train_metric.result(),
                                val_metric.result()))
                    if val_metric.result() > previous_val_loss:
                        count += 1
                    else:
                        previous_val_loss = val_metric.result()
                        self.model.save_weights(
                            '../training_checkpoints/best_model.tf')
                        count = 0
                    if count == 50:
                        self.model.load_weights(
                            '../training_checkpoints/best_model.tf')
                        break

                self.model.load_weights(
                    '../training_checkpoints/best_model.tf')
                predictions, _ = self.model(q=x_data_val.astype('float32'))
                result_loss_mse = tf.keras.metrics.MeanSquaredError()(
                    y_data_val, predictions).numpy()
                result_loss_mae = tf.keras.metrics.MeanAbsoluteError()(
                    y_data_val, predictions).numpy()

                result = (result_loss_mse, result_loss_mae)
            elif self.task == 'classification':
                print(f"Class 0 count: {y_data[y_data == 0].shape[0]}")
                print(f"Class 1 count: {y_data[y_data == 1].shape[0]}")
                class_weights = class_weight.compute_class_weight(
                    class_weight='balanced', classes=np.unique(y_data), y=y_data)

                @tf.function
                def train_step(model, x, y):
                    with tf.GradientTape() as tape:
                        predictions = model(q=x, training=True)
                        loss = train_loss(y_true=y,
                                          y_pred=predictions,
                                          weight0=class_weights[0],
                                          weight1=class_weights[1])
                    gradients = tape.gradient(loss, model.trainable_variables)
                    self.optimizer.apply_gradients(
                        zip(gradients, self.model.trainable_variables))
                    train_metric(y, predictions)

                @tf.function
                def validation_step(model, x, y):
                    predictions = model(q=x, training=False)
                    val_metric(y, predictions)

                previous_val_loss = 0  # best validation accuracy seen so far
                count = 0
                for epoch in range(self.epochs):
                    train_metric.reset_states()
                    val_metric.reset_states()

                    for (batch, (x, y)) in enumerate(x_dataset):
                        train_step(self.model, x, y)

                    for x, y in x_dataset_val:
                        validation_step(self.model, x, y)
                    print(
                        'Epoch {} Training ACC {:.4f} / Validation ACC {:.4f}'.
                        format(epoch + 1, train_metric.result(),
                               val_metric.result()))
                    if val_metric.result() < previous_val_loss:
                        count += 1
                    else:
                        previous_val_loss = val_metric.result()
                        self.model.save_weights(
                            '../training_checkpoints/best_model.tf')
                        count = 0
                    if count == 50:
                        self.model.load_weights(
                            '../training_checkpoints/best_model.tf')
                        break

                self.model.load_weights(
                    '../training_checkpoints/best_model.tf')
                predictions = self.model(q=x_data_val.astype('float32'),
                                         training=False)
                predictions = np.squeeze(predictions)
                result_acc = tf.keras.metrics.BinaryAccuracy()(
                    y_data_val, predictions).numpy()
                result_roc = tf.keras.metrics.AUC()(
                    y_true=y_data_val, y_pred=predictions).numpy()
                result = (result_acc, result_roc)
            self.__store_run_results(result=result)
            return
        else:
            raise ValueError("Build model first!")

    def __store_run_results(self, result):
        file = 'results/{}_results.csv'.format(self.experiment_name)
        if self.task == 'regression':
            metric_1_name = "Val_MSE"
            metric_2_name = "Val_MAE"
        else:
            metric_1_name = "Val_Acc"
            metric_2_name = "Val_ROC"

        if path.exists(file):
            data = pd.read_csv(file)
            new_row = pd.DataFrame([[result[0], result[1]]],
                                   columns=[metric_1_name, metric_2_name])
            data = pd.concat([data, new_row], ignore_index=True)
            data.to_csv(file, index=False)
        else:
            data = pd.DataFrame([[result[0], result[1]]],
                                columns=[metric_1_name, metric_2_name])
            data.to_csv(file, index=False)
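The wbce loss used as the classification training loss above is not shown. A minimal sketch consistent with the weight0/weight1 keyword arguments passed in train_step (a per-class weighted binary cross-entropy); the actual implementation may differ:

# Hypothetical sketch of the wbce (weighted binary cross-entropy) loss.
# Assumes y_true in {0, 1} and y_pred as probabilities; clipping avoids log(0).
import tensorflow as tf

def wbce(y_true, y_pred, weight0=1.0, weight1=1.0):
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.clip_by_value(y_pred, 1e-7, 1.0 - 1e-7)
    loss = -(weight1 * y_true * tf.math.log(y_pred) +
             weight0 * (1.0 - y_true) * tf.math.log(1.0 - y_pred))
    return tf.reduce_mean(loss)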
Example #8
class DQN:
    def __init__(self, env, model, dqn_config, action_list):
        self.env = env
        self.action_list = action_list
        self.action_space_size = len(action_list)

        self.model = model
        # target network: a frozen copy of the online model, refreshed every
        # target_update_episodes episodes to stabilize the Q-learning targets
        self.old_model = FFN(self.model.model_config)
        self.old_model.load_state_dict(self.model.state_dict())

        self.gamma = dqn_config.gamma
        self.epsilon_func = DecayEpsilon(
            max_epsilon=dqn_config.max_epsilon,
            min_epsilon=dqn_config.min_epsilon,
            decay_episodes=dqn_config.decay_episodes)
        self.target_update_episodes = dqn_config.target_update_episodes

        self.replay_memory = ReplayMemory(dqn_config.max_memory_size)
        self.replay_memory_warmup = dqn_config.replay_memory_warmup

        # training config

        self.device = torch.device(
            dqn_config.device) if torch.cuda.is_available() else torch.device(
                'cpu')
        self.model.to(self.device)
        self.old_model.to(self.device)
        self.max_episodes = dqn_config.max_episodes
        self.batch_size = dqn_config.batch_size
        self.optimizer = Adam(self.model.parameters(),
                              lr=dqn_config.lr,
                              weight_decay=0.01)
        self.criterion = nn.MSELoss()
        self.save_dir = dqn_config.save_dir
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
        self.save_interval = dqn_config.save_interval
        self.update_nums = dqn_config.update_nums
        self.eval_interval = dqn_config.eval_interval

        # log
        self.writer = SummaryWriter(dqn_config.log_dir)
        if not os.path.exists(dqn_config.log_dir):
            os.makedirs(dqn_config.log_dir)

    def select_action(self, state, epsilon=0):
        """
        with prob \epsilon select a random action, otherwise select action with max Q-value
        """
        if np.random.random_sample() < epsilon:
            action = np.random.randint(0, self.action_space_size)
        else:
            state_tensor = torch.tensor([state], device=self.device)
            logits = self.model(state_tensor)
            action = logits.argmax(-1).item()
        return action

    def update(self):
        batch_states, batch_actions, batch_rewards, batch_new_states = self.replay_memory.sample_mini_batch(
            self.batch_size)
        predict_logits = self.model(batch_states.to(
            self.device))  # bsz, action_space_size
        predict_values = predict_logits.gather(
            1,
            batch_actions.to(self.device).unsqueeze(1)).squeeze(1)

        with torch.no_grad():
            target_logits = self.old_model(batch_new_states.to(self.device))
            target_values = target_logits.max(1)[0] * self.gamma
            target_values += batch_rewards.to(self.device)

        loss = self.criterion(target_values, predict_values)
        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item()

    def train(self):
        self.model.train()
        self.old_model.eval()
        # running averages over all episodes so far
        mean_reward = 0
        mean_loss = 0
        mean_steps = 0  # running average number of steps per episode

        print("collecting experience")
        state = self.env.reset()
        count = 1
        while len(self.replay_memory) < self.replay_memory_warmup:
            # choose actions that tend to score goals so non-zero rewards are collected quickly
            action = np.random.choice([4, 5, 6, 12])
            obs, rew, done, info = self.env.step(action)
            if rew == 1 or rew == -1:
                count += 1
                self.replay_memory.add_experience(state=state,
                                                  action=action,
                                                  reward=rew,
                                                  new_state=obs)
            state = obs
            if done:
                state = self.env.reset()

        print("training")

        # episodes are counted from 1
        tqdm_episodes = tqdm(range(1, self.max_episodes + 1))
        for episode in tqdm_episodes:
            state = self.env.reset()
            done = False
            steps = 0  # step count for the current episode

            while not done:
                epsilon = self.epsilon_func.get_epsilon(episode)
                action = self.select_action(state, epsilon)
                obs, rew, done, info = self.env.step(self.action_list[action])
                steps += 1
                self.replay_memory.add_experience(state=state,
                                                  action=action,
                                                  reward=rew,
                                                  new_state=obs)
                state = obs

            mean_reward = (mean_reward * (episode - 1) + rew) / episode
            mean_steps = (mean_steps * (episode - 1) + steps) / episode

            # update model parameters with adam
            loss = 0
            self.model.train()
            # perform update_nums gradient updates at the end of each episode
            for _ in range(self.update_nums):
                loss += self.update()
            loss /= self.update_nums
            mean_loss = (mean_loss * (episode - 1) + loss) / episode
            tqdm_episodes.set_postfix({
                'episode': episode,
                'epsilon': epsilon,
                'mean_steps': mean_steps,
                'mean_reward': mean_reward,
                'mean_loss': mean_loss
            })
            self.writer.add_scalar('Train/epsilon', epsilon, episode)
            self.writer.add_scalar('Train/steps', steps, episode)
            self.writer.add_scalar('Train/mean_steps', mean_steps, episode)
            self.writer.add_scalar('Train/reward', rew, episode)
            self.writer.add_scalar('Train/mean_reward', mean_reward, episode)
            self.writer.add_scalar('Train/loss', loss, episode)
            self.writer.add_scalar('Train/mean_loss', mean_loss, episode)

            # reset old model
            if episode % self.target_update_episodes == 0:
                self.old_model.load_state_dict(self.model.state_dict())

            if episode % self.eval_interval == 0:
                self.eval(episode)

            # save checkpoint
            if episode % self.save_interval == 0:
                torch.save(
                    self.model.state_dict(),
                    os.path.join(self.save_dir, f"checkpoint{episode}.pt"))

    def eval(self, episode):
        """以当前模型的参数进行 100 个 episodes 的游戏,以 e-greedy 选择动作,计算 100 个 episodes 的平均
         reward 和 steps 作为当前训练 episode 的评估参考"""
        self.model.eval()
        mean_steps = 0
        mean_reward = 0
        epsilon = 0.05
        n_episodes = 100

        for idx in range(n_episodes):
            steps = 0
            state = self.env.reset()
            # record the action sequence of the last episode
            if idx == n_episodes - 1:
                actions = []
            done = False
            while not done:
                action = self.select_action(state, epsilon=epsilon)
                if idx == n_episodes - 1:
                    actions.append(
                        football_action_set.named_action_from_action_set(
                            self.env.unwrapped._env._action_set,
                            self.action_list[action]))
                steps += 1
                obs, rew, done, info = self.env.step(self.action_list[action])
                # print(obs[94:97])
                state = obs
            # print(rew)
            mean_steps = (mean_steps * idx + steps) / (idx + 1)
            mean_reward = (mean_reward * idx + rew) / (idx + 1)

        print(
            f"\nepisode {episode}, mean steps {mean_steps}, mean reward {mean_reward}"
        )
        # print the action sequence of the last episode
        print("last episode action sequence:")
        print(' '.join([f"{action_i}" for action_i in actions]))
        self.writer.add_scalar('Eval/mean_steps', mean_steps, episode)
        self.writer.add_scalar('Eval/mean_reward', mean_reward, episode)
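The ReplayMemory used by this DQN is not defined in these examples. A minimal sketch assuming uniform sampling from a bounded deque and array-like states, matching the add_experience / sample_mini_batch / len usage above; the real class may store and return more:

# Hypothetical sketch of the ReplayMemory used above. Assumes states and
# new_states are array-like so they can be stacked into float tensors.
import random
from collections import deque

import numpy as np
import torch


class ReplayMemory:
    def __init__(self, max_memory_size):
        self.memory = deque(maxlen=max_memory_size)

    def add_experience(self, state, action, reward, new_state):
        self.memory.append((state, action, reward, new_state))

    def sample_mini_batch(self, batch_size):
        batch = random.sample(self.memory, batch_size)
        states, actions, rewards, new_states = zip(*batch)
        return (torch.from_numpy(np.stack(states)).float(),
                torch.tensor(actions, dtype=torch.long),
                torch.tensor(rewards, dtype=torch.float32),
                torch.from_numpy(np.stack(new_states)).float())

    def __len__(self):
        return len(self.memory)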