def main():
    args = parse_args()
    make_output_dir(args)
    config_for_multi_gpu(args)
    set_seed(args)
    with Timer('load input'):
        train_data_loader, dev_data_loader, test_data_loader = load_data_for_nlu_task(
            args, train=True, dev=True, test=False)
    print(f'train batch size: {args.train_batch_size}')
    print(f'train data batch num: {len(train_data_loader)}')
    # Run dev evaluation twice per epoch:
    args.eval_interval = len(train_data_loader) // 2
    print(f'eval interval: {args.eval_interval}')
    # Note: this value affects the learning-rate warm-up
    args.max_train_steps = len(train_data_loader) * args.max_train_epochs
    print(f'max steps: {args.max_train_steps}')
    if not args.early_stop:
        print(
            f'do not use early stop, training will last {args.max_train_epochs} epochs'
        )

    with Timer('load trainer'):
        trainer = load_trainer(args)
    with Timer('Train'):
        trainer.train(train_data_loader, dev_data_loader)
 def train(self, train_data_loader, dev_data_loader=None):
     best_result = BestResult()
     self.model.zero_grad()
     set_seed(self.args)
     train_stop = False
     summary_writer = SummaryWriter(log_dir=self.args.summary_dir)
     global_step = 0
     for epoch in range(self.args.max_train_epochs + 1):
         epoch_train_loss = 0
         train_data_loader = tqdm(train_data_loader,
                                  desc=f'Training epoch {epoch}')
         for step, batch in enumerate(train_data_loader):
             batch = tuple(t.to(self.args.device) for t in batch)
             self.model.train()
             inputs, y_trues = self._unpack_batch(self.args, batch)
             logits = self.model(inputs)
             loss, _ = self._update_and_predict(logits,
                                                y_trues,
                                                calc_loss=True,
                                                update=True,
                                                calc_prediction=False)
             global_step += 1
             if loss is not None:
                 epoch_train_loss += loss
             if global_step % self.args.eval_interval == 0:
                 summary_writer.add_scalar('loss/train', loss, global_step)
                 if dev_data_loader:
                     f1, report = self.dev(dev_data_loader)
                     summary_writer.add_scalar('metrics/f1', f1,
                                               global_step)
                     if best_result.is_new_record(f1, global_step, epoch):
                         best_result.best_report = report
                         print(f"\n## NEW BEST RESULT in epoch {epoch} ##")
                         print(best_result)
             if self.args.early_stop and (epoch - best_result.best_epoch
                                          ) > self.args.early_stop_epoch:
                 print(f'\n## Early stop in epoch:{epoch} ##')
                 train_stop = True
                 break
         if train_stop:
             break
         summary_writer.add_scalar(
             'epoch_average_loss',
             epoch_train_loss / len(train_data_loader), epoch)
     with open(self.args.dev_result_path, 'w', encoding='utf-8') as f:
         f.write(str(best_result) + '\n')
     print("\n## BEST RESULT in Training ##")
     print(best_result)
     summary_writer.close()
     print('train stop')
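All of these examples call a project-specific set_seed helper whose signature varies from project to project (an args namespace, a bare integer, sometimes an extra n_gpu or CUDA flag). A minimal sketch of what such a helper typically does, assuming PyTorch and NumPy; the name and signature below are illustrative, not any one project's:

import random

import numpy as np
import torch


def set_seed(seed, n_gpu=0):
    # Seed the Python, NumPy and PyTorch RNGs so runs are repeatable.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if n_gpu > 0:
        # Seed every visible CUDA device as well.
        torch.cuda.manual_seed_all(seed)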
Example #3
def main():
    with Timer('parse args'):
        args = parse_args()
    # Add configuration for multi-GPU runs
    # BERT training should run on multiple GPUs; a single GPU is very slow
    config_for_multi_gpu(args)
    # set_seed must be called after n_gpu has been set
    set_seed(args)

    if args.run_mode == 'train':
        train(args)
    elif args.run_mode == 'dev':
        dev(args)
    elif args.run_mode == 'inference':
        inference(args)
Example #4
    def __init__(self, args):
        self.args = args
        self.model = None
        self.optimizer = None
        self.scheduler = None
        self.epoch = 0

        # s = State(args)
        set_seed(self.args.seed, self.args.cudnn_behavoir)
        self.log = Log(self.args.log_path)
        self.writer = Tensorboard(self.args.tensorboard_path)
        self.stati = Statistic(self.args.expernameid, self.args.experid_path, self.args.root_path)
        self.stati.add('hparam', self.args.dict())
        # s.writer.add_hparams(hparam_dict=s.args.dict(), metric_dict={})
        self.record = Record()
Example #5
    def run(self):
        """
        Run process.

        :return: None
        """
        # Set random seed for this process
        set_seed(self.seed)

        # Create eval agent
        eval_agent = copy.deepcopy(self.agent)

        eval_results = []

        while not self.stop_flag.is_set():
            # Wait for evaluation
            self.__wait_for_eval__()

            # Find out current step
            current_step = sum(self.workers_steps)

            # Copy current agent's state to eval agent
            eval_agent.model.load_state_dict(self.agent.model.state_dict())

            # Evaluate agent for given number of episodes
            result = self.__eval__(eval_agent, self.eval_episodes)

            # Store evaluation result
            eval_results.append(result)

            # Log evaluation result
            log_eval_result(current_step, result)

            # If termination condition passed given evaluation result, finish training
            if self.goal and self.goal(result):
                logger.info("")
                logger.info("Termination condition passed")
                logger.info("")
                self.stop_flag.set()

            # If workers reached total number of training steps, finish training
            if self.__workers_finished__():
                logger.info("")
                self.stop_flag.set()

        # Put result to the queue
        self.result_queue.put(RunResult([], eval_results))
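The run() loop above polls a few shared multiprocessing objects (stop_flag, workers_steps, result_queue) whose creation is outside this snippet. A plausible wiring, with assumed names and worker count rather than the project's actual setup:

import multiprocessing as mp

num_workers = 4
stop_flag = mp.Event()                      # any process calls .set() to end training
workers_steps = mp.Array('i', num_workers)  # per-worker step counters, summed for current_step
result_queue = mp.Queue()                   # run() puts RunResult(...) here when it finishes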
Example #6
    def run(self):
        """
        Run process.

        :return: None
        """
        # Set random seed for this process
        set_seed(self.seed)

        # Initialize worker's current step
        self.workers_steps[self.worker.worker_id] = 0

        # Train until stop flag is set or number of training steps is reached
        while not self.stop_flag.is_set() and self.workers_steps[
                self.worker.worker_id] < self.train_steps:
            # Train worker for batch steps
            self.__train__(self.batch_steps)
Example #7
def main():
    with Timer('parse args'):
        args = parse_args()
    # Add configuration for multi-GPU runs
    # Setup CUDA, GPU & distributed training
    config_for_multi_gpu(args)
    # set_seed must be called after n_gpu has been set
    set_seed(args)
    # Create the output directory for run results, config files and model weights
    if args.run_mode == 'train' and args.local_rank in [-1, 0]:
        make_output_dir(args)

    if args.run_mode == 'train':
        train(args)
    elif args.run_mode == 'dev':
        dev(args)
    elif args.run_mode == 'inference':
        inference(args)
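The comment about calling set_seed only after n_gpu is set refers to fields that config_for_multi_gpu(args) fills in. A rough sketch of what such a helper commonly does under the usual local_rank convention; this is an assumption, not the project's actual implementation:

import torch
import torch.distributed as dist


def config_for_multi_gpu(args):
    if args.local_rank == -1:
        # Single process: use all visible GPUs (or the CPU).
        args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        args.n_gpu = torch.cuda.device_count()
    else:
        # One process per GPU, launched via torch.distributed.
        torch.cuda.set_device(args.local_rank)
        args.device = torch.device('cuda', args.local_rank)
        dist.init_process_group(backend='nccl')
        args.n_gpu = 1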
Example #8
    def worker(self, env_fn_serialized, seed, remote, parent_remote):
        # Set random seed for this process
        set_seed(seed)

        # Close pipe
        parent_remote.close()

        # Create environment
        env = deserialize(env_fn_serialized)()

        while True:
            # Wait for data
            cmd, data = remote.recv()

            if cmd == 'state':
                # Return current state
                remote.send(env.state)
            elif cmd == 'step':
                # Perform action
                reward, next_state, done = env.step(data)

                # Reset environments if done flag is set
                if done:
                    env.reset()

                # Return observation
                remote.send((reward, next_state, done))
            elif cmd == 'reset':
                # Reset environment
                state = env.reset()

                remote.send(state)
            elif cmd == 'close':
                # Close pipe
                remote.close()

                break
            else:
                raise NotImplementedError
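The worker above speaks a small command protocol ('state', 'step', 'reset', 'close') over a multiprocessing pipe. A self-contained toy version of the parent side of that protocol; the dummy worker below is illustrative only and stands in for the real environment process:

import multiprocessing as mp


def _demo_worker(remote, parent_remote):
    # Stand-in for the env worker above, handling the same 'step'/'reset'/'close' commands.
    parent_remote.close()
    state = 0
    while True:
        cmd, data = remote.recv()
        if cmd == 'step':
            state += data
            remote.send((1.0, state, False))   # (reward, next_state, done)
        elif cmd == 'reset':
            state = 0
            remote.send(state)
        elif cmd == 'close':
            remote.close()
            break


if __name__ == '__main__':
    parent_remote, child_remote = mp.Pipe()
    proc = mp.Process(target=_demo_worker, args=(child_remote, parent_remote))
    proc.start()
    child_remote.close()
    parent_remote.send(('reset', None))
    print(parent_remote.recv())                # 0
    parent_remote.send(('step', 1))
    print(parent_remote.recv())                # (1.0, 1, False)
    parent_remote.send(('close', None))
    proc.join()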
Example #9
 def setUp(self):
     set_seed(self.seed)
Example #10
    def train(self,
              train_data_loader,
              dev_data_loader=None,
              dev_CoNLLU_file=None):
        self.optimizer, self.optim_scheduler = get_optimizer(
            self.args, self.model)
        global_step = 0
        best_result = BestResult()
        self.model.zero_grad()
        set_seed(
            self.args
        )  # Added here for reproducibility (even between python 2 and 3)
        train_stop = False
        summary_writer = SummaryWriter(log_dir=self.args.summary_dir)
        for epoch in range(1, self.args.max_train_epochs + 1):
            epoch_ave_loss = 0
            train_data_loader = tqdm(train_data_loader,
                                     desc=f'Training epoch {epoch}')
            # Some models need custom per-epoch operations during training; the default does nothing.
            # See the _custom_train_operations implementations in subclasses.
            self._custom_train_operations(epoch)
            for step, batch in enumerate(train_data_loader):
                batch = tuple(t.to(self.args.device) for t in batch)
                self.model.train()
                # debug_print(batch)
                # word_mask: word-level mask, 1 = real input, 0 = PAD
                inputs, word_mask, _, dep_ids = self._unpack_batch(
                    self.args, batch)
                # word_pad_mask: word-level mask, 1 = PAD, 0 = real input
                word_pad_mask = torch.eq(word_mask, 0)
                unlabeled_scores, labeled_scores = self.model(inputs)
                labeled_target = dep_ids
                unlabeled_target = labeled_target.ge(1).to(
                    unlabeled_scores.dtype)
                # Calc loss and update:
                loss, _ = self._update_and_predict(
                    unlabeled_scores,
                    labeled_scores,
                    unlabeled_target,
                    labeled_target,
                    word_pad_mask,
                    label_loss_ratio=self.model.label_loss_ratio
                    if not self.args.parallel_train else
                    self.model.module.label_loss_ratio,
                    calc_loss=True,
                    update=True,
                    calc_prediction=False)
                global_step += 1
                if loss is not None:
                    epoch_ave_loss += loss

                if global_step % self.args.eval_interval == 0:
                    summary_writer.add_scalar('loss/train', loss, global_step)
                    # Log the learning rate of each parameter group
                    for i, param_group in enumerate(
                            self.optimizer.param_groups):
                        summary_writer.add_scalar(f'lr/group_{i}',
                                                  param_group['lr'],
                                                  global_step)
                    if dev_data_loader:
                        UAS, LAS = self.dev(dev_data_loader, dev_CoNLLU_file)
                        summary_writer.add_scalar('metrics/uas', UAS,
                                                  global_step)
                        summary_writer.add_scalar('metrics/las', LAS,
                                                  global_step)
                        if best_result.is_new_record(LAS=LAS,
                                                     UAS=UAS,
                                                     global_step=global_step):
                            self.logger.info(
                                f"\n## NEW BEST RESULT in epoch {epoch} ##")
                            self.logger.info('\n' + str(best_result))
                            # Save the best model:
                            if hasattr(self.model, 'module'):
                                # Multi-GPU: the model is wrapped in torch.nn.DataParallel
                                self.model.module.save_pretrained(
                                    self.args.output_model_dir)
                            else:
                                self.model.save_pretrained(
                                    self.args.output_model_dir)

                if self.args.early_stop and global_step - best_result.best_LAS_step > self.args.early_stop_steps:
                    self.logger.info(
                        f'\n## Early stop in step:{global_step} ##')
                    train_stop = True
                    break
            if train_stop:
                break
            # print(f'\n- Epoch {epoch} average loss : {epoch_ave_loss / len(train_data_loader)}')
            summary_writer.add_scalar('epoch_loss',
                                      epoch_ave_loss / len(train_data_loader),
                                      epoch)
        with open(self.args.dev_result_path, 'w', encoding='utf-8') as f:
            f.write(str(best_result) + '\n')
        self.logger.info("\n## BEST RESULT in Training ##")
        self.logger.info('\n' + str(best_result))
        summary_writer.close()
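BestResult is defined elsewhere in that project. A minimal sketch consistent with how the parser trainer above uses it (is_new_record keyed on LAS, a best_LAS_step field, printable via str); the exact fields and tie-breaking are assumptions, not the project's actual class:

class BestResult:
    def __init__(self):
        self.best_LAS = 0.0
        self.best_UAS = 0.0
        self.best_LAS_step = 0

    def is_new_record(self, LAS, UAS, global_step):
        # Treat a higher LAS as a new record and remember where it happened.
        if LAS > self.best_LAS:
            self.best_LAS, self.best_UAS, self.best_LAS_step = LAS, UAS, global_step
            return True
        return False

    def __str__(self):
        return (f'best LAS: {self.best_LAS:.4f}, UAS: {self.best_UAS:.4f} '
                f'at step {self.best_LAS_step}')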
Example #11
def train(args):
    def _get_dataloader(datasubset,
                        tokenizer,
                        device,
                        args,
                        subset_classes=True):
        """
        Get specific dataloader.

        Args:
            datasubset ([type]): [description]
            tokenizer ([type]): [description]
            device ([type]): [description]
            args ([type]): [description]

        Returns:
            dataloader
        """

        if subset_classes:
            dataloader = StratifiedLoaderwClassesSubset(
                datasubset,
                k=args['k'],
                max_classes=args['max_classes'],
                max_batch_size=args['max_batch_size'],
                tokenizer=tokenizer,
                device=device,
                shuffle=True,
                verbose=False)
        else:
            dataloader = StratifiedLoader(
                datasubset,
                k=args['k'],
                max_batch_size=args['max_batch_size'],
                tokenizer=tokenizer,
                device=device,
                shuffle=True,
                verbose=False)

        return dataloader

    def _adapt_and_fit(support_labels,
                       support_input,
                       query_labels,
                       query_input,
                       loss_fn,
                       model_init,
                       args,
                       mode="train"):
        """
        Adapts the init model to a support set and computes loss on query set.

        Args:
            support_labels ([type]): [description]
            support_input ([type]): [description]
            query_labels ([type]): [description]
            query_input ([type]): [description]
            loss_fn ([type]): [description]
            model_init ([type]): [description]
            args
            mode
        """

        #####################
        # Create model_task #
        #####################
        if (not args['dropout']) and mode == "train":
            for module in model_init.modules():
                if isinstance(module, nn.Dropout):
                    module.eval()
                else:
                    module.train()
        elif mode != "train":
            model_init.eval()
        else:
            model_init.train()

        model_task = deepcopy(model_init)

        for name, param in model_task.encoder.model.named_parameters():
            transformer_layer = re.search(r"(?:encoder\.layer\.)([0-9]+)", name)
            if transformer_layer and (int(transformer_layer.group(1)) >
                                      args['inner_nu']):
                param.requires_grad = True
            elif 'pooler' in name:
                param.requires_grad = False
            elif args['inner_nu'] < 0:
                param.requires_grad = True
            else:
                param.requires_grad = False

        model_task_optimizer = optim.SGD(model_task.parameters(),
                                         lr=args['inner_lr'])
        model_task.zero_grad()

        #######################
        # Generate prototypes #
        #######################

        labs = torch.sort(torch.unique(support_labels))[0]

        if (not args['kill_prototypes']):

            y = model_init(support_input)

            prototypes = torch.stack(
                [torch.mean(y[support_labels == c], dim=0) for c in labs])

            W_init = 2 * prototypes
            b_init = -torch.norm(prototypes, p=2, dim=1)**2

        else:

            W_init = torch.empty(
                (labs.size()[0],
                 model_init.out_dim)).to(model_task.get_device())
            nn.init.kaiming_normal_(W_init)

            b_init = torch.zeros((labs.size()[0])).to(model_task.get_device())

        W_task, b_task = W_init.detach(), b_init.detach()
        W_task.requires_grad, b_task.requires_grad = True, True

        #################
        # Adapt to data #
        #################
        for _ in range(args['n_inner']):

            y = model_task(support_input)
            logits = F.linear(y, W_task, b_task)

            inner_loss = loss_fn(logits, support_labels)

            W_task_grad, b_task_grad = torch.autograd.grad(inner_loss,\
                [W_task, b_task], retain_graph=True)

            inner_loss.backward()

            if args['clip_val'] > 0:
                torch.nn.utils.clip_grad_norm_(model_task.parameters(),
                                               args['clip_val'])

            model_task_optimizer.step()

            W_task = W_task - args['output_lr'] * W_task_grad
            b_task = b_task - args['output_lr'] * b_task_grad

            if args['print_inner_loss']:
                print(f"\tInner Loss: {inner_loss.detach().cpu().item()}")

        #########################
        # Validate on query set #
        #########################
        if mode == "train":
            for module in model_task.modules():
                if isinstance(module, nn.Dropout):
                    module.eval()

            W_task = W_init + (W_task - W_init).detach()
            b_task = b_init + (b_task - b_init).detach()

        y = model_task(query_input)
        logits = F.linear(y, W_task, b_task)

        outer_loss = loss_fn(logits, query_labels)

        if mode == "train":
            model_task_params = [
                param for param in model_task.parameters()
                if param.requires_grad
            ]
            model_task_grads = torch.autograd.grad(outer_loss,
                                                   model_task_params,
                                                   retain_graph=True)

            model_init_params = [
                param for param in model_init.parameters()
                if param.requires_grad
            ]

            model_init_grads = torch.autograd.grad(outer_loss,
                                                   model_init_params,
                                                   retain_graph=False,
                                                   allow_unused=True)

            model_init_grads = model_init_grads + model_task_grads

            for param, grad in zip(model_init_params, model_init_grads):
                if param.grad is not None and grad is not None:
                    param.grad += grad.detach()
                elif grad is not None:
                    param.grad = grad.detach()
                else:
                    param.grad = None
        else:
            del model_task, W_task, b_task, W_task_grad, b_task_grad, W_init, b_init

        if outer_loss.detach().cpu().item() > 10:
            print(outer_loss.detach().cpu().item(),
                  inner_loss.detach().cpu().item())

        return logits.detach(), outer_loss.detach()

    #######################
    # Logging Directories #
    #######################
    log_dir = os.path.join(args['checkpoint_path'], args['version'])

    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'tensorboard'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'checkpoint'), exist_ok=True)
    #print(f"Saving models and logs to {log_dir}")

    checkpoint_save_path = os.path.join(log_dir, 'checkpoint')

    with open(os.path.join(log_dir, 'checkpoint', 'hparams.pickle'),
              'wb') as file:
        pickle.dump(args, file)

    ##########################
    # Device, Logging, Timer #
    ##########################

    set_seed(args['seed'])

    timer = Timer()

    device = torch.device('cuda' if (
        torch.cuda.is_available() and args['gpu']) else 'cpu')

    # Build the tensorboard writer
    writer = SummaryWriter(os.path.join(log_dir, 'tensorboard'))

    ###################
    # Load in dataset #
    ###################
    print("Data Prep")
    dataset = meta_dataset(include=args['include'], verbose=True)
    dataset.prep(text_tokenizer=manual_tokenizer)
    print("")

    ####################
    # Init models etc. #
    ####################
    model_init = SeqTransformer(args)
    tokenizer = AutoTokenizer.from_pretrained(args['encoder_name'])

    tokenizer.add_special_tokens({'additional_special_tokens': specials()})
    model_init.encoder.model.resize_token_embeddings(len(tokenizer.vocab))

    if args['optimizer'] == "Adam":
        meta_optimizer = optim.Adam(model_init.parameters(),
                                    lr=args['meta_lr'])
    elif args['optimizer'] == "SGD":
        meta_optimizer = optim.SGD(model_init.parameters(), lr=args['meta_lr'])

    meta_scheduler = get_constant_schedule_with_warmup(meta_optimizer,
                                                       args['warmup_steps'])
    reduceOnPlateau = optim.lr_scheduler.ReduceLROnPlateau(
        meta_optimizer,
        mode='max',
        factor=args['lr_reduce_factor'],
        patience=args['patience'],
        verbose=True)

    model_init = model_init.to(device)

    loss_fn = nn.CrossEntropyLoss()

    #################
    # Training loop #
    #################

    best_overall_acc_s = 0.0

    for episode in range(1, args['max_episodes'] + 1):

        outer_loss_agg, acc_agg, f1_agg = 0.0, 0.0, 0.0
        outer_loss_s_agg, acc_s_agg, f1_s_agg = 0.0, 0.0, 0.0

        for ii in range(1, args['n_outer'] + 1):
            #################
            # Sample a task #
            #################
            task = dataset_sampler(dataset, sampling_method='sqrt')

            datasubset = dataset.datasets[task]['train']

            dataloader = _get_dataloader(datasubset,
                                         tokenizer,
                                         device,
                                         args,
                                         subset_classes=args['subset_classes'])

            support_labels, support_input, query_labels, query_input = next(
                dataloader)

            logits, outer_loss = _adapt_and_fit(support_labels,
                                                support_input,
                                                query_labels,
                                                query_input,
                                                loss_fn,
                                                model_init,
                                                args,
                                                mode="train")

            ######################
            # Inner Loop Logging #
            ######################
            with torch.no_grad():
                mets = logging_metrics(logits.detach().cpu(),
                                       query_labels.detach().cpu())
                outer_loss_ = outer_loss.detach().cpu().item()
                acc = mets['acc']
                f1 = mets['f1']

                outer_loss_s = outer_loss_ / np.log(dataloader.n_classes)
                acc_s = acc / (1 / dataloader.n_classes)
                f1_s = f1 / (1 / dataloader.n_classes)

                outer_loss_agg += outer_loss_ / args['n_outer']
                acc_agg += acc / args['n_outer']
                f1_agg += f1 / args['n_outer']

                outer_loss_s_agg += outer_loss_s / args['n_outer']
                acc_s_agg += acc_s / args['n_outer']
                f1_s_agg += f1_s / args['n_outer']

            print(
                "{:} | Train | Episode {:04}.{:02} | Task {:^20s}, N={:} | Loss {:5.2f}, Acc {:5.2f}, F1 {:5.2f} | Mem {:5.2f} GB"
                .format(
                    timer.dt(), episode, ii, task, dataloader.n_classes,
                    outer_loss_s if args['print_scaled'] else outer_loss_,
                    acc_s if args['print_scaled'] else acc,
                    f1_s if args['print_scaled'] else f1,
                    psutil.Process(os.getpid()).memory_info().rss / 1024**3))

            writer.add_scalars('Loss/Train', {task: outer_loss_}, episode)
            writer.add_scalars('Accuracy/Train', {task: acc}, episode)
            writer.add_scalars('F1/Train', {task: f1}, episode)

            writer.add_scalars('LossScaled/Train', {task: outer_loss_s},
                               episode)
            writer.add_scalars('AccuracyScaled/Train', {task: acc_s}, episode)
            writer.add_scalars('F1Scaled/Train', {task: f1_s}, episode)

            writer.flush()

        ############################
        # Init Model Backward Pass #
        ############################
        model_init_params = [
            param for param in model_init.parameters() if param.requires_grad
        ]
        #for param in model_init_params:
        #    param.grad = param.grad #/ args['n_outer']

        if args['clip_val'] > 0:
            torch.nn.utils.clip_grad_norm_(model_init_params, args['clip_val'])

        meta_optimizer.step()
        meta_scheduler.step()

        if args['warmup_steps'] <= episode + 1:
            meta_optimizer.zero_grad()

        #####################
        # Aggregate Logging #
        #####################
        print(
            "{:} | MACRO-AGG | Train | Episode {:04} | Loss {:5.2f}, Acc {:5.2f}, F1 {:5.2f}\n"
            .format(
                timer.dt(), episode,
                outer_loss_s_agg if args['print_scaled'] else outer_loss_agg,
                acc_s_agg if args['print_scaled'] else acc_agg,
                f1_s_agg if args['print_scaled'] else f1_agg))

        writer.add_scalar('Loss/MacroTrain', outer_loss_agg, episode)
        writer.add_scalar('Accuracy/MacroTrain', acc_agg, episode)
        writer.add_scalar('F1/MacroTrain', f1_agg, episode)

        writer.add_scalar('LossScaled/MacroTrain', outer_loss_s_agg, episode)
        writer.add_scalar('AccuracyScaled/MacroTrain', acc_s_agg, episode)
        writer.add_scalar('F1Scaled/MacroTrain', f1_s_agg, episode)

        writer.flush()

        ##############
        # Evaluation #
        ##############
        if (episode % args['eval_every_n']) == 0 or episode == 1:

            overall_loss, overall_acc, overall_f1 = [], [], []
            overall_loss_s, overall_acc_s, overall_f1_s = [], [], []
            ###################
            # Individual Task #
            ###################
            for task in dataset.lens.keys():
                datasubset = dataset.datasets[task]['validation']

                task_loss, task_acc, task_f1 = [], [], []
                task_loss_s, task_acc_s, task_f1_s = [], [], []
                for _ in range(args['n_eval_per_task']):

                    dataloader = _get_dataloader(
                        datasubset,
                        tokenizer,
                        device,
                        args,
                        subset_classes=args['subset_classes'])
                    support_labels, support_input, query_labels, query_input = next(
                        dataloader)

                    logits, loss = _adapt_and_fit(support_labels,
                                                  support_input,
                                                  query_labels,
                                                  query_input,
                                                  loss_fn,
                                                  model_init,
                                                  args,
                                                  mode="eval")

                    mets = logging_metrics(logits.detach().cpu(),
                                           query_labels.detach().cpu())

                    task_loss.append(loss.detach().cpu().item())
                    task_acc.append(mets['acc'])
                    task_f1.append(mets['f1'])

                    task_loss_s.append(loss.detach().cpu().item() /
                                       np.log(dataloader.n_classes))
                    task_acc_s.append(mets['acc'] / (1 / dataloader.n_classes))
                    task_f1_s.append(mets['f1'] / (1 / dataloader.n_classes))

                overall_loss.append(np.mean(task_loss))
                overall_acc.append(np.mean(task_acc))
                overall_f1.append(np.mean(task_f1))

                overall_loss_s.append(np.mean(task_loss_s))
                overall_acc_s.append(np.mean(task_acc_s))
                overall_f1_s.append(np.mean(task_f1_s))

                print(
                    "{:} | Eval  | Episode {:04} | Task {:^20s} | Loss {:5.2f}, Acc {:5.2f}, F1 {:5.2f} | Mem {:5.2f} GB"
                    .format(
                        timer.dt(), episode, task, overall_loss_s[-1]
                        if args['print_scaled'] else overall_loss[-1],
                        overall_acc_s[-1] if args['print_scaled'] else
                        overall_acc[-1], overall_f1_s[-1]
                        if args['print_scaled'] else overall_f1[-1],
                        psutil.Process(os.getpid()).memory_info().rss /
                        1024**3))

                writer.add_scalars('Loss/Eval', {task: overall_loss[-1]},
                                   episode)
                writer.add_scalars('Accuracy/Eval', {task: overall_acc[-1]},
                                   episode)
                writer.add_scalars('F1/Eval', {task: overall_f1[-1]}, episode)

                writer.add_scalars('LossScaled/Eval',
                                   {task: overall_loss_s[-1]}, episode)
                writer.add_scalars('AccuracyScaled/Eval',
                                   {task: overall_acc_s[-1]}, episode)
                writer.add_scalars('F1Scaled/Eval', {task: overall_f1_s[-1]},
                                   episode)

                writer.flush()

            #######################
            # All Tasks Aggregate #
            #######################
            overall_loss = np.mean(overall_loss)
            overall_acc = np.mean(overall_acc)
            overall_f1 = np.mean(overall_f1)

            overall_loss_s = np.mean(overall_loss_s)
            overall_acc_s = np.mean(overall_acc_s)
            overall_f1_s = np.mean(overall_f1_s)

            print(
                "{:} | MACRO-AGG | Eval  | Episode {:04} | Loss {:5.2f}, Acc {:5.2f}, F1 {:5.2f}\n"
                .format(
                    timer.dt(), episode,
                    overall_loss_s if args['print_scaled'] else overall_loss,
                    overall_acc_s if args['print_scaled'] else overall_acc,
                    overall_f1_s if args['print_scaled'] else overall_f1))

            writer.add_scalar('Loss/MacroEval', overall_loss, episode)
            writer.add_scalar('Accuracy/MacroEval', overall_acc, episode)
            writer.add_scalar('F1/MacroEval', overall_f1, episode)

            writer.add_scalar('LossScaled/MacroEval', overall_loss_s, episode)
            writer.add_scalar('AccuracyScaled/MacroEval', overall_acc_s,
                              episode)
            writer.add_scalar('F1Scaled/MacroEval', overall_f1_s, episode)

            writer.flush()

            #####################
            # Best Model Saving #
            #####################
            if overall_acc_s >= best_overall_acc_s:
                for file in os.listdir(checkpoint_save_path):
                    if 'best_model' in file:
                        ep = re.match(r".+macroaccs_\[(.+)\]", file)
                        if float(ep.group(1)):
                            os.remove(os.path.join(checkpoint_save_path, file))

                save_name = "best_model-episode_[{:}]-macroaccs_[{:.2f}].checkpoint".format(
                    episode, overall_acc_s)

                with open(os.path.join(checkpoint_save_path, save_name),
                          'wb') as f:

                    torch.save(model_init.state_dict(), f)

                print(
                    f"New best scaled accuracy. Saving model as {save_name}\n")
                best_overall_acc_s = overall_acc_s
                curr_patience = args['patience']
            else:
                if episode > args['min_episodes']:
                    curr_patience -= 1
                #print(f"Model did not improve with macroaccs_={overall_acc_s}. Patience is now {curr_patience}\n")

            #######################
            # Latest Model Saving #
            #######################
            for file in os.listdir(checkpoint_save_path):
                if 'latest_model' in file:
                    ep = re.match(r".+episode_\[([a-zA-Z0-9\.]+)\].+", file)
                    if ep is not None and int(ep.group(1)) <= episode:
                        os.remove(os.path.join(checkpoint_save_path, file))

            save_name = "latest_model-episode_[{:}]-macroaccs_[{:.2f}].checkpoint".format(
                episode, overall_acc_s)

            with open(os.path.join(checkpoint_save_path, save_name),
                      'wb') as f:

                torch.save(model_init.state_dict(), f)

            with open(
                    os.path.join(checkpoint_save_path,
                                 "latest_trainer.pickle"), 'wb') as f:

                pickle.dump(
                    {
                        'episode': episode,
                        'overall_acc_s': overall_acc_s,
                        'best_overall_acc_s': best_overall_acc_s
                    }, f)

            if episode >= args['min_episodes']:
                reduceOnPlateau.step(overall_acc_s)

                curr_lr = meta_optimizer.param_groups[0]['lr']
                if curr_lr < args['min_meta_lr']:
                    print("Patience spent.\nEarly stopping.")
                    raise KeyboardInterrupt

        writer.add_scalar('Meta-lr', meta_optimizer.param_groups[0]['lr'],
                          episode)
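A note on the prototype-based head initialization in _adapt_and_fit above: with W_init = 2 * prototypes and b_init = -||prototypes||^2, the linear head's logits equal the negative squared Euclidean distance to each prototype up to a per-example constant, so the initial classifier behaves like a prototypical network before adaptation. A quick numerical check with toy shapes (illustrative only):

import torch
import torch.nn.functional as F

y = torch.randn(5, 8)                       # 5 query embeddings of dimension 8
prototypes = torch.randn(3, 8)              # 3 class prototypes
W_init = 2 * prototypes
b_init = -torch.norm(prototypes, p=2, dim=1) ** 2
logits = F.linear(y, W_init, b_init)
neg_sq_dist = -torch.cdist(y, prototypes) ** 2
# The two differ only by ||y||^2, a constant per query example.
print(torch.allclose(logits, neg_sq_dist + (y ** 2).sum(dim=1, keepdim=True), atol=1e-5))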
Example #12
    generator = NavieGenerator(input_dim=Config.z_dim)
    generator.load_weights(weight_file)

    pseudo_imgs = generator(z_val, training=False)

    mean, std = get_cifar10_mean_std()
    # put back mean and std
    ret = pseudo_imgs * std + mean
    # pseudo_imgs
    return ret


# ------------------------------------------------------------------------------

if __name__ == '__main__':
    set_seed(Config.seed)
    files = [
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i100.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i200.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i300.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i400.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i500.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i700.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i1000.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i1500.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i2000.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i2500.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i10000.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i15000.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i20000.h5",
        "zeroshot_cifar10_T-40-2_S-16-1_seed_45/generator_i50000.h5",
Example #13
def train(depth,
          width,
          seed=42,
          data_per_class=-1,
          dataset='cifar10',
          savedir='saved_models',
          is_continue=False):

    set_seed(seed)

    # Load data
    if dataset == 'cifar10':
        # TODO: sampling for Fig2 green line
        (x_train, y_train_lbl), (x_test, y_test_lbl) = get_cifar10_data()
        # x_train, y_train_lbl = balance_sampling(x_train, y_train_lbl, data_per_class=200)
        shape = (32, 32, 3)
        classes = 10
    elif dataset == 'fashion_mnist':
        (x_train, y_train_lbl), (x_test, y_test_lbl) = get_fashion_mnist_data()
        shape = (32, 32, 1)
        classes = 10
    else:
        raise NotImplementedError("TODO: SVHN")
    # ====================================================================
    # make sampling
    if data_per_class > 0:
        # sample
        x_train_sample, y_train_lbl_sample = \
            balance_sampling(x_train, y_train_lbl, data_per_class=data_per_class)

        # repeat the sampled data so it is as large as the full data set, for convenience
        x_train = np.repeat(x_train_sample,
                            Config.n_data_per_class / data_per_class,
                            axis=0)
        y_train_lbl = np.repeat(y_train_lbl_sample,
                                Config.n_data_per_class / data_per_class,
                                axis=0)
    # ====================================================================
    # To one-hot
    y_train = to_categorical(y_train_lbl)
    y_test = to_categorical(y_test_lbl)

    # Setup model
    model_type = 'WRN-%d-%d-seed%d' % (depth, width, seed)
    wrn_model = WideResidualNetwork(depth,
                                    width,
                                    classes=classes,
                                    input_shape=shape,
                                    weight_decay=Config.weight_decay)

    # Prepare model model saving directory.
    save_dir = os.path.join(os.getcwd(), savedir)
    mkdir(save_dir)

    # Set up model name and path
    model_name = '%s_%s_model.{epoch:03d}.h5' % (dataset, model_type)
    model_filepath = os.path.join(save_dir, model_name)

    # set up log file
    log_fname = '{}-wrn-{}-{}-seed{}_log.csv'.format(dataset, depth, width,
                                                     seed)
    log_filepath = os.path.join(save_dir, log_fname)
    # =================================================================
    if is_continue:
        for i in range(Config.epochs, 0, -1):
            fname = model_filepath.format(epoch=i)
            if os.path.isfile(fname):
                print("Using ", fname, " as the save point.")
                break
        if i <= 1:
            raise RuntimeError("Cannot continue the training")
        # ======================================================
        initial_epoch = i
        wrn_model = load_model(fname)
        is_log_append = True
    else:
        initial_epoch = 0
        # compile model
        optim = SGD(learning_rate=lr_schedule(initial_epoch),
                    momentum=Config.momentum,
                    decay=0.0,
                    nesterov=True)

        wrn_model.compile(loss='categorical_crossentropy',
                          optimizer=optim,
                          metrics=['accuracy'])
        is_log_append = False

    logger = CSVLogger(filename=log_filepath,
                       separator=',',
                       append=is_log_append)

    # Prepare callbacks for model saving and for learning rate adjustment.
    lr_scheduler = LearningRateScheduler(lr_schedule)
    checkpointer = ModelCheckpoint(filepath=model_filepath,
                                   monitor='val_acc',
                                   verbose=1,
                                   save_best_only=True)

    callbacks = [lr_scheduler, checkpointer, logger]

    datagen = ImageDataGenerator(
        width_shift_range=4,
        height_shift_range=4,
        horizontal_flip=True,
        vertical_flip=False,
        rescale=None,
        fill_mode='reflect',
    )

    datagen.fit(x_train)

    wrn_model.fit_generator(datagen.flow(x_train,
                                         y_train,
                                         batch_size=Config.batch_size,
                                         shuffle=True),
                            validation_data=(x_test, y_test),
                            epochs=Config.epochs,
                            initial_epoch=initial_epoch,
                            verbose=1,
                            callbacks=callbacks)

    scores = wrn_model.evaluate(x_test, y_test, verbose=1)
    print('Test loss:', scores[0])
    print('Test accuracy:', scores[1])
    # =================================================
    # use the final one as teachers
    wrn_model.save(model_filepath.format(epoch=Config.epochs - 1))
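lr_schedule and Config come from elsewhere in that project. For wide residual networks on CIFAR, a common step-decay schedule compatible with both the direct call lr_schedule(initial_epoch) and the LearningRateScheduler callback above looks like the following; the base rate, decay factor and epoch boundaries here are assumptions, not the project's values:

def lr_schedule(epoch, lr=None):
    # Classic WRN CIFAR schedule: start at 0.1 and multiply by 0.2 at
    # epochs 60, 120 and 160 (the current lr passed by Keras is ignored).
    base_lr = 0.1
    if epoch >= 160:
        return base_lr * 0.2 ** 3
    if epoch >= 120:
        return base_lr * 0.2 ** 2
    if epoch >= 60:
        return base_lr * 0.2
    return base_lr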
Example #14
                                         project_name="cpc-nlp")
    experiment.set_name(run_name)
    experiment.log_parameters({
        **config.training.to_dict(),
        **config.dataset.to_dict(),
        **config.cpc_model.to_dict()
    })
else:
    experiment = None

# define if gpu or cpu
use_cuda = not config.training.no_cuda and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
logger.info('===> use_cuda is {}'.format(use_cuda))
# set seed for reproducibility
set_seed(config.training.seed, use_cuda)

# create a CPC model for NLP
model = CPCv1(config=config)
# load model if resume mode
if config.training.resume_name:
    logger.info('===> loading a checkpoint')
    checkpoint = torch.load('{}/{}-{}'.format(config.training.logging_dir,
                                              run_name, 'model_best.pth'))
    model.load_state_dict(checkpoint['state_dict'])
# wrap the model for multi-GPU training
if config.training.multigpu and torch.cuda.device_count() > 1:
    logger.info("===> let's use {} GPUs!".format(torch.cuda.device_count()))
    model = nn.DataParallel(model)
# move to device
model.to(device)
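One practical detail with the DataParallel wrapping above: a checkpoint saved from the wrapped model stores every key with a 'module.' prefix, which will not load directly into an unwrapped model. A self-contained illustration of the usual fix, using a toy module rather than the project's CPCv1:

import torch.nn as nn

model = nn.Linear(4, 2)
wrapped = nn.DataParallel(model)
state_dict = wrapped.state_dict()                        # keys look like 'module.weight'
cleaned = {k.replace('module.', '', 1): v for k, v in state_dict.items()}
nn.Linear(4, 2).load_state_dict(cleaned)                 # now loads into the plain module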
Example #15
import numpy as np
from numpy import tile
from sklearn.metrics import f1_score, precision_recall_fscore_support, accuracy_score
from torch.utils.data import Dataset, DataLoader
from torch.utils.data.distributed import DistributedSampler
from utils.log import get_logger
from utils.compare import compare, count
from utils.lr_scheduler import cos_lr_scheduler, exp_lr_scheduler
from utils.dataset import roadDataset, roadDatasetInfer
from utils.create_dir import create_dir
from utils.seed import set_seed
from cnn_finetune import make_model
from efficientnet_pytorch import EfficientNet
from config.default import cfg

set_seed(2020)


class BASE():
    def __init__(self, cfg):

        self.gpu_id = cfg.SYSTEM.GPU_ID
        self.num_workers = cfg.SYSTEM.NUM_WORKERS
        self.train_dir = cfg.DATASET.TRAIN_DIR
        self.val_dir = cfg.DATASET.VAL_DIR
        self.test_dir = cfg.DATASET.TEST_DIR
        self.sub_dir = cfg.OUTPUT_DIR.SUB_DIR
        self.log_dir = cfg.OUTPUT_DIR.LOG_DIR
        self.out_dir = cfg.OUTPUT_DIR.OUT_DIR
        self.model_name = cfg.MODEL.MODEL_NAME
        self.train_batch_size = cfg.TRAIN_PARAM.TRAIN_BATCH_SIZE
Example #16
def run(seed=42,
        lr=3e-5,
        bs=config.TRAIN_BATCH_SIZE,
        epoch=config.EPOCHS,
        threshold=.3,
        eps=.1):
    set_seed(seed)
    main_df = pd.read_csv(config.TRAINING_FILE)
    folds = main_df['kfold'].unique()
    scores = []
    for fold in sorted(folds):
        print(f'Fold {fold}')

        df_train = main_df[main_df['kfold'] != fold].reset_index(drop=True)
        df_valid = main_df[main_df['kfold'] == fold].reset_index(drop=True)

        train_dataset = TweetDataset(
            tweets=df_train['text'].values,
            selected_texts=df_train['selected_text'].values,
            sentiments=df_train['sentiment'].values,
            threshold=threshold)

        train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                        shuffle=True,
                                                        batch_size=bs,
                                                        num_workers=6)

        valid_dataset = TweetDataset(
            tweets=df_valid['text'].values,
            selected_texts=df_valid['selected_text'].values,
            sentiments=df_valid['sentiment'].values,
            threshold=0)

        valid_data_loader = torch.utils.data.DataLoader(
            valid_dataset,
            shuffle=False,
            batch_size=config.VALID_BATCH_SIZE,
            num_workers=6)

        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        print("device: ", device)
        model = Transformer(nb_layers=2)
        model.to(device)

        best_jaccard = 0
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                'params': [
                    p for n, p in param_optimizer
                    if not any(nd in n for nd in no_decay)
                ],
                'weight_decay':
                0.001
            },
            {
                'params': [
                    p for n, p in param_optimizer
                    if any(nd in n for nd in no_decay)
                ],
                'weight_decay':
                0.0
            },
        ]

        num_train_steps = int(len(df_train) / bs * epoch)
        optimizer = AdamW(optimizer_parameters, lr=lr)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)

        for _ in range(epoch):
            engine.training(train_data_loader, model, optimizer, device,
                            scheduler, eps)
            jaccard = engine.evaluating(valid_data_loader, model, device)

            print(f'Jaccard validation score: {jaccard}')
            if jaccard > best_jaccard:
                # torch.save(
                #     model.state_dict(),
                #     os.path.join(config.SAVED_MODEL_PATH, f'model_{fold}.bin'))
                best_jaccard = jaccard
        scores.append(best_jaccard)
    print(f'Cross validation score: {np.mean(scores)} +/-{np.std(scores)}')
Example #17
    def run(self):
        """
        Run an experiment.
        """

        args = self.parser.parse_args()

        # Set random seed
        set_seed(args.seed)

        def run_op(op):
            # Create task
            task = self.define_task()

            # Create agent
            agent = self.define_agent(task.width, task.height,
                                      len(task.get_actions()))

            # Log experiment info
            self.log_info(task, agent)

            # Loading the agent state
            if args.load:
                if os.path.isfile(args.load):
                    agent.load(args.load)
                    logger.info("Agent loaded from {}".format(args.load))
                else:
                    logger.error(
                        "Agent couldn't be loaded. File {} doesn't exist".
                        format(args.load))
                logger.info("")

            # Run op and return its result
            return op(lambda: GridWorldEnv(task), agent)

        def run_train():
            def train_op(env, agent):
                # Train agent on environment
                result = self.train(env, agent, args.seed)

                # Saving the agent state
                if args.save:
                    agent.save(args.save)
                    logger.info("Agent saved to {}".format(args.save))
                    logger.info("")

                return result

            # Run train op and return its result
            return run_op(train_op)

        def run_eval():
            def eval_op(env, agent):
                # Evaluate agent on environment
                return self.eval(env, agent, args.seed)

            # Run eval op and return its result
            return run_op(eval_op)

        if args.train:
            # Train agent
            avg_result = AverageRunner(run_train).run(args.runs)

            log_average_run_result(avg_result)
        elif args.eval:
            # Evaluate agent
            avg_result = AverageRunner(run_eval).run(args.runs)

            log_average_run_result(avg_result)
Example #18
def main():
    """
    YOLOv3 trainer. See README for details.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda
    os.makedirs(args.checkpoint_dir, exist_ok=True)

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)

    print("successfully loaded config file: ", cfg)

    momentum = cfg['TRAIN']['MOMENTUM']
    decay = cfg['TRAIN']['DECAY']
    burn_in = cfg['TRAIN']['BURN_IN']
    iter_size = cfg['TRAIN']['MAXITER']
    steps = eval(cfg['TRAIN']['STEPS'])
    batch_size = cfg['TRAIN']['BATCHSIZE']
    subdivision = cfg['TRAIN']['SUBDIVISION']
    ignore_thre = cfg['TRAIN']['IGNORETHRE']
    random_resize = cfg['AUGMENTATION']['RANDRESIZE']
    base_lr = cfg['TRAIN']['LR'] / batch_size / subdivision
    gradient_clip = cfg['TRAIN']['GRADIENT_CLIP']

    print('effective_batch_size = batch_size * iter_size = %d * %d' %
          (batch_size, subdivision))

    # Make trainer behavior deterministic
    set_seed(seed=0)
    setup_cudnn(deterministic=True)

    # Learning rate setup
    def burnin_schedule(i):
        if i < burn_in:
            factor = pow(i / burn_in, 4)
        elif i < steps[0]:
            factor = 1.0
        elif i < steps[1]:
            factor = 0.1
        else:
            factor = 0.01
        return factor

    # Initiate model
    model = YOLOv3(cfg['MODEL'], ignore_thre=ignore_thre)

    if args.weights_path:
        print("loading darknet weights....", args.weights_path)
        parse_yolo_weights(model, args.weights_path)
    elif args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        state = torch.load(args.checkpoint)
        if 'model_state_dict' in state.keys():
            model.load_state_dict(state['model_state_dict'])
        else:
            model.load_state_dict(state)

    if cuda:
        print("using cuda") 
        model = model.cuda()

    if args.tfboard_dir:
        print("using tfboard")
        from tensorboardX import SummaryWriter
        tblogger = SummaryWriter(args.tfboard_dir)

    model.train()

    imgsize = cfg['TRAIN']['IMGSIZE']
    dataset = COCODataset(model_type=cfg['MODEL']['TYPE'],
                  data_dir='COCO/',
                  img_size=imgsize,
                  augmentation=cfg['AUGMENTATION'],
                  debug=args.debug)

    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=args.n_cpu)
    dataiterator = iter(dataloader)

    evaluator = COCOAPIEvaluator(model_type=cfg['MODEL']['TYPE'],
                    data_dir='COCO/',
                    img_size=cfg['TEST']['IMGSIZE'],
                    confthre=cfg['TEST']['CONFTHRE'],
                    nmsthre=cfg['TEST']['NMSTHRE'])

    dtype = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    # optimizer setup
    # set weight decay only on conv.weight
    params_dict = dict(model.named_parameters())
    params = []
    for key, value in params_dict.items():
        if 'conv.weight' in key:
            params += [{'params':value, 'weight_decay':decay * batch_size * subdivision}]
        else:
            params += [{'params':value, 'weight_decay':0.0}]
    optimizer = optim.SGD(params, lr=base_lr, momentum=momentum,
                          dampening=0, weight_decay=decay * batch_size * subdivision)

    iter_state = 0

    if args.checkpoint:
        if 'optimizer_state_dict' in state.keys():
            optimizer.load_state_dict(state['optimizer_state_dict'])
            iter_state = state['iter'] + 1

    scheduler = optim.lr_scheduler.LambdaLR(optimizer, burnin_schedule)

    # start training loop
    for iter_i in range(iter_state, iter_size + 1):

        # COCO evaluation
        if iter_i % args.eval_interval == 0:
            print('evaluating...')
            ap = evaluator.evaluate(model)
            model.train()
            if args.tfboard_dir:
                # val/aP
                tblogger.add_scalar('val/aP50', ap['aP50'], iter_i)
                tblogger.add_scalar('val/aP75', ap['aP75'], iter_i)
                tblogger.add_scalar('val/aP5095', ap['aP5095'], iter_i)
                tblogger.add_scalar('val/aP5095_S', ap['aP5095_S'], iter_i)
                tblogger.add_scalar('val/aP5095_M', ap['aP5095_M'], iter_i)
                tblogger.add_scalar('val/aP5095_L', ap['aP5095_L'], iter_i)

        # subdivision loop
        optimizer.zero_grad()
        for inner_iter_i in range(subdivision):
            try:
                imgs, targets, _, _ = next(dataiterator)  # load a batch
            except StopIteration:
                dataiterator = iter(dataloader)
                imgs, targets, _, _ = next(dataiterator)  # load a batch
            imgs = Variable(imgs.type(dtype))
            targets = Variable(targets.type(dtype), requires_grad=False)
            loss = model(imgs, targets)
            loss.backward()

        if gradient_clip >= 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clip)

        optimizer.step()
        scheduler.step()

        if iter_i % 10 == 0:
            # logging
            current_lr = scheduler.get_lr()[0] * batch_size * subdivision
            print('[Iter %d/%d] [lr %f] '
                  '[Losses: xy %f, wh %f, conf %f, cls %f, total %f, imgsize %d]'
                  % (iter_i, iter_size, current_lr,
                     model.loss_dict['xy'], model.loss_dict['wh'],
                     model.loss_dict['conf'], model.loss_dict['cls'], 
                     loss, imgsize),
                  flush=True)

            if args.tfboard_dir:
                # lr
                tblogger.add_scalar('lr', current_lr, iter_i)
                # train/loss
                tblogger.add_scalar('train/loss_xy', model.loss_dict['xy'], iter_i)
                tblogger.add_scalar('train/loss_wh', model.loss_dict['wh'], iter_i)
                tblogger.add_scalar('train/loss_conf', model.loss_dict['conf'], iter_i)
                tblogger.add_scalar('train/loss_cls', model.loss_dict['cls'], iter_i)
                tblogger.add_scalar('train/loss', loss, iter_i)

            # random resizing
            if random_resize:
                imgsize = (random.randint(0, 9) % 10 + 10) * 32
                dataset.img_shape = (imgsize, imgsize)
                dataset.img_size = imgsize
                dataloader = torch.utils.data.DataLoader(
                    dataset, batch_size=batch_size, shuffle=True, num_workers=args.n_cpu)
                dataiterator = iter(dataloader)

        # save checkpoint
        if args.checkpoint_dir and iter_i > 0 and (iter_i % args.checkpoint_interval == 0):
            torch.save({'iter': iter_i,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        },
                        os.path.join(args.checkpoint_dir, "snapshot"+str(iter_i)+".ckpt"))

    if args.tfboard_dir:
        tblogger.close()
Example #19
def eval(args):
    def _get_dataloader(datasubset,
                        tokenizer,
                        device,
                        args,
                        subset_classes=True):
        """
        Get specific dataloader.

        Args:
            datasubset ([type]): [description]
            tokenizer ([type]): [description]
            device ([type]): [description]
            args ([type]): [description]

        Returns:
            dataloader
        """

        if subset_classes:
            dataloader = StratifiedLoaderwClassesSubset(
                datasubset,
                k=args['k'],
                max_classes=args['max_classes'],
                max_batch_size=args['max_batch_size'],
                tokenizer=tokenizer,
                device=device,
                shuffle=True,
                verbose=False)
        else:
            dataloader = StratifiedLoader(
                datasubset,
                k=args['k'],
                max_batch_size=args['max_batch_size'],
                tokenizer=tokenizer,
                device=device,
                shuffle=True,
                verbose=False)

        return dataloader

    def _adapt_and_fit(support_labels_list, support_input_list,
                       query_labels_list, query_input_list, loss_fn,
                       model_init, args, mode):
        """
        Adapts the init model to a support set and computes loss on query set.

        Args:
            support_labels ([type]): [description]
            support_text ([type]): [description]
            query_labels ([type]): [description]
            query_text ([type]): [description]
            model_init ([type]): [description]
            args
            mode
        """

        #####################
        # Create model_task #
        #####################
        model_init.eval()

        model_task = deepcopy(model_init)
        model_task_optimizer = optim.SGD(model_task.parameters(),
                                         lr=args['inner_lr'])
        model_task.zero_grad()

        #######################
        # Generate prototypes #
        #######################

        with torch.no_grad():
            prototypes = 0.0
            for support_labels, support_input in zip(support_labels_list,
                                                     support_input_list):
                if mode != "baseline":
                    y = model_init(support_input)
                else:
                    y = model_init.encode(support_input)

                labs = torch.sort(torch.unique(support_labels))[0]
                prototypes += torch.stack(
                    [torch.mean(y[support_labels == c], dim=0) for c in labs])

            prototypes = prototypes / len(support_labels_list)

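            # ProtoMAML-style output-layer init from the class prototypes:
            # with W = 2 * c_k and b = -||c_k||^2, the logits equal the negative
            # squared distances to the prototypes up to a label-independent term.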
            W_init = 2 * prototypes
            b_init = -torch.norm(prototypes, p=2, dim=1)**2

        W_task, b_task = W_init.detach(), b_init.detach()
        W_task.requires_grad, b_task.requires_grad = True, True

        #################
        # Adapt to data #
        #################
        for _ in range(args['n_inner']):
            for support_labels, support_input in zip(support_labels_list,
                                                     support_input_list):
                if mode != "baseline":
                    y = model_task(support_input)
                else:
                    y = model_task.encode(support_input)

                logits = F.linear(y, W_task, b_task)

                inner_loss = loss_fn(logits, support_labels)

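                # The encoder is adapted through model_task_optimizer, while the
                # prototype-initialized output layer (W_task, b_task) is updated
                # manually below with its own learning rate args['output_lr'].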
                W_task_grad, b_task_grad = torch.autograd.grad(
                    inner_loss, [W_task, b_task], retain_graph=True)

                inner_loss.backward()

                if args['clip_val'] > 0:
                    torch.nn.utils.clip_grad_norm_(model_task.parameters(),
                                                   args['clip_val'])

                model_task_optimizer.step()

                W_task = W_task - args['output_lr'] * W_task_grad
                b_task = b_task - args['output_lr'] * b_task_grad

        #########################
        # Validate on query set #
        #########################
        logits_list, outer_loss_list = [], []
        for query_labels, query_input in zip(query_labels_list,
                                             query_input_list):
            with torch.no_grad():
                if mode != "baseline":
                    y = model_task(query_input)
                else:
                    y = model_task.encode(query_input)

                logits = F.linear(y, W_task, b_task)

                outer_loss = loss_fn(logits, query_labels)

                logits_list.append(logits)
                outer_loss_list.append(outer_loss)

        return logits_list, outer_loss_list

    #######################
    # Logging Directories #
    #######################
    log_dir = os.path.join(args['checkpoint_path'], args['version'])

    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, args['save_version']), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'checkpoint'), exist_ok=True)
    #print(f"Saving models and logs to {log_dir}")

    checkpoint_save_path = os.path.join(log_dir, 'checkpoint')

    if args['mode'] != "baseline":
        with open(os.path.join("./", checkpoint_save_path, "hparams.pickle"),
                  mode='rb+') as f:
            hparams = pickle.load(f)
    else:
        with open(os.path.join("./", args['checkpoint_path'],
                               "hparams.pickle"),
                  mode='rb+') as f:
            hparams = pickle.load(f)

    ##########################
    # Device, Logging, Timer #
    ##########################

    set_seed(args['seed'])

    timer = Timer()

    device = torch.device('cuda' if (
        torch.cuda.is_available() and args['gpu']) else 'cpu')

    # Build the tensorboard writer
    writer = SummaryWriter(os.path.join(log_dir, args['save_version']))

    ###################
    # Load in dataset #
    ###################
    print("Data Prep")
    dataset = meta_dataset(include=args['include'], verbose=True)
    dataset.prep(text_tokenizer=manual_tokenizer)
    print("")

    ####################
    # Init models etc. #
    ####################
    if args['mode'] != "baseline":
        model_init = SeqTransformer(hparams)
        tokenizer = AutoTokenizer.from_pretrained(hparams['encoder_name'])
    else:
        model_init = CustomBERT(num_classes=task_label_dict[args['version']])
        tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')

    tokenizer.add_special_tokens({'additional_special_tokens': specials()})
    model_init.encoder.model.resize_token_embeddings(len(tokenizer.vocab))

    for file in os.listdir(checkpoint_save_path):
        if 'best_model' in file:
            fp = os.path.join(checkpoint_save_path, file)
            with open(fp, mode='rb+') as f:
                print(f"Found pre-trained file at {fp}")
                if args['mode'] != "baseline":
                    model_init.load_state_dict(
                        torch.load(f, map_location=device))

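                    # Freeze the encoder except for transformer layers with an index
                    # above args['nu'] (nu < 0 unfreezes everything but the pooler);
                    # the pooler is always frozen.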
                    for name, param in model_init.encoder.model.named_parameters(
                    ):
                        transformer_layer = re.search(
                            r"(?:encoder\.layer\.)([0-9]+)", name)
                        if transformer_layer and (int(
                                transformer_layer.group(1)) > args['nu']):
                            param.requires_grad = True
                        elif 'pooler' in name:
                            param.requires_grad = False
                        elif args['nu'] < 0:
                            param.requires_grad = True
                        else:
                            param.requires_grad = False
                else:
                    model_init.load_state_dict(
                        torch.load(f, map_location=device)["bert_state_dict"])

    model_init = model_init.to(device)

    loss_fn = nn.CrossEntropyLoss()

    ##############
    # Evaluation #
    ##############

    results_dict = defaultdict(dict)

    for split in args['splits']:

        overall_loss, overall_acc, overall_f1 = [], [], []
        overall_loss_s, overall_acc_s, overall_f1_s = [], [], []

        ###################
        # Individual Task #
        ###################
        for task in dataset.lens.keys():
            datasubset = dataset.datasets[task][split]

            task_loss, task_acc, task_f1 = [], [], []
            task_loss_s, task_acc_s, task_f1_s = [], [], []
            for _ in range(args['n_eval_per_task']):
                dataloader = _get_dataloader(
                    datasubset,
                    tokenizer,
                    device,
                    args,
                    subset_classes=args['subset_classes'])

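                # Split the k-shot episode into sub-batches of at most
                # max_batch_size examples: reg_k shots per class per full
                # sub-batch, plus a final sub-batch with last_k shots per class.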
                total_size = args['k'] * dataloader.n_classes
                n_sub_batches = total_size / args['max_batch_size']
                reg_k = int(args['k'] // n_sub_batches)
                left_over = args['k'] * dataloader.n_classes - \
                    int(n_sub_batches) * reg_k * dataloader.n_classes
                last_k = int(left_over / dataloader.n_classes)


                support_labels_list, support_input_list, query_labels_list, query_input_list = [], [], [], []

                dataloader.k = reg_k
                for _ in range(int(n_sub_batches)):

                    support_labels, support_text, query_labels, query_text = next(
                        dataloader)

                    support_labels_list.append(support_labels)
                    support_input_list.append(support_text)
                    query_labels_list.append(query_labels)
                    query_input_list.append(query_text)

                if last_k > 0.0:
                    dataloader.k = last_k
                    support_labels, support_text, query_labels, query_text = next(
                        dataloader)

                    support_labels_list.append(support_labels)
                    support_input_list.append(support_text)
                    query_labels_list.append(query_labels)
                    query_input_list.append(query_text)

                logits_list, loss_list = _adapt_and_fit(
                    support_labels_list, support_input_list, query_labels_list,
                    query_input_list, loss_fn, model_init, hparams,
                    args['mode'])

                for logits, query_labels, loss in zip(logits_list,
                                                      query_labels_list,
                                                      loss_list):
                    mets = logging_metrics(logits.detach().cpu(),
                                           query_labels.detach().cpu())

                    task_loss.append(loss.detach().cpu().item())
                    task_acc.append(mets['acc'])
                    task_f1.append(mets['f1'])

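                    # Chance-level-scaled metrics: loss divided by log(n_classes)
                    # (the loss of a uniform predictor), acc/F1 divided by 1/n_classes.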
                    task_loss_s.append(loss.detach().cpu().item() /
                                       np.log(dataloader.n_classes))
                    task_acc_s.append(mets['acc'] / (1 / dataloader.n_classes))
                    task_f1_s.append(mets['f1'] / (1 / dataloader.n_classes))

            overall_loss.append(np.mean(task_loss))
            overall_acc.append(np.mean(task_acc))
            overall_f1.append(np.mean(task_f1))

            overall_loss_s.append(np.mean(task_loss_s))
            overall_acc_s.append(np.mean(task_acc_s))
            overall_f1_s.append(np.mean(task_f1_s))

            print(
                "{:} | Eval  | Split {:^8s} | Task {:^20s} | Loss {:5.2f} ({:4.2f}), Acc {:5.2f} ({:4.2f}), F1 {:5.2f} ({:4.2f}) | Mem {:5.2f} GB"
                .format(
                    timer.dt(), split, task,
                    overall_loss_s[-1] if args['print_scaled'] else overall_loss[-1],
                    np.std(task_loss_s) if args['print_scaled'] else np.std(task_loss),
                    overall_acc_s[-1] if args['print_scaled'] else overall_acc[-1],
                    np.std(task_acc_s) if args['print_scaled'] else np.std(task_acc),
                    overall_f1_s[-1] if args['print_scaled'] else overall_f1[-1],
                    np.std(task_f1_s) if args['print_scaled'] else np.std(task_f1),
                    psutil.Process(os.getpid()).memory_info().rss / 1024**3))

            writer.add_scalars(f'Loss/{split}', {task: overall_loss[-1]}, 0)
            writer.add_scalars(f'Accuracy/{split}', {task: overall_acc[-1]}, 0)
            writer.add_scalars(f'F1/{split}', {task: overall_f1[-1]}, 0)

            writer.add_scalars(f'LossScaled/{split}',
                               {task: overall_loss_s[-1]}, 0)
            writer.add_scalars(f'AccuracyScaled/{split}',
                               {task: overall_acc_s[-1]}, 0)
            writer.add_scalars(f'F1Scaled/{split}', {task: overall_f1_s[-1]},
                               0)

            writer.flush()

            results_dict[task][split] = {
                "loss":
                "{:.2f} ({:.2f})".format(overall_loss[-1], np.std(task_loss)),
                "acc":
                "{:.2f} ({:.2f})".format(overall_acc[-1], np.std(task_acc)),
                "f1":
                "{:.2f} ({:.2f})".format(overall_f1[-1], np.std(task_f1)),
                "loss_scaled":
                "{:.2f} ({:.2f})".format(overall_loss_s[-1],
                                         np.std(task_loss_s)),
                "acc_scaled":
                "{:.2f} ({:.2f})".format(overall_acc_s[-1],
                                         np.std(task_acc_s)),
                "f1_scaled":
                "{:.2f} ({:.2f})".format(overall_f1_s[-1], np.std(task_f1_s)),
            }

        #######################
        # All Tasks Aggregate #
        #######################
        overall_loss = np.mean(overall_loss)
        overall_acc = np.mean(overall_acc)
        overall_f1 = np.mean(overall_f1)

        overall_loss_s = np.mean(overall_loss_s)
        overall_acc_s = np.mean(overall_acc_s)
        overall_f1_s = np.mean(overall_f1_s)

        print(
            "{:} | MACRO-AGG | Eval  | Split {:^8s} | Loss {:5.2f}, Acc {:5.2f}, F1 {:5.2f}\n"
            .format(timer.dt(), split,
                    overall_loss_s if args['print_scaled'] else overall_loss,
                    overall_acc_s if args['print_scaled'] else overall_acc,
                    overall_f1_s if args['print_scaled'] else overall_f1))

        writer.add_scalar(f'Loss/Macro{split}', overall_loss, 0)
        writer.add_scalar(f'Accuracy/Macro{split}', overall_acc, 0)
        writer.add_scalar(f'F1/Macro{split}', overall_f1, 0)

        writer.add_scalar(f'LossScaled/Macro{split}', overall_loss_s, 0)
        writer.add_scalar(f'AccuracyScaled/Macro{split}', overall_acc_s, 0)
        writer.add_scalar(f'F1Scaled/Macro{split}', overall_f1_s, 0)

        writer.flush()

    with open(os.path.join(log_dir, args['save_version'], 'results.pickle'),
              'wb+') as file:
        pickle.dump(results_dict, file)
Example #20
def zeroshot_train(t_depth,
                   t_width,
                   t_wght_path,
                   s_depth,
                   s_width,
                   seed=42,
                   savedir=None,
                   dataset='cifar10',
                   sample_per_class=0):

    set_seed(seed)

    train_name = '%s_T-%d-%d_S-%d-%d_seed_%d' % (dataset, t_depth, t_width,
                                                 s_depth, s_width, seed)
    if sample_per_class > 0:
        train_name += "-m%d" % sample_per_class
    log_filename = train_name + '_training_log.csv'

    # save dir
    if not savedir:
        savedir = 'zeroshot_' + train_name
    full_savedir = os.path.join(os.getcwd(), savedir)
    mkdir(full_savedir)

    log_filepath = os.path.join(full_savedir, log_filename)
    logger = CustomizedCSVLogger(log_filepath)

    # Teacher
    teacher = WideResidualNetwork(t_depth,
                                  t_width,
                                  input_shape=Config.input_dim,
                                  dropout_rate=0.0,
                                  output_activations=True,
                                  has_softmax=False)

    teacher.load_weights(t_wght_path)
    teacher.trainable = False

    # Student
    student = WideResidualNetwork(s_depth,
                                  s_width,
                                  input_shape=Config.input_dim,
                                  dropout_rate=0.0,
                                  output_activations=True,
                                  has_softmax=False)

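    # one extra student update per outer iteration when labeled samples are used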
    if sample_per_class > 0:
        s_decay_steps = Config.n_outer_loop * Config.n_s_in_loop + Config.n_outer_loop
    else:
        s_decay_steps = Config.n_outer_loop * Config.n_s_in_loop

    s_optim = Adam(learning_rate=CosineDecay(Config.student_init_lr,
                                             decay_steps=s_decay_steps))
    # ---------------------------------------------------------------------------
    # Generator
    generator = NavieGenerator(input_dim=Config.z_dim)
    g_optim = Adam(learning_rate=CosineDecay(Config.generator_init_lr,
                                             decay_steps=Config.n_outer_loop *
                                             Config.n_g_in_loop))
    # ---------------------------------------------------------------------------
    # Test data
    if dataset == 'cifar10':
        (x_train, y_train_lbl), (x_test, y_test) = get_cifar10_data()
    elif dataset == 'fashion_mnist':
        (x_train, y_train_lbl), (x_test, y_test) = get_fashion_mnist_data()
    else:
        raise ValueError("Only CIFAR-10 and Fashion-MNIST are supported")
    test_data_loader = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test)).batch(200)
    # ---------------------------------------------------------------------------
    # Train data (if using train data)
    train_dataflow = None
    if sample_per_class > 0:
        # sample first
        x_train, y_train_lbl = \
            balance_sampling(x_train, y_train_lbl, data_per_class=sample_per_class)
        datagen = ImageDataGenerator(width_shift_range=4,
                                     height_shift_range=4,
                                     horizontal_flip=True,
                                     vertical_flip=False,
                                     rescale=None,
                                     fill_mode='reflect')
        datagen.fit(x_train)
        y_train = to_categorical(y_train_lbl)
        train_dataflow = datagen.flow(x_train,
                                      y_train,
                                      batch_size=Config.batch_size,
                                      shuffle=True)

    # Generator loss metrics
    g_loss_met = tf.keras.metrics.Mean()

    # Student loss metrics
    s_loss_met = tf.keras.metrics.Mean()

    # Mean number of distinct classes predicted by the teacher / student per batch
    n_cls_t_pred_metric = tf.keras.metrics.Mean()
    n_cls_s_pred_metric = tf.keras.metrics.Mean()

    max_g_grad_norm_metric = tf.keras.metrics.Mean()
    max_s_grad_norm_metric = tf.keras.metrics.Mean()


    # checkpoint
    chkpt_dict = {
        'teacher': teacher,
        'student': student,
        'generator': generator,
        's_optim': s_optim,
        'g_optim': g_optim,
    }
    # Saving checkpoint
    ckpt = tf.train.Checkpoint(**chkpt_dict)
    ckpt_manager = tf.train.CheckpointManager(ckpt,
                                              os.path.join(savedir, 'chpt'),
                                              max_to_keep=2)
    # ==========================================================================
    # if a checkpoint exists, restore the latest checkpoint.
    start_iter = 0
    if ckpt_manager.latest_checkpoint:
        ckpt.restore(ckpt_manager.latest_checkpoint)
        print('Latest checkpoint restored!!')
        with open(os.path.join(savedir, 'chpt', 'iteration'), 'r') as f:
            start_iter = int(f.read())
        logger = CustomizedCSVLogger(log_filepath, append=True)

    for iter_ in range(start_iter, Config.n_outer_loop):
        iter_stime = time.time()

        max_s_grad_norm = 0
        max_g_grad_norm = 0
        # sample a batch of latent vectors from the prior
        z_val = tf.random.normal([Config.batch_size, Config.z_dim])

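        # Zero-shot KD outer loop (in the spirit of adversarial belief matching):
        # the generator is presumably trained to produce pseudo-images on which
        # teacher and student disagree, and the student is then trained to match
        # the teacher on those same pseudo-images.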
        # Generator training
        loss = 0
        for ng in range(Config.n_g_in_loop):
            loss, g_grad_norm = train_gen(generator, g_optim, z_val, teacher,
                                          student)
            max_g_grad_norm = max(max_g_grad_norm, g_grad_norm.numpy())
            g_loss_met(loss)

        # ==========================================================================
        # Student training
        loss = 0
        pseudo_imgs, t_logits, t_acts = prepare_train_student(
            generator, z_val, teacher)
        for ns in range(Config.n_s_in_loop):
            # pseudo_imgs, t_logits, t_acts = prepare_train_student(generator, z_val, teacher)
            loss, s_grad_norm, s_logits = train_student(
                pseudo_imgs, s_optim, t_logits, t_acts, student)
            max_s_grad_norm = max(max_s_grad_norm, s_grad_norm.numpy())

            n_cls_t_pred = len(np.unique(np.argmax(t_logits, axis=-1)))
            n_cls_s_pred = len(np.unique(np.argmax(s_logits, axis=-1)))
            # logging
            s_loss_met(loss)
            n_cls_t_pred_metric(n_cls_t_pred)
            n_cls_s_pred_metric(n_cls_s_pred)
        # ==========================================================================
        # train if provided n samples
        if train_dataflow:
            x_batch_train, y_batch_train = next(train_dataflow)
            t_logits, t_acts = forward(teacher, x_batch_train, training=False)
            loss = train_student_with_labels(student, s_optim, x_batch_train,
                                             t_logits, t_acts, y_batch_train)
        # ==========================================================================

        # --------------------------------------------------------------------
        iter_etime = time.time()
        max_g_grad_norm_metric(max_g_grad_norm)
        max_s_grad_norm_metric(max_s_grad_norm)
        # --------------------------------------------------------------------
        is_last_epoch = (iter_ == Config.n_outer_loop - 1)

        if iter_ != 0 and (iter_ % Config.print_freq == 0 or is_last_epoch):
            n_cls_t_pred_avg = n_cls_t_pred_metric.result().numpy()
            n_cls_s_pred_avg = n_cls_s_pred_metric.result().numpy()
            time_per_epoch = iter_etime - iter_stime

            s_loss = s_loss_met.result().numpy()
            g_loss = g_loss_met.result().numpy()
            max_g_grad_norm_avg = max_g_grad_norm_metric.result().numpy()
            max_s_grad_norm_avg = max_s_grad_norm_metric.result().numpy()

            # build ordered dict
            row_dict = OrderedDict()

            row_dict['time_per_epoch'] = time_per_epoch
            row_dict['epoch'] = iter_
            row_dict['generator_loss'] = g_loss
            row_dict['student_kd_loss'] = s_loss
            row_dict['n_cls_t_pred_avg'] = n_cls_t_pred_avg
            row_dict['n_cls_s_pred_avg'] = n_cls_s_pred_avg
            row_dict['max_g_grad_norm_avg'] = max_g_grad_norm_avg
            row_dict['max_s_grad_norm_avg'] = max_s_grad_norm_avg

            if sample_per_class > 0:
                s_optim_iter = iter_ * (Config.n_s_in_loop + 1)
            else:
                s_optim_iter = iter_ * Config.n_s_in_loop
            row_dict['s_optim_lr'] = s_optim.learning_rate(
                s_optim_iter).numpy()
            row_dict['g_optim_lr'] = g_optim.learning_rate(iter_).numpy()

            pprint.pprint(row_dict)
        # ======================================================================
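        # Note: row_dict is only defined once the print_freq branch above has run,
        # so Config.log_freq is assumed to be a multiple of Config.print_freq.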
        if iter_ != 0 and (iter_ % Config.log_freq == 0 or is_last_epoch):
            # calculate acc
            test_accuracy = evaluate(test_data_loader, student).numpy()
            row_dict['test_acc'] = test_accuracy
            logger.log_with_order(row_dict)
            print('Test Accuracy: ', test_accuracy)

            # save checkpoint
            ckpt_save_path = ckpt_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(
                iter_ + 1, ckpt_save_path))
            with open(os.path.join(savedir, 'chpt', 'iteration'), 'w') as f:
                f.write(str(iter_ + 1))

            s_loss_met.reset_states()
            g_loss_met.reset_states()
            max_g_grad_norm_metric.reset_states()
            max_s_grad_norm_metric.reset_states()

        if iter_ != 0 and (iter_ % 5000 == 0 or is_last_epoch):
            generator.save_weights(
                join(full_savedir, "generator_i{}.h5".format(iter_)))
            student.save_weights(
                join(full_savedir, "student_i{}.h5".format(iter_)))
Example #21
    # override beta from the command-line args
    Config.beta = args.beta
    # print out config
    for attr, v in vars(Config).items():
        if attr.startswith('__'):
            continue
        print(attr, ": ", v)

    # calculate iterations
    iter_per_epoch = math.ceil(Config.total_iteration / Config.epochs)
    print("Iteration per epoch: ", iter_per_epoch)
    print("-------------------------------------")

    # Set seed
    set_seed(args.seed)

    # ===================================
    # Training
    # load CIFAR-10, subsample if needed; TODO: add a loader for SVHN
    (x_train, y_train_lbl), (x_test, y_test_lbl) = get_cifar10_data()
    if args.sample_per_class < 5000:
        x_train, y_train_lbl = balance_sampling(
            x_train, y_train_lbl, data_per_class=args.sample_per_class)

    # For evaluation
    test_data_loader = tf.data.Dataset.from_tensor_slices(
        (x_test, y_test_lbl)).batch(200)
    # y_test = to_categorical(y_test_lbl)
    y_train = to_categorical(y_train_lbl)