Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--test_data_path",
                        default='./data/test_stt.pkl',
                        type=str,
                        help="test data path")
    args_ = parser.parse_args()
    pretrained = torch.load(pretrained_model_path)
    args = torch.load(args_path)
    args.test_data_path = args_.test_data_path
    args.eval_batch_size = 64

    bert_config = BertConfig(config_path)
    bert_config.num_labels = 7

    model = BertForEmotionClassification(bert_config).to(device)
    model.load_state_dict(pretrained, strict=False)
    args.n_gpu = 2
    loss, acc, f1, total_y_hat, cm = test(model, args)
    print("loss : {} \nacc : {} \nf1 : {}".format(loss, acc, f1))

    draw_cm(cm)
    tmp = pd.read_pickle(args.test_data_path)

    # remove duplicates
    tmp = tmp[['Sentence', 'Emotion']].drop_duplicates().reset_index(drop=True)

    tmp['Pred'] = [label_list[i] for i in total_y_hat]
    tmp.to_csv('./result/test_result.csv')
    print("results are saved to result folder")
Example #2
def main():
    parser = argparse.ArgumentParser()
    # parser.add_argument("--test_data_path", default='./data/korean_single_test.csv', type=str,
    #                     help="test data path")
    parser.add_argument("--test_data_path",
                        default='./data/toon_test.csv',
                        type=str,
                        help="test data path")
    args_ = parser.parse_args()
    pretrained = torch.load(pretrained_model_path)
    args = torch.load(args_path)
    args.test_data_path = args_.test_data_path
    args.eval_batch_size = 64

    bert_config = BertConfig(config_path)
    bert_config.num_labels = 7

    model = BertForEmotionClassification(bert_config).to(device)
    model.load_state_dict(pretrained, strict=False)

    loss, acc, f1, total_y_hat, cm = test(model, args)
    print("loss : {} \nacc : {} \nf1 : {}".format(loss, acc, f1))

    draw_cm(cm)
    tmp = pd.read_csv(args.test_data_path)
    tmp['Pred'] = [label_list[i] for i in total_y_hat]
    tmp.to_csv('./result/test_result_toon.csv')
    print("results are saved to result folder")
Example #3
def train(args):
    set_seed(args)
    # Set device
    if args.device == 'cuda':
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        logger.info('use cuda')
    else:
        device = torch.device('cpu')
        logger.info('use cpu')

    # Set label list for classification
    if args.num_label == 'multi':
        label_list = ['공포', '놀람', '분노', '슬픔', '중립', '행복', '혐오']
    elif args.num_label == 'binary':
        label_list = ['긍정', '부정']
    logger.info('use {} labels for training'.format(len(label_list)))

    # Load pretrained model and model configuration
    pretrained_path = os.path.join('./pretrained_model/', args.pretrained_type)
    if args.pretrained_model_path is None:
        # Use the original pretrained BERT model (ETRI/SKT)
        pretrained_model_path = os.path.join(pretrained_path, 'pytorch_model.bin')
    else:
        # Use a further-pretrained BERT model
        pretrained_model_path = args.pretrained_model_path
    logger.info('Pretrain Model : {}'.format(pretrained_model_path))
    pretrained = torch.load(pretrained_model_path)
    
    if args.pretrained_type == 'skt' and 'bert.' not in list(pretrained.keys())[0]:
        logger.info('modify parameter names')
        # Change parameter name for consistency
        pretrained = {'bert.' + k: v for k, v in pretrained.items()}

    bert_config = BertConfig(os.path.join(pretrained_path, 'bert_config.json'))
    bert_config.num_labels = len(label_list)
    model = BertForEmotionClassification(bert_config).to(device)
    model.load_state_dict(pretrained, strict=False)

    # Load Datasets
    tr_set = Datasets(file_path=args.train_data_path,
                      label_list=label_list,
                      pretrained_type=args.pretrained_type,
                      max_len=args.max_len)
    # Use custom batch function
    collate_fn = ClassificationBatchFunction(args.max_len, tr_set.pad_idx, tr_set.cls_idx, tr_set.sep_idx)
    tr_loader = DataLoader(dataset=tr_set,
                           batch_size=args.train_batch_size,
                           shuffle=True,
                           num_workers=8,
                           pin_memory=True,
                           collate_fn=collate_fn)

    dev_set = Datasets(file_path=args.dev_data_path,
                       label_list=label_list,
                       pretrained_type=args.pretrained_type,
                       max_len=args.max_len)
    dev_loader = DataLoader(dataset=dev_set,
                            batch_size=args.eval_batch_size,
                            num_workers=8,
                            pin_memory=True,
                            drop_last=False,
                            collate_fn=collate_fn)

    # optimizer
    optimizer = layerwise_decay_optimizer(model=model, lr=args.learning_rate, layerwise_decay=args.layerwise_decay)

    # lr scheduler
    t_total = len(tr_loader) // args.gradient_accumulation_steps * args.epochs
    warmup_steps = int(t_total * args.warmup_percent)
    logger.info('total training steps : {}, lr warmup steps : {}'.format(t_total, warmup_steps))
    # Use gradual warmup and linear decay
    scheduler = optimization.WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps, t_total=t_total)

    # for low-precision training
    if args.fp16:
        try:
            from apex import amp
            logger.info('Use fp16')
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level, verbosity=0)

    # tensorboard setting
    save_path = "./model_saved_finetuning/lr{},batch{},total{},warmup{},len{},{}".format(
        args.learning_rate, args.train_batch_size * args.gradient_accumulation_steps, t_total,
        args.warmup_percent, args.max_len, args.pretrained_type)

    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    writer = SummaryWriter(save_path)

    # Save best model results with resultwriter
    result_writer = utils.ResultWriter("./model_saved_finetuning/results.csv")
    model.zero_grad()

    # Initialise best/logging metrics so they exist even if validation never
    # improves or no logging step is reached before training ends
    best_val_loss = 1e+9
    best_val_acc, best_val_macro_f1 = 0., 0.
    global_step = 0

    train_loss, train_acc, train_f1 = 0, 0, 0
    logging_loss, logging_acc, logging_f1 = 0, 0, 0
    loss_, acc_, f1_ = 0., 0., 0.

    logger.info('***** Training starts *****')
    total_result = []
    for epoch in tqdm(range(args.epochs), desc='epochs'):
        for step, batch in tqdm(enumerate(tr_loader), desc='steps', total=len(tr_loader)):
            model.train()
            x_train, mask_train, y_train = map(lambda x: x.to(device), batch)

            inputs = {
                'input_ids': x_train,
                'attention_mask': mask_train,
                'classification_label': y_train,
            }

            output, loss = model(**inputs)
            y_max = output.max(dim=1)[1]

            cr = classification_report(y_train.tolist(),
                                       y_max.tolist(),
                                       labels=list(range(len(label_list))),
                                       target_names=label_list,
                                       output_dict=True)
            # Get accuracy (micro f1)
            if 'micro avg' not in cr.keys():
                batch_acc = list(cr.items())[len(label_list)][1]
            else:
                # If at least one label is missing from the mini-batch, use the micro average instead
                batch_acc = cr['micro avg']['f1-score']
            # macro f1
            batch_macro_f1 = cr['macro avg']['f1-score']

            # accumulate measures
            grad_accu = args.gradient_accumulation_steps
            if grad_accu > 1:
                loss /= grad_accu
                batch_acc /= grad_accu
                batch_macro_f1 /= grad_accu

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            train_loss += loss.item()
            train_acc += batch_acc
            train_f1 += batch_macro_f1

            # Perform an optimizer step once every grad_accu mini-batches
            if (step + 1) % grad_accu == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.grad_clip_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), args.grad_clip_norm)

                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1
                
                if global_step % args.logging_step == 0:
                    acc_ = (train_acc - logging_acc) / args.logging_step
                    f1_ = (train_f1 - logging_f1) / args.logging_step
                    loss_ = (train_loss - logging_loss) / args.logging_step
                    writer.add_scalars('loss', {'train': loss_}, global_step)
                    writer.add_scalars('acc', {'train': acc_}, global_step)
                    writer.add_scalars('macro_f1', {'train': f1_}, global_step)

                    logger.info('[{}/{}], trn loss : {:.3f}, trn acc : {:.3f}, macro f1 : {:.3f}'.format(
                        global_step, t_total, loss_, acc_, f1_
                    ))
                    logging_acc, logging_f1, logging_loss = train_acc, train_f1, train_loss

                    # Get f1 score for each label
                    f1_results = [(l, r['f1-score']) for i, (l, r) in enumerate(cr.items()) if i < len(label_list)]
                    f1_log = "\n".join(["{} : {}".format(l, f) for l, f in f1_results])
                    logger.info("\n\n***f1-score***\n" + f1_log + "\n\n***confusion matrix***\n{}".format(
                        confusion_matrix(y_train.tolist(), y_max.tolist())))

        # Validation
        val_loss, val_acc, val_macro_f1, _ = evaluate(args, dev_loader, model, device)
        val_result = '[{}/{}] val loss : {:.3f}, val acc : {:.3f}, val macro f1 : {:.3f}'.format(
            global_step, t_total, val_loss, val_acc, val_macro_f1
        )

        writer.add_scalars('loss', {'val': val_loss}, global_step)
        writer.add_scalars('acc', {'val': val_acc}, global_step)
        writer.add_scalars('macro_f1', {'val': val_macro_f1}, global_step)
        logger.info(val_result)
        total_result.append(val_result)

        if val_loss < best_val_loss:
            # Save model checkpoints
            torch.save(model.state_dict(), os.path.join(save_path, 'best_model.bin'))
            torch.save(args, os.path.join(save_path, 'training_args.bin'))
            logger.info('Saving model checkpoint to %s', save_path)
            best_val_loss = val_loss
            best_val_acc = val_acc
            best_val_macro_f1 = val_macro_f1

    # Save results in 'model_saved_finetuning/results.csv'
    results = {
        'val_loss': best_val_loss,
        'val_acc': best_val_acc,
        'val_macro_f1': best_val_macro_f1,
        'save_dir': save_path,
        'pretrained_path': pretrained_path,
    }
    result_writer.update(args, **results)
    return global_step, loss_, acc_, best_val_loss, best_val_acc, total_result
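train() reads a long list of hyperparameters from the args namespace. A minimal driver sketch is shown below; only the attribute names are taken from the function above, and all default values are hypothetical placeholders.

import argparse

def build_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', default='cuda', type=str)
    parser.add_argument('--seed', default=42, type=int)  # assumed to be read by set_seed()
    parser.add_argument('--num_label', default='multi', choices=['multi', 'binary'])
    parser.add_argument('--pretrained_type', default='etri', type=str)
    parser.add_argument('--pretrained_model_path', default=None, type=str)
    parser.add_argument('--train_data_path', default='./data/train.csv', type=str)
    parser.add_argument('--dev_data_path', default='./data/dev.csv', type=str)
    parser.add_argument('--max_len', default=128, type=int)
    parser.add_argument('--train_batch_size', default=32, type=int)
    parser.add_argument('--eval_batch_size', default=64, type=int)
    parser.add_argument('--learning_rate', default=5e-5, type=float)
    parser.add_argument('--layerwise_decay', default=0.95, type=float)
    parser.add_argument('--gradient_accumulation_steps', default=1, type=int)
    parser.add_argument('--epochs', default=3, type=int)
    parser.add_argument('--warmup_percent', default=0.1, type=float)
    parser.add_argument('--grad_clip_norm', default=1.0, type=float)
    parser.add_argument('--logging_step', default=100, type=int)
    parser.add_argument('--fp16', action='store_true')
    parser.add_argument('--fp16_opt_level', default='O1', type=str)
    return parser.parse_args()

if __name__ == '__main__':
    train(build_args())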
Example #4
except Exception as e:
    logging.critical("Unexpected error : %s", e)
    sys.exit()

# print(DIRNAME)

pretrained_model_path = os.path.join(MODEL_ABS_PATH, constant.MODEL_BIN_NAME)
# print(pretrained_model_path)

config_path = os.path.join(DIRNAME, constant.BERT_CONFIG_NAME)
# print(config_path)

pretrained = torch.load(pretrained_model_path, map_location='cpu')
bert_config = BertConfig(config_path)
bert_config.num_labels = 7

model = BertForEmotionClassification(bert_config)
model.load_state_dict(pretrained, strict=False)
model.eval()
softmax = torch.nn.Softmax(dim=1)

tokenizer, vocab = get_pretrained_model('etri')

# '공포', '놀람', '분노', '슬픔', '중립', '행복', '혐오'
# 'scare', 'surprise', 'angry', 'sad', 'neutral', 'joy', 'disgust'
obj = dict()
emotion = ['scare', 'surprise', 'angry', 'sad', 'neutral', 'joy', 'disgust']


def get_prediction(sentence):
    ...
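The body of get_prediction is not included in this listing. Whatever it does, the final step is presumably to turn the classifier's logits into per-emotion probabilities using the softmax and emotion objects defined above. A minimal sketch of that mapping is given below; the function name is hypothetical, and it assumes logits of shape (1, 7) have already been produced for the sentence.

def map_scores_to_emotions(logits):
    # logits: tensor of shape (1, 7) from BertForEmotionClassification
    probs = softmax(logits).squeeze(0)  # softmax(dim=1) is defined above
    return {emo: float(p) for emo, p in zip(emotion, probs)}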
Example #5
def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path,
                                          pytorch_dump_folder_path,
                                          classification_head):
    """
    Copy/paste/tweak roberta's weights to our BERT structure.
    """
    roberta = FairseqRobertaModel.from_pretrained(roberta_checkpoint_path)
    roberta.eval()  # disable dropout
    config = BertConfig(
        vocab_size_or_config_json_file=50265,
        hidden_size=roberta.args.encoder_embed_dim,
        num_hidden_layers=roberta.args.encoder_layers,
        num_attention_heads=roberta.args.encoder_attention_heads,
        intermediate_size=roberta.args.encoder_ffn_embed_dim,
        max_position_embeddings=514,
        type_vocab_size=1,
        layer_norm_eps=1e-5,  # PyTorch default used in fairseq
    )
    if classification_head:
        config.num_labels = roberta.args.num_classes
    print("Our BERT config:", config)

    model = RobertaForSequenceClassification(config) if classification_head else RobertaForMaskedLM(config)
    model.eval()

    # Now let's copy all the weights.
    # Embeddings
    roberta_sent_encoder = roberta.model.decoder.sentence_encoder
    model.roberta.embeddings.word_embeddings.weight = roberta_sent_encoder.embed_tokens.weight
    model.roberta.embeddings.position_embeddings.weight = roberta_sent_encoder.embed_positions.weight
    model.roberta.embeddings.token_type_embeddings.weight.data = torch.zeros_like(
        model.roberta.embeddings.token_type_embeddings.weight
    )  # just zero them out b/c RoBERTa doesn't use them.
    model.roberta.embeddings.LayerNorm.weight = roberta_sent_encoder.emb_layer_norm.weight
    model.roberta.embeddings.LayerNorm.bias = roberta_sent_encoder.emb_layer_norm.bias

    for i in range(config.num_hidden_layers):
        # Encoder: start of layer
        layer: BertLayer = model.roberta.encoder.layer[i]
        roberta_layer: TransformerSentenceEncoderLayer = roberta_sent_encoder.layers[i]

        ### self attention
        self_attn: BertSelfAttention = layer.attention.self
        assert roberta_layer.self_attn.in_proj_weight.shape == torch.Size(
            (3 * config.hidden_size, config.hidden_size))
        # We use three distinct linear layers, so we split the fused fairseq
        # in_proj matrix into query / key / value here.
        self_attn.query.weight.data = roberta_layer.self_attn.in_proj_weight[:config.hidden_size, :]
        self_attn.query.bias.data = roberta_layer.self_attn.in_proj_bias[:config.hidden_size]
        self_attn.key.weight.data = roberta_layer.self_attn.in_proj_weight[config.hidden_size:2 * config.hidden_size, :]
        self_attn.key.bias.data = roberta_layer.self_attn.in_proj_bias[config.hidden_size:2 * config.hidden_size]
        self_attn.value.weight.data = roberta_layer.self_attn.in_proj_weight[2 * config.hidden_size:, :]
        self_attn.value.bias.data = roberta_layer.self_attn.in_proj_bias[2 * config.hidden_size:]

        ### self-attention output
        self_output: BertSelfOutput = layer.attention.output
        assert (self_output.dense.weight.shape ==
                roberta_layer.self_attn.out_proj.weight.shape)
        self_output.dense.weight = roberta_layer.self_attn.out_proj.weight
        self_output.dense.bias = roberta_layer.self_attn.out_proj.bias
        self_output.LayerNorm.weight = roberta_layer.self_attn_layer_norm.weight
        self_output.LayerNorm.bias = roberta_layer.self_attn_layer_norm.bias

        ### intermediate
        intermediate: BertIntermediate = layer.intermediate
        assert (
            intermediate.dense.weight.shape == roberta_layer.fc1.weight.shape)
        intermediate.dense.weight = roberta_layer.fc1.weight
        intermediate.dense.bias = roberta_layer.fc1.bias

        ### output
        bert_output: BertOutput = layer.output
        assert (
            bert_output.dense.weight.shape == roberta_layer.fc2.weight.shape)
        bert_output.dense.weight = roberta_layer.fc2.weight
        bert_output.dense.bias = roberta_layer.fc2.bias
        bert_output.LayerNorm.weight = roberta_layer.final_layer_norm.weight
        bert_output.LayerNorm.bias = roberta_layer.final_layer_norm.bias
        #### end of layer

    if classification_head:
        model.classifier.dense.weight = roberta.model.classification_heads['mnli'].dense.weight
        model.classifier.dense.bias = roberta.model.classification_heads['mnli'].dense.bias
        model.classifier.out_proj.weight = roberta.model.classification_heads['mnli'].out_proj.weight
        model.classifier.out_proj.bias = roberta.model.classification_heads['mnli'].out_proj.bias
    else:
        # LM Head
        model.lm_head.dense.weight = roberta.model.decoder.lm_head.dense.weight
        model.lm_head.dense.bias = roberta.model.decoder.lm_head.dense.bias
        model.lm_head.layer_norm.weight = roberta.model.decoder.lm_head.layer_norm.weight
        model.lm_head.layer_norm.bias = roberta.model.decoder.lm_head.layer_norm.bias
        model.lm_head.decoder.weight = roberta.model.decoder.lm_head.weight
        model.lm_head.bias = roberta.model.decoder.lm_head.bias

    # Let's check that we get the same results.
    input_ids: torch.Tensor = roberta.encode(SAMPLE_TEXT).unsqueeze(0)  # batch of size 1

    our_output = model(input_ids)[0]
    if classification_head:
        their_output = roberta.model.classification_heads['mnli'](roberta.extract_features(input_ids))
    else:
        their_output = roberta.model(input_ids)[0]
    print(our_output.shape, their_output.shape)
    max_absolute_diff = torch.max(torch.abs(our_output - their_output)).item()
    print(f"max_absolute_diff = {max_absolute_diff}")  # ~ 1e-7
    success = torch.allclose(our_output, their_output, atol=1e-3)
    print("Do both models output the same tensors?",
          "🔥" if success else "💩")
    if not success:
        raise Exception("Something went wRoNg")

    print(f"Saving model to {pytorch_dump_folder_path}")
    model.save_pretrained(pytorch_dump_folder_path)
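The conversion function is normally driven from the command line. A sketch of such a driver is given below; the flag names simply mirror the function's parameters and may differ from the original script's exact CLI.

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--roberta_checkpoint_path", required=True, type=str,
                        help="Path to the fairseq RoBERTa checkpoint directory.")
    parser.add_argument("--pytorch_dump_folder_path", required=True, type=str,
                        help="Where to save the converted PyTorch model.")
    parser.add_argument("--classification_head", action="store_true",
                        help="Also convert the (MNLI) classification head.")
    cli_args = parser.parse_args()
    convert_roberta_checkpoint_to_pytorch(cli_args.roberta_checkpoint_path,
                                          cli_args.pytorch_dump_folder_path,
                                          cli_args.classification_head)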