Code example #1
def main():
    # Parse command-line arguments (auto-generated from the docstring at the top of this file)
    args = docopt(__doc__)
    pprint(args)

    # Read the parameters
    weights    = args['<pretrained_weights>']
    dir_name   = args['--dir_name']
    lr         = float(args['--lr'])
    seq_len    = int(args['--seq_len'])
    max_epoch  = int(args['--max_epoch'])
    batch_size = int(args['--batch_size'])
    num_train  = int(args['--num_train'])
    num_valid  = int(args['--num_valid'])

    # Select the model architecture for the chosen pretrained weights
    if   weights == 'bert-base-uncased':
        tokenizer = BertTokenizer.from_pretrained(weights)
        config    = BertConfig(num_labels=4)
        model     = BertForSequenceClassification.from_pretrained(weights, config=config)

    elif weights == 'bert-large-uncased':
        tokenizer = BertTokenizer.from_pretrained(weights)
        config    = BertConfig(hidden_size=1024, num_hidden_layers=24,
                               num_attention_heads=16, intermediate_size=4096,
                               num_labels=4)
        model     = BertForSequenceClassification.from_pretrained(weights, config=config)

    elif weights in ['albert-base-v1', 'albert-large-v1', 'albert-base-v2', 'albert-large-v2']:
        tokenizer = AlbertTokenizer.from_pretrained(weights)
        config    = AlbertConfig.from_pretrained(weights, num_labels=4)
        model     = AlbertForSequenceClassification.from_pretrained(weights, config=config)

    # Select the device to use
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Load the data and build the datasets
    encoder = TwinPhraseEncoder(tokenizer, seq_len)

    train_dataset = WordnetDataset(mode='train', num_data=num_train, transform=encoder)
    valid_dataset = WordnetDataset(mode='valid', num_data=num_valid, transform=encoder)
    train_loader = data.DataLoader(train_dataset, batch_size, shuffle=True)
    valid_loader = data.DataLoader(valid_dataset, batch_size, shuffle=True)

    # Define the optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Create the output directories
    log_dir   = f'../logs/{dir_name}'
    model_dir = f'../models/{dir_name}'
    os.makedirs(log_dir  , exist_ok=True)
    os.makedirs(model_dir, exist_ok=True)

    # Log training metrics (TensorBoard)
    train_writer = SummaryWriter(log_dir=f'{log_dir}/train')
    valid_writer = SummaryWriter(log_dir=f'{log_dir}/valid')

    # Training loop
    for epoch in range(1, max_epoch+1):
        print('='*27 + f' Epoch {epoch:0>2} ' + '='*27)

        # Training
        loss, accu = train_model(model, optimizer, train_loader, device)
        print(f'|  Training    |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |')
        train_writer.add_scalar('loss', loss, epoch)
        train_writer.add_scalar('accu', accu, epoch)

        # Validation
        loss, accu = valid_model(model, optimizer, valid_loader, device)
        print(f'|  Validation  |  loss-avg : {loss:>8.6f}  |  accuracy : {accu:>8.3%}  |')
        valid_writer.add_scalar('loss', loss, epoch)
        valid_writer.add_scalar('accu', accu, epoch)

        # Save the model
        torch.save(model.state_dict(), f'{model_dir}/epoch-{epoch:0>2}.pkl')

    train_writer.close()
    valid_writer.close()
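Note that `train_model` and `valid_model` are helpers defined elsewhere in this project and are not shown here. A minimal sketch of what they might look like, assuming each batch produced by `TwinPhraseEncoder` is a dict of tensors whose keys (`input_ids`, `attention_mask`, `labels`, ...) match the model's forward signature (an assumption, not the project's actual code):

import torch

def train_model(model, optimizer, loader, device):
    """One training epoch; returns (average loss, accuracy). Sketch only."""
    model.train()
    total_loss, correct, count = 0.0, 0, 0
    for batch in loader:
        batch = {k: v.to(device) for k, v in batch.items()}
        optimizer.zero_grad()
        loss, logits = model(**batch)[:2]   # loss is returned because labels are passed
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        correct += (logits.argmax(dim=-1) == batch['labels']).sum().item()
        count += batch['labels'].size(0)
    return total_loss / len(loader), correct / count

def valid_model(model, optimizer, loader, device):
    """One validation pass; same bookkeeping without the parameter update."""
    model.eval()
    total_loss, correct, count = 0.0, 0, 0
    with torch.no_grad():
        for batch in loader:
            batch = {k: v.to(device) for k, v in batch.items()}
            loss, logits = model(**batch)[:2]
            total_loss += loss.item()
            correct += (logits.argmax(dim=-1) == batch['labels']).sum().item()
            count += batch['labels'].size(0)
    return total_loss / len(loader), correct / count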
Code example #2
                              batch_size=batch_size)

# Create the DataLoader for our validation set.
validation_data = TensorDataset(validation_inputs, validation_masks,
                                validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data,
                                   sampler=validation_sampler,
                                   batch_size=batch_size)

# Load BertForSequenceClassification, the pretrained Bert model with a single
# linear classification layer on top.
model = BertForSequenceClassification.from_pretrained(
    "dkleczek/bert-base-polish-cased-v1",  # Use the 12-layer Bert model, with an uncased vocab.
    num_labels=2,  # The number of output labels--2 for binary classification.
    # You can increase this for multi-class tasks.
    output_attentions=False,  # Whether the model returns attentions weights.
    output_hidden_states=False,  # Whether the model returns all hidden-states.
    attention_probs_dropout_prob=0.1,
    hidden_dropout_prob=0.1)

# Tell pytorch to run this model on the GPU.
model.cuda()

# Note: AdamW is a class from the huggingface library (as opposed to pytorch)
# I believe the 'W' stands for 'Weight Decay fix'.
optimizer = AdamW(
    model.parameters(),
    lr=5e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
    eps=1e-8,  # args.adam_epsilon  - default is 1e-8.
    weight_decay=0.05)
Code example #3
from transformers import BertForSequenceClassification, AdamW
from utils.config import bert_config_from_file

config = bert_config_from_file('conf/bert.json')
model = BertForSequenceClassification.from_pretrained(
    config['pretrained'], cache_dir=config['cache_dir'], num_labels=5)
no_decay = ['bias', 'LayerNorm.weight']
optimizer_grouped_parameters = [{
    'params': [
        p for n, p in model.named_parameters()
        if not any(nd in n for nd in no_decay)
    ],
    'weight_decay':
    0.01
}, {
    'params': [
        p for n, p in model.named_parameters()
        if any(nd in n for nd in no_decay)
    ],
    'weight_decay':
    0.0
}]
optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5, eps=1e-8)
Code example #4
    def __init__(self, weight="bert-base-japanese-whole-word-masking"):
        self.weight = weight

        self.tokenizer = BertJapaneseTokenizer.from_pretrained(self.weight)
        self.model = BertForSequenceClassification.from_pretrained(self.weight)
Code example #5
    def __init__(self, options_name: str = "bert-base-cased"):
        super(BERT, self).__init__()

        options_name = options_name
        self.encoder = BertForSequenceClassification.from_pretrained(
            options_name)
Code example #6
import torch
from transformers import BertTokenizer, BertForSequenceClassification
import torch.nn as nn
import csv
import pickle
import os
from pycocotools.coco import COCO
# load model
state_dict = torch.load('./checkpoints/moco.p')
# for para in model:
#     print(para,"\t",model[para].size())

# create model
net = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=80,
    output_attentions=False,
    output_hidden_states=False,
)

# for para in net.state_dict():
#     print(para,"\t",net.state_dict()[para].size())

# fc_features = net.classifier.in_features
# net.classifier = nn.Linear(fc_features,2)

# load parameters
#net.load_state_dict(state_dict)

# for para in net.state_dict():
#     print(para,"\t",net.state_dict()[para].size())
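The commented-out lines above sketch how the MoCo checkpoint would be inspected and loaded into the new classifier. One way to transfer the compatible weights, shown here only as a sketch (the checkpoint's key names are assumed to match BERT's except for the classification head):

# Keep only checkpoint tensors whose names and shapes match the new model,
# merge them into the model's own state_dict, and load the result.
model_dict = net.state_dict()
compatible = {k: v for k, v in state_dict.items()
              if k in model_dict and v.size() == model_dict[k].size()}
model_dict.update(compatible)
net.load_state_dict(model_dict)
print('transferred %d / %d tensors from the checkpoint' % (len(compatible), len(model_dict)))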
Code example #7
    def load_model(self, path_model, path_config):
        # self.model = BertForSequenceClassification(BertConfig(path_config), num_labels=self.num_classes, output_attentions=False, output_hidden_states=True)
        self.model = BertForSequenceClassification.from_pretrained(path_model)
        self.tokenizer = BertTokenizer.from_pretrained(path_model)
        # self.model.load_state_dict(torch.load(path_model))
        self.__init_model()
Code example #8
# Train and evaluate using tf.keras.Model.fit()
train_steps = train_examples//BATCH_SIZE
valid_steps = valid_examples//EVAL_BATCH_SIZE

history = model.fit(train_dataset, epochs=EPOCHS, steps_per_epoch=train_steps,
                    validation_data=valid_dataset, validation_steps=valid_steps)

# Save TF2 model
os.makedirs('./save/', exist_ok=True)
model.save_pretrained('./save/')

if TASK == "mrpc":
    # Load the TensorFlow model in PyTorch for inspection
    # This is to demo the interoperability between the two frameworks, you don't have to 
    # do this in real life (you can run the inference on the TF model).
    pytorch_model = BertForSequenceClassification.from_pretrained('./save/', from_tf=True)

    # Quickly test a few predictions - MRPC is a paraphrasing task, let's see if our model learned the task
    sentence_0 = 'This research was consistent with his findings.'
    sentence_1 = 'His findings were compatible with this research.'
    sentence_2 = 'His findings were not compatible with this research.'
    inputs_1 = tokenizer.encode_plus(sentence_0, sentence_1, add_special_tokens=True, return_tensors='pt')
    inputs_2 = tokenizer.encode_plus(sentence_0, sentence_2, add_special_tokens=True, return_tensors='pt')

    del inputs_1["special_tokens_mask"]
    del inputs_2["special_tokens_mask"]

    pred_1 = pytorch_model(**inputs_1)[0].argmax().item()
    pred_2 = pytorch_model(**inputs_2)[0].argmax().item()
    print('sentence_1 is', 'a paraphrase' if pred_1 else 'not a paraphrase', 'of sentence_0')
    print('sentence_2 is', 'a paraphrase' if pred_2 else 'not a paraphrase', 'of sentence_0')
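This fragment assumes a `tokenizer` and a TF2 `model` created earlier in the script. A minimal sketch of that setup, assuming the `bert-base-cased` checkpoint used by the official transformers TF2 GLUE example (verify against the full script):

from transformers import BertTokenizer, TFBertForSequenceClassification

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model = TFBertForSequenceClassification.from_pretrained('bert-base-cased', num_labels=2)
# model.compile(optimizer, loss, metrics) and the MRPC train/valid tf.data datasets
# are also prepared before the model.fit() call shown above.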
Code example #9
def main():
    # 1. Basic setup
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--pytorch-only',
                        action='store_true',
                        default=False,
                        help='disables ONNX Runtime training')
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for training (default: 32)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=64,
                        metavar='N',
                        help='input batch size for testing (default: 64)')
    parser.add_argument('--view-graphs',
                        action='store_true',
                        default=False,
                        help='views forward and backward graphs')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--epochs',
                        type=int,
                        default=4,
                        metavar='N',
                        help='number of epochs to train (default: 4)')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        metavar='S',
                        help='random seed (default: 42)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=40,
        metavar='N',
        help=
        'how many batches to wait before logging training status (default: 40)'
    )
    parser.add_argument(
        '--train-steps',
        type=int,
        default=-1,
        metavar='N',
        help=
        'number of steps to train. Set -1 to run through whole dataset (default: -1)'
    )
    parser.add_argument(
        '--log-level',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        default='WARNING',
        help='Log level (default: WARNING)')
    parser.add_argument(
        '--num-hidden-layers',
        type=int,
        default=1,
        metavar='H',
        help=
        'Number of hidden layers for the BERT model. A vanilla BERT has 12 hidden layers (default: 1)'
    )
    parser.add_argument('--data-dir',
                        type=str,
                        default='./cola_public/raw',
                        help='Path to the bert data directory')

    args = parser.parse_args()

    # Device (CPU vs CUDA)
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
        print('There are %d GPU(s) available.' % torch.cuda.device_count())
        print('We will use the GPU:', torch.cuda.get_device_name(0))
    else:
        print('No GPU available, using the CPU instead.')
        device = torch.device("cpu")

    # Set log level
    numeric_level = getattr(logging, args.log_level.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % args.log_level)
    logging.basicConfig(level=numeric_level)

    # 2. Dataloader
    train_dataloader, validation_dataloader = load_dataset(args)

    # 3. Modeling
    # Load BertForSequenceClassification, the pretrained BERT model with a single
    # linear classification layer on top.
    config = AutoConfig.from_pretrained(
        "bert-base-uncased",
        num_labels=2,
        num_hidden_layers=args.num_hidden_layers,
        output_attentions=False,  # Whether the model returns attentions weights.
        output_hidden_states=
        False,  # Whether the model returns all hidden-states.
    )
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
        config=config,
    )

    if not args.pytorch_only:
        # Just for future debugging
        debug_options = DebugOptions(
            save_onnx=False,
            onnx_prefix='BertForSequenceClassificationAutoCast')

        model = ORTModule(model, debug_options)

    model._torch_module._execution_manager(
        is_training=True)._enable_grad_acc_optimization = True

    # Tell pytorch to run this model on the GPU.
    if torch.cuda.is_available() and not args.no_cuda:
        model.cuda()

    # Note: AdamW is a class from the huggingface library (as opposed to pytorch)
    optimizer = AdamW(
        model.parameters(),
        lr=2e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon  - default is 1e-8.
    )

    # Authors recommend between 2 and 4 epochs
    # Total number of training steps is number of batches * number of epochs.
    total_steps = len(train_dataloader) * args.epochs

    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=0,  # Default value in run_glue.py
        num_training_steps=total_steps)
    scaler = torch.cuda.amp.GradScaler()

    # Seed
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    onnxruntime.set_seed(args.seed)
    if torch.cuda.is_available() and not args.no_cuda:
        torch.cuda.manual_seed_all(args.seed)

    # 4. Train loop (fine-tune)
    total_training_time, total_test_time, epoch_0_training, validation_accuracy = 0, 0, 0, 0
    for epoch_i in range(0, args.epochs):
        total_training_time += train(model, optimizer, scaler, scheduler,
                                     train_dataloader, epoch_i, device, args)
        if not args.pytorch_only and epoch_i == 0:
            epoch_0_training = total_training_time
        test_time, validation_accuracy = test(model, validation_dataloader,
                                              device, args)
        total_test_time += test_time

    assert validation_accuracy > 0.5

    print('\n======== Global stats ========')
    if not args.pytorch_only:
        estimated_export = 0
        if args.epochs > 1:
            estimated_export = epoch_0_training - (
                total_training_time - epoch_0_training) / (args.epochs - 1)
            print("  Estimated ONNX export took:               {:.4f}s".format(
                estimated_export))
        else:
            print(
                "  Estimated ONNX export took:               Estimate available when epochs > 1 only"
            )
        print("  Accumulated training without export took: {:.4f}s".format(
            total_training_time - estimated_export))
    print("  Accumulated training took:                {:.4f}s".format(
        total_training_time))
    print("  Accumulated validation took:              {:.4f}s".format(
        total_test_time))
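The `train` and `test` helpers called above are defined elsewhere in the script. A minimal sketch of the inner mixed-precision update they presumably perform with the GradScaler and scheduler created above (batch layout assumed to be input ids, attention masks and labels, as in the other examples):

def train_step(model, optimizer, scaler, scheduler, batch, device):
    """A sketch of one update step; not the script's actual helper."""
    b_input_ids, b_input_mask, b_labels = (t.to(device) for t in batch)
    optimizer.zero_grad()
    with torch.cuda.amp.autocast():
        outputs = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
        loss = outputs[0]
    scaler.scale(loss).backward()
    scaler.unscale_(optimizer)                  # so gradient clipping sees the true values
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    scaler.step(optimizer)
    scaler.update()
    scheduler.step()
    return loss.item()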
                    help="logging steps")
parser.add_argument("--learning_rate",
                    type=float,
                    default=5e-5,
                    help="learning rate")
parser.add_argument(
    "--output_dir",
    type=str,
    default='./checkpoints',
    help="directory where check points are saved during training")

args = parser.parse_args()

# Load the BERT base cased tokenizer and pre-trained model
tokenizer = BertTokenizerFast.from_pretrained(args.model_type)
seq_model = BertForSequenceClassification.from_pretrained(args.model_type,
                                                          num_labels=2)

# Load the dataset
training_texts, training_spans = load_dataset(args.train_dir)
val_texts, val_spans = load_dataset(args.dev_dir)
test_texts, test_spans = load_dataset(args.test_dir)

# Split each post into sentences
training_sentences, training_labels, training_sentence_spans, tr_post_to_sentence_num = split_into_setences(
    training_texts, training_spans)
val_sentences, val_labels, val_sentence_spans, val_post_to_sentence_num = split_into_setences(
    val_texts, val_spans)
test_sentences, test_labels, test_sentence_spans, test_post_to_sentence_num = split_into_setences(
    test_texts, test_spans)

# Tokenize the sentences
Code example #11
File: toxicity_ddp_lab.py  Project: sli0111/v2
def dist_train(rank, world_size, data_dir, epochs):

    logger.info("Running on rank: %i", rank)
    seed = 1234
    lr = 2e-5
    batch_size = 32
    accumulation_steps = 1
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

    torch.cuda.set_device(rank)

    logger.info('loading data..')
    train_dataset, val_dataset = load_data(data_dir, seed)
    y_columns = ['target']

    # use torch.utils.data.distributed.DistributedSampler here to create a sampler.
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=0,
        pin_memory=True,
        # uncomment this once you have your sampler working
        #                                               sampler=train_sampler
    )
    logger.info("len of train_loader: %i", len(train_loader))

    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=len(y_columns)).cuda()
    # move model to GPU with id rank
    # use nn.parallel.DistributedDataParallel to put your model on multiple GPUs
    # replace the line below with your own
    ddp_model = model

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    optimizer = BertAdam(optimizer_grouped_parameters, lr=lr)

    tq = trange(epochs)
    for epoch in tq:
        ddp_model.train()
        ddp_train(rank, world_size, ddp_model, train_loader, optimizer,
                  accumulation_steps)

    if rank == 0:
        torch.save(ddp_model.state_dict(), CHECKPOINT_PATH)
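The two lab placeholders above (the sampler and the DDP wrapper) could be filled in roughly as follows, assuming the default process group has already been initialised for `world_size` processes with `torch.distributed.init_process_group`:

from torch.nn.parallel import DistributedDataParallel as DDP

# Each rank gets a disjoint shard of the training set.
train_sampler = torch.utils.data.distributed.DistributedSampler(
    train_dataset, num_replicas=world_size, rank=rank, shuffle=True)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=False,  # the sampler already shuffles
                                           num_workers=0,
                                           pin_memory=True,
                                           sampler=train_sampler)

# Gradients are all-reduced across ranks after each backward().
ddp_model = DDP(model, device_ids=[rank])
# Remember to call train_sampler.set_epoch(epoch) at the start of every epoch.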
Code example #12
File: bert.py  Project: aiida-/EmotionDetection
def CV(input_ids, attention_masks, pred_labels, dataset, FILE, device):
    # -------------------------------------------------------------------

    # Use train_test_split to split our data into train and validation sets for
    # training
    for LABEL in pred_labels:

        print("working on", LABEL, "\n")
        labels = dataset[LABEL].values
        max_f1 = 0

        # Use 90% for training and 10% for validation.
        train_inputs, validation_inputs, train_labels, validation_labels = \
            train_test_split(input_ids, labels, random_state=2018, test_size=0.1)
        # Do the same for the masks.
        train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,
                                                               random_state=2018, test_size=0.1)

        validation_accuracies = []
        validation_precisions = []
        validation_f1s = []
        validation_recalls = []
        num_folds = 10
        skf = StratifiedKFold(n_splits=num_folds, shuffle=True)
        fold = 1
        for train_idx, test_idx in skf.split(input_ids, labels):
            print("----------------------------fold = " + str(fold) + "-----------------------------")
            fold += 1
            train_inputs = input_ids[train_idx]
            train_labels = labels[train_idx]
            train_masks = [attention_masks[idx] for idx in train_idx]
            validation_inputs = input_ids[test_idx]
            validation_labels = labels[test_idx]
            validation_masks = [attention_masks[idx] for idx in test_idx]
            # -------------------------------------------------------------------

            # Convert all inputs and labels into torch tensors, the required datatype
            # for our model.
            train_inputs = torch.tensor(train_inputs)
            validation_inputs = torch.tensor(validation_inputs)

            train_labels = torch.tensor(train_labels)
            validation_labels = torch.tensor(validation_labels)

            train_masks = torch.tensor(train_masks)
            validation_masks = torch.tensor(validation_masks)

            # -------------------------------------------------------------------

            # This section is basically aiming to save runtime memory using torch DataLoader class :)

            # The DataLoader needs to know our batch size for training, so we specify it
            # here.
            # For fine-tuning BERT on a specific task, the authors recommend a batch size of
            # 16 or 32.

            batch_size = 32

            # Create the DataLoader for our training set.
            train_data = TensorDataset(train_inputs, train_masks, train_labels)
            train_sampler = RandomSampler(train_data)
            train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

            # Create the DataLoader for our validation set.
            validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
            validation_sampler = SequentialSampler(validation_data)
            validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

            # -------------------------------------------------------------------

            # Load BertForSequenceClassification, the pretrained BERT model with a single
            # linear classification layer on top.
            model = BertForSequenceClassification.from_pretrained(
                "bert-base-uncased",  # Use the 12-layer BERT model, with an uncased vocab.
                num_labels=6,  # The number of output labels for this task.
                output_attentions=False,  # Whether the model returns attentions weights.
                output_hidden_states=False,  # Whether the model returns all hidden-states.
            )

            # Tell pytorch to run this model on the GPU.
            model.cuda()

            # -------------------------------------------------------------------

            # Get all of the model's parameters as a list of tuples.
            params = list(model.named_parameters())

            # print('The BERT model has {:} different named parameters.\n'.format(len(params)))
            #
            # print('==== Embedding Layer ====\n')
            #
            # for p in params[0:5]:
            #     print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
            #
            # print('\n==== First Transformer ====\n')
            #
            # for p in params[5:21]:
            #     print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
            #
            # print('\n==== Output Layer ====\n')
            #
            # for p in params[-4:]:
            #     print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))

            # -------------------------------------------------------------------

            """
            For the purposes of fine-tuning, the authors recommend choosing from the following values:
    
                Batch size: 16, 32 (We chose 32 when creating our DataLoaders).
                Learning rate (Adam): 5e-5, 3e-5, 2e-5 (We'll use 2e-5).
                Number of epochs: 2, 3, 4 (We'll use 4).
    
            """

            # Note: AdamW is a class from the huggingface library (as opposed to pytorch)
            # I believe the 'W' stands for 'Weight Decay fix'.
            optimizer = AdamW(model.parameters(),
                              lr=2e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
                              eps=1e-8  # args.adam_epsilon  - default is 1e-8.
                              )

            # Number of training epochs (authors recommend between 2 and 4)
            epochs = 2

            # Total number of training steps is number of batches * number of epochs.
            total_steps = len(train_dataloader) * epochs

            # Create the learning rate scheduler.
            scheduler = get_linear_schedule_with_warmup(optimizer,
                                                        num_warmup_steps=0,  # Default value in run_glue.py
                                                        num_training_steps=total_steps)


            # -------------------------------------------------------------------

            # Function to calculate the accuracy of our predictions vs labels
            def flat_accuracy(preds, labels):
                pred_flat = np.argmax(preds, axis=1).flatten()
                labels_flat = labels.flatten()
                return np.sum(pred_flat == labels_flat) / len(labels_flat)


            def flat_precision(preds, labels):
                pred_flat = np.argmax(preds, axis=1).flatten()
                labels_flat = labels.flatten()
                return precision_score(labels_flat, pred_flat, average="micro")


            def flat_recall(preds, labels):
                pred_flat = np.argmax(preds, axis=1).flatten()
                labels_flat = labels.flatten()
                return recall_score(labels_flat, pred_flat, average="micro")


            def format_time(elapsed):
                '''
                Takes a time in seconds and returns a string hh:mm:ss
                '''
                # Round to the nearest second.
                elapsed_rounded = int(round((elapsed)))

                # Format as hh:mm:ss
                return str(datetime.timedelta(seconds=elapsed_rounded))


            # -------------------------------------------------------------------

            # This training code is based on the `run_glue.py` script here:
            # https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

            # Set the seed value all over the place to make this reproducible.
            seed_val = 42

            random.seed(seed_val)
            np.random.seed(seed_val)
            torch.manual_seed(seed_val)
            torch.cuda.manual_seed_all(seed_val)

            # Store the average loss after each epoch so we can plot them.
            loss_values = []

            # For each epoch...
            for epoch_i in range(0, epochs):

                # ========================================
                #               Training
                # ========================================

                # Perform one full pass over the training set.

                print("")
                print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
                print('Training...')

                # Measure how long the training epoch takes.
                t0 = time.time()

                # Reset the total loss for this epoch.
                total_loss = 0

                # Put the model into training mode. Don't be misled--the call to
                # `train` just changes the *mode*, it doesn't *perform* the training.
                # `dropout` and `batchnorm` layers behave differently during training
                # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
                model.train()

                # For each batch of training data...
                for step, batch in enumerate(train_dataloader):

                    # Progress update every 40 batches.
                    if step % 40 == 0 and not step == 0:
                        # Calculate elapsed time in minutes.
                        elapsed = format_time(time.time() - t0)

                        # Report progress.
                        print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

                    # Unpack this training batch from our dataloader.
                    #
                    # As we unpack the batch, we'll also copy each tensor to the GPU using the
                    # `to` method.
                    #
                    # `batch` contains three pytorch tensors:
                    #   [0]: input ids
                    #   [1]: attention masks
                    #   [2]: labels
                    b_input_ids = batch[0].to(device)
                    b_input_mask = batch[1].to(device)
                    b_labels = batch[2].to(device)

                    # Always clear any previously calculated gradients before performing a
                    # backward pass. PyTorch doesn't do this automatically because
                    # accumulating the gradients is "convenient while training RNNs".
                    # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
                    model.zero_grad()

                    # Perform a forward pass (evaluate the model on this training batch).
                    # This will return the loss (rather than the model output) because we
                    # have provided the `labels`.
                    # The documentation for this `model` function is here:
                    # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
                    outputs = model(b_input_ids,
                                    token_type_ids=None,
                                    attention_mask=b_input_mask,
                                    labels=b_labels)

                    # The call to `model` always returns a tuple, so we need to pull the
                    # loss value out of the tuple.
                    loss = outputs[0]

                    # Accumulate the training loss over all of the batches so that we can
                    # calculate the average loss at the end. `loss` is a Tensor containing a
                    # single value; the `.item()` function just returns the Python value
                    # from the tensor.
                    total_loss += loss.item()

                    # Perform a backward pass to calculate the gradients.
                    loss.backward()

                    # Clip the norm of the gradients to 1.0.
                    # This is to help prevent the "exploding gradients" problem.
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

                    # Update parameters and take a step using the computed gradient.
                    # The optimizer dictates the "update rule"--how the parameters are
                    # modified based on their gradients, the learning rate, etc.
                    optimizer.step()

                    # Update the learning rate.
                    scheduler.step()

                # Calculate the average loss over the training data.
                avg_train_loss = total_loss / len(train_dataloader)

                # Store the loss value for plotting the learning curve.
                loss_values.append(avg_train_loss)

                print("")
                print("  Average training loss: {0:.2f}".format(avg_train_loss))
                print("  Training epcoh took: {:}".format(format_time(time.time() - t0)))

                # ========================================
                #               Validation
                # ========================================
                # After the completion of each training epoch, measure our performance on
                # our validation set.

                print("")
                print("Running Validation...")

                t0 = time.time()

                # Put the model in evaluation mode--the dropout layers behave differently
                # during evaluation.
                model.eval()

                # Tracking variables
                eval_loss, eval_accuracy, eval_precision, eval_recall = 0, 0, 0, 0
                nb_eval_steps, nb_eval_examples = 0, 0

                # Evaluate data for one epoch
                for batch in validation_dataloader:
                    # Add batch to GPU
                    batch = tuple(t.to(device) for t in batch)

                    # Unpack the inputs from our dataloader
                    b_input_ids, b_input_mask, b_labels = batch

                    # Telling the model not to compute or store gradients, saving memory and
                    # speeding up validation
                    with torch.no_grad():
                        # Forward pass, calculate logit predictions.
                        # This will return the logits rather than the loss because we have
                        # not provided labels.
                        # token_type_ids is the same as the "segment ids", which
                        # differentiates sentence 1 and 2 in 2-sentence tasks.
                        # The documentation for this `model` function is here:
                        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
                        outputs = model(b_input_ids,
                                        token_type_ids=None,
                                        attention_mask=b_input_mask)

                    # Get the "logits" output by the model. The "logits" are the output
                    # values prior to applying an activation function like the softmax.
                    logits = outputs[0]

                    # Move logits and labels to CPU
                    logits = logits.detach().cpu().numpy()
                    label_ids = b_labels.to('cpu').numpy()

                    # Calculate the accuracy for this batch of test sentences.
                    tmp_eval_accuracy = flat_accuracy(logits, label_ids)
                    tmp_eval_precision = flat_precision(logits, label_ids)
                    tmp_eval_recall = flat_recall(logits, label_ids)

                    # Accumulate the total accuracy.
                    eval_accuracy += tmp_eval_accuracy
                    eval_precision += tmp_eval_precision
                    eval_recall += tmp_eval_recall

                    # Track the number of batches
                    nb_eval_steps += 1

                # Report the final accuracy for this validation run.
                print("  Accuracy: {0:.2f}".format(eval_accuracy / nb_eval_steps))
                validation_accuracies.append((eval_accuracy / nb_eval_steps))
                print("  Precision: {0:.2f}".format(eval_precision / nb_eval_steps))
                validation_precisions.append((eval_precision / nb_eval_steps))
                print("  Recall: {0:.2f}".format(eval_recall / nb_eval_steps))
                validation_recalls.append((eval_recall / nb_eval_steps))
                temp_precision = validation_precisions[-1]
                temp_recall = validation_recalls[-1]
                if temp_precision * temp_recall == 0:
                    F1 = 0
                else:
                    F1 = 2 * temp_precision * temp_recall / (temp_precision + temp_recall)
                validation_f1s.append(F1)
                print("  F1: {0:.2f}".format(F1))
                if F1 > max_f1:
                    model.save_pretrained("model_" + LABEL)
                    max_f1 = F1
                print("  Validation took: {:}".format(format_time(time.time() - t0)))

            print("")
            print("Training complete!")

        # -------------------------------------------------------------------

        from numpy import asarray
        from numpy import savetxt

        # define data
        data = asarray(validation_recalls)
        # save to csv file
        savetxt(FILE + "_" + LABEL + '_validation_recalls.csv', data, delimiter=',')
        # define data
        data = asarray(validation_precisions)
        # save to csv file
        savetxt(FILE + "_" + LABEL + '_validation_precision.csv', data, delimiter=',')
        data = asarray(validation_accuracies)
        # save to csv file
        savetxt(FILE + "_" + LABEL + '_validation_accuracies.csv', data, delimiter=',')

        data = asarray(validation_f1s)
        # save to csv file
        savetxt(FILE + "_" + LABEL + '_validation_F1.csv', data, delimiter=',')

        # Use plot styling from seaborn.
        sns.set(style='darkgrid')

        # Increase the plot size and font size.
        sns.set(font_scale=1.5)
        plt.rcParams["figure.figsize"] = (12, 6)

        # Plot the learning curve.
        plt.plot(loss_values, 'b-o')

        # Label the plot.
        plt.title("Training loss")
        plt.xlabel("Epoch")
        plt.ylabel("Loss")

        plt.savefig(LABEL + "_" + FILE + "_" + "Loss.png")
Code example #13
File: imdb.py  Project: dmitriydligach/Sandbox
def train():
    """Fine-tune bert using IMDB data"""

    # deal with warnings for now
    os.system('clear')

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('using gpu:', torch.cuda.get_device_name(gpu_num))

    sentences, labels = load_data()
    print('loaded %d examples and %d labels...' %
          (len(sentences), len(labels)))

    # tokenize and convert to ints
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=True)
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    input_ids = pad_sequences(input_ids,
                              maxlen=max_len,
                              dtype="long",
                              truncating="post",
                              padding="post")

    # create attention masks
    attention_masks = []
    for seq in input_ids:
        # use 1s for tokens and 0s for padding
        seq_mask = [float(i > 0) for i in seq]
        attention_masks.append(seq_mask)

    # make validation set
    train_inputs, validation_inputs, train_labels, validation_labels = \
      train_test_split(input_ids, labels, test_size=0.1, random_state=0)
    train_masks, validation_masks, _, _ = \
      train_test_split(attention_masks, input_ids, test_size=0.1, random_state=0)

    # convert everything into torch tensors
    train_inputs = torch.tensor(train_inputs)
    validation_inputs = torch.tensor(validation_inputs)
    train_labels = torch.tensor(train_labels)
    validation_labels = torch.tensor(validation_labels)
    train_masks = torch.tensor(train_masks)
    validation_masks = torch.tensor(validation_masks)

    # create iterators for our data
    train_data = TensorDataset(train_inputs, train_masks, train_labels)
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data,
                                  sampler=train_sampler,
                                  batch_size=batch_size)
    validation_data = TensorDataset(validation_inputs, validation_masks,
                                    validation_labels)
    validation_sampler = SequentialSampler(validation_data)
    validation_dataloader = DataLoader(validation_data,
                                       sampler=validation_sampler,
                                       batch_size=batch_size)

    # load pretrained bert model with a single linear classification layer on top
    model = BertForSequenceClassification.from_pretrained("bert-base-uncased",
                                                          num_labels=2)
    model.cuda()

    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay_rate':
        0.01
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay_rate':
        0.0
    }]

    # this variable contains all of the hyperparameter information our training loop needs
    optimizer = AdamW(optimizer_grouped_parameters, lr=2e-5)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=num_warmup_steps,
                                     t_total=num_total_steps)

    # store our loss and accuracy for plotting
    train_loss_set = []

    # training loop
    for epoch in trange(epochs, desc="epoch"):

        # Set our model to training mode (as opposed to evaluation mode)
        model.train()

        # Tracking variables
        tr_loss = 0
        nb_tr_examples, nb_tr_steps = 0, 0

        # train for one epoch
        for step, batch in enumerate(train_dataloader):

            # add batch to GPU
            batch = tuple(t.to(device) for t in batch)

            # unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch

            # clear out the gradients (by default they accumulate)
            optimizer.zero_grad()

            # forward pass
            loss, logits = model(b_input_ids,
                                 token_type_ids=None,
                                 attention_mask=b_input_mask,
                                 labels=b_labels)
            train_loss_set.append(loss.item())

            # backward pass
            loss.backward()

            # update parameters and take a step using the computed gradient
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            scheduler.step()

            # update tracking variables
            tr_loss += loss.item()
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1

        print("epoch: {}, loss: {}".format(epoch, tr_loss / nb_tr_steps))

        # put model in evaluation mode to evaluate loss on the validation set
        model.eval()

        # tracking variables
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0

        # evaluate data for one epoch
        for batch in validation_dataloader:

            # add batch to GPU
            batch = tuple(t.to(device) for t in batch)

            # unpack the inputs from our dataloader
            b_input_ids, b_input_mask, b_labels = batch

            # don't compute or store gradients
            with torch.no_grad():
                # forward pass; only logits returned since labels not provided
                [logits] = model(b_input_ids,
                                 token_type_ids=None,
                                 attention_mask=b_input_mask)

            # move logits and labels to CPU
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()

            tmp_eval_accuracy = flat_accuracy(logits, label_ids)

            eval_accuracy += tmp_eval_accuracy
            nb_eval_steps += 1

        print("validation accuracy: {}\n".format(eval_accuracy /
                                                 nb_eval_steps))
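The `flat_accuracy` helper used in the validation loop is not shown in this file; it is presumably the same one defined in code example #12:

import numpy as np

def flat_accuracy(preds, labels):
    """Fraction of predictions that match the labels (as defined in example #12)."""
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)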
Code example #14
File: train.py  Project: joshcx/UDA
    if args.prepare_data:
        data_utils.prepare_data(args.task,
                                path=data_path,
                                sequence_length=args.sequence_length)

    logs_path = 'logs'
    # Logs path
    if not os.path.exists(logs_path):
        os.makedirs(logs_path)

    save_path = data_utils.get_save_dir(logs_path, args.name)

    if not torch.cuda.is_available():
        print('GPU not available. Running on CPU...')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased', cache_dir=CACHE_DIR).to(device)

    supervised_train_dataset = data_utils.SupervisedDataset(
        os.path.join(data_path, args.task, 'train_uda_ids.pt'))
    supervised_validation_dataset = data_utils.SupervisedDataset(
        os.path.join(data_path, args.task, 'val_uda_ids.pt'))
    unsupervised_dataset = data_utils.UnsupervisedDataset(
        os.path.join(data_path, args.task, 'unsup_ori_uda_ids.pt'),
        os.path.join(data_path, args.task, 'unsup_aug_uda_ids.pt'))

    supervised_train_dataloader = DataLoader(supervised_train_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=True)
    supervised_validation_dataloader = DataLoader(
        supervised_validation_dataset,
        batch_size=args.batch_size,
Code example #15
                    sample_idx = idx
                else:
                    sample_idx = idx - dset.cumulative_sizes[dataset_idx - 1]
                g.write(f'{dataset_idx},{sample_idx}\n')

        train_ds = subsets[0]
        train_dl = DataLoader(train_ds,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_batch_transformer)

        val_ds = subsets[1]
        validation_evaluator = ClassificationEvaluator(val_ds, device)

        # Create the model
        model = BertForSequenceClassification.from_pretrained(
            bert_model, config=bert_config).to(device)
        if args.pretrained_model is not None:
            weights = {
                k: v
                for k, v in torch.load(args.pretrained_model).items()
                if "classifier" not in k
            }
            model_dict = model.state_dict()
            model_dict.update(weights)
            model.load_state_dict(model_dict)

        # Create the optimizer
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in model.named_parameters()
Code example #16
File: __init__.py  Project: salohnana2018/camel_tools
    def __init__(self, model_path):
        self.model = BertForSequenceClassification.from_pretrained(model_path)
        self.tokenizer = BertTokenizer.from_pretrained(model_path)
        self.labels_map = self.model.config.id2label
Code example #17
    # Set the directories where the models will be saved
    bert_dir = args.pretrain

    # Generate the data loader
    test_dataset = NaverSentimentDataset(train=False)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        collate_fn=test_dataset.collate_fn,
        num_workers=2,
    )

    # Build the sentiment classification model from the pretrained BERT
    model = BertForSequenceClassification.from_pretrained(bert_dir)
    model = model.to(device)

    # Evaluation
    total_test_loss = 0.0
    total_test_accuracy = 0

    model.eval()
    with torch.no_grad():
        for input_ids, token_type_ids, attention_mask, rating in tqdm(
                test_loader):
            input_ids = input_ids.to(device)
            token_type_ids = token_type_ids.to(device)
            attention_mask = attention_mask.to(device)
            rating_bool = rating.bool().to(device)
            rating = rating.float().to(device)
Code example #18
    def fit(self, sentences, labels, model_save_path, device, do_lower_case=True, debug=False, max_length=512, add_special_tokens=True, test_size=0.1, batch_size=8, output_attentions=True, output_hidden_states=True, epochs=2):
      '''
      - sentences : input string (as numpy array)
      - labels : numerical label (as numpy array)
        labels should be 0,1,2 like that
        Example on how you can obtain labels = train_data['labels'].values
      - test_size: is validation size (train and validation split basically)
      '''

      print('Loading BERT tokenizer...') # Load the BERT tokenizer.
      tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=do_lower_case)

      if debug:
        # Print the original sentence.
        print(' Original: ', sentences[0])

        # Print the sentence split into tokens.
        print('Tokenized: ', tokenizer.tokenize(sentences[0]))

        # Print the sentence mapped to token ids.
        print('Token IDs: ', tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentences[0])))

      
      input_ids, attention_masks = self.get_inputid_attentionmasks(tokenizer, sentences, debug=False, max_length=max_length, add_special_tokens=True)

      # Use 90% for training and 10% for validation.
      train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(input_ids, labels, random_state=self.random_state, test_size=test_size)
      # Do the same for the masks
      train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,random_state=self.random_state, test_size=test_size)

      train_data, train_sampler, train_dataloader = self.create_data(train_inputs, train_masks, train_labels, batch_size)
      validation_data, validation_sampler, validation_dataloader = self.create_data(validation_inputs, validation_masks, validation_labels,batch_size)


      # Load BertForSequenceClassification, the pretrained BERT model with a single linear classification layer on top. 
      model = BertForSequenceClassification.from_pretrained("bert-base-uncased", 
        num_labels = self.NUM_CLASS, 
        output_attentions=output_attentions, 
        output_hidden_states=output_hidden_states)

      model.cuda() # Tell pytorch to run this model on the GPU.

      # Total number of training steps is number of batches * number of epochs.
      total_steps = len(train_dataloader) * epochs
      # Note: AdamW is a class from the huggingface library (as opposed to pytorch); I believe the 'W' stands for 'Weight Decay fix'.
      optimizer = AdamW(model.parameters(), lr = 2e-5, eps = 1e-8 )
      scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

      print("")
      print('Training Batches: ',len(train_dataloader))
      print('Validation Batches: ',len(validation_dataloader))
      print('Batch Size: ',batch_size)
      print('Epochs: ',epochs)

      random.seed(self.seed_val)
      np.random.seed(self.seed_val)
      torch.manual_seed(self.seed_val)
      torch.cuda.manual_seed_all(self.seed_val)

      # Store the average loss after each epoch so we can plot them.
      loss_values = []
      logits_list, label_ids_list = [], []

      model = self.train_val_loop(model, epochs, train_dataloader, optimizer, scheduler, validation_dataloader, model_save_path, custom_name = '_')

      return model, tokenizer, device, max_length
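`get_inputid_attentionmasks`, `create_data` and `train_val_loop` are helper methods of the same class and are not shown. A minimal sketch of `create_data`, assuming it mirrors the TensorDataset/DataLoader setup used throughout the other examples (an assumption, not the class's actual code):

import torch
from torch.utils.data import TensorDataset, RandomSampler, DataLoader

def create_data(self, inputs, masks, labels, batch_size):
    """Wrap the tensors in a TensorDataset and return (dataset, sampler, dataloader)."""
    dataset = TensorDataset(torch.as_tensor(inputs), torch.as_tensor(masks),
                            torch.as_tensor(labels))
    sampler = RandomSampler(dataset)
    dataloader = DataLoader(dataset, sampler=sampler, batch_size=batch_size)
    return dataset, sampler, dataloader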
Code example #19
File: train.py  Project: MerziaAdamjee/YelpSpamData
    def __init__(self):
        super(BERT, self).__init__()

        options_name = "bert-base-uncased"
        self.encoder = BertForSequenceClassification.from_pretrained(options_name, num_labels = 2)
Code example #20
    def load_pretrained_model(self, model_name):
        self.model = BertForSequenceClassification.from_pretrained(
            model_name,
            num_labels=len(self.label_dict),
            output_attentions=False,
            output_hidden_states=False)
Code example #21
def main():
    # Build the model
    model = BertForSequenceClassification.from_pretrained(
        model_name, num_labels=2, mirror="tuna")  # num_labels=2 means two classes: positive and negative reviews
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Load the data
    if model_name == "../bert-base-uncased":
        tokenizer = AlbertTokenizer.from_pretrained(model_name)
    else:
        tokenizer = BertTokenizer.from_pretrained(model_name, mirror="tuna")

    train_dataloader, test_dataloader = dataload.solve_data(
        tokenizer,
        dataname=DataName,
        limit_size=Limit_size,
        BATCH_SIZE=BATCH_SIZE,
        using_clickbait_dic=USING_CLICKBAIT_DIC,
    )

    # Number of training steps: [number of batches] x [number of epochs].
    total_steps = len(train_dataloader) * epochs

    # Define the optimization method
    no_decay = ["bias", "LayerNorm.weight"]
    optimizer_grouped_parameters = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if not any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            weight_decay,
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if any(nd in n for nd in no_decay)
            ],
            "weight_decay":
            0.0,
        },
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=learning_rate,
                      eps=epsilon)

    # Set up the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(optimizer,
                                                num_warmup_steps=0,
                                                num_training_steps=total_steps)

    records_Train = {}
    records_Test = {}
    best_epoch = -1
    best_score = -1
    for epoch in range(epochs):
        train_loss, train_acc, train_precision, train_recall, train_F1 = train(
            model, device, train_dataloader, optimizer, scheduler)
        print("epoch={}, 训练准确率={}, 损失={}, 精度={}, 召回率={}, F1={}".format(
            epoch, train_acc, train_loss, train_precision, train_recall,
            train_F1))

        test_acc, test_precision, test_recall, test_F1 = evaluate(
            model, device, test_dataloader)
        print("epoch={}, 测试准确率={}, 精度={}, 召回率={}, F1={}".format(
            epoch, test_acc, test_precision, test_recall, test_F1))

        records_Train["Epoch" + str(epoch)] = [
            train_acc,
            train_loss,
            train_precision,
            train_recall,
            train_F1,
        ]

        records_Test["Epoch" + str(epoch)] = [
            test_acc,
            test_precision,
            test_recall,
            test_F1,
        ]

        if test_F1 > best_score:
            best_score = test_F1
            best_epoch = epoch

        # if epoch == 2:
        #     print("###### Save Model ######")
        #     model.save_pretrained("./save_model")
        #     tokenizer.save_pretrained("./save_model")

    print("###### Finished ######")
    train_acc, train_loss, train_precision, train_recall, train_F1 = records_Train[
        "Epoch" + str(best_epoch)]
    test_acc, test_precision, test_recall, test_F1 = records_Test[
        "Epoch" + str(best_epoch)]
    print("best_epoch={}, 训练准确率={}, 损失={}, 精度={}, 召回率={}, F1={}".format(
        best_epoch, train_acc, train_loss, train_precision, train_recall,
        train_F1))
    print("best_epoch={}, 测试准确率={}, 精度={}, 召回率={}, F1={}".format(
        best_epoch, test_acc, test_precision, test_recall, test_F1))
Code example #22
    processor = preProcessor()

    tokenizer = BertTokenizer.from_pretrained(args_train["pre_train_model"],
                                              return_tensors='pt')

    train_dataset = load_and_cache_example(args_train, tokenizer, processor,
                                           'simtrain')
    val_dataset = load_and_cache_example(args_train, tokenizer, processor,
                                         'simdev')

    bert_config = BertConfig.from_pretrained(args_train["pre_train_model"])
    bert_config.num_labels = len(processor.get_labels())

    model_kwargs = {'config': bert_config, "from_tf": True}
    model = BertForSequenceClassification.from_pretrained(
        args_train["pre_train_model"], **model_kwargs)

    model = model.to(device)

    train_dataloader = DataLoader(train_dataset,
                                  sampler=RandomSampler(train_dataset),
                                  batch_size=args_train["batch_size"])
    val_dataloader = DataLoader(val_dataset,
                                sampler=SequentialSampler(val_dataset),
                                batch_size=args_train["batch_size"])

    del train_dataset, val_dataset
    gc.collect()

    train(args_train, train_dataloader, val_dataloader, model)
Code example #23
    def __init__(self, weight="bert-base-uncased"):
        self.weight = weight

        self.tokenizer = BertTokenizer.from_pretrained(self.weight)
        self.model = BertForSequenceClassification.from_pretrained(self.weight)
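
The class above only loads the tokenizer and the classification model; how the two are combined for inference is not shown. A hypothetical predict method built on the same two objects could look like this:

# Hypothetical predict method for the wrapper above (method name and return format are assumptions).
import torch

def predict(self, text):
    inputs = self.tokenizer(text, return_tensors='pt', truncation=True)
    with torch.no_grad():
        logits = self.model(**inputs).logits
    return int(logits.argmax(dim=-1))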
コード例 #24
0
def test():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args_test = {
        "data_dir": '/content/gdrive/My Drive/nlpqa3/data/',
        "load_path": '/content/gdrive/My Drive/nlpqa3/output/data/',
        #"vocab_file": 'bert-base-chinese-vocab.txt'
        #"model_config": 'pytorch_model.bin',
        #"model_path": 'config.json',
        "max_seq_length": 128,
        "batch_size": 32,
        "learning_rate": 6e-6,
        "epochs": 3,
        "device": device
    }

    tokenizer = BertTokenizer(os.path.join(args_test["data_dir"],
                                           'bert-base-chinese-vocab.txt'),
                              return_tensors='pt')

    processor = preProcessor()
    test_dataset = load_and_cache_example(args_test, tokenizer, processor,
                                          'simtest')

    bert_config = BertConfig.from_pretrained(
        os.path.join(args_test["load_path"], 'config.json'))
    bert_config.num_labels = len(processor.get_labels())

    test_dataloader = DataLoader(test_dataset,
                                 sampler=SequentialSampler(test_dataset),
                                 batch_size=args_test["batch_size"])

    model_kwargs = {'config': bert_config, "from_tf": False}
    model = BertForSequenceClassification.from_pretrained(
        os.path.join(args_test["load_path"], 'pytorch_model.bin'),
        **model_kwargs)
    #model.load_state_dict(torch.load(os.path.join(args["load_path"], "model_path")))

    model = model.to(device)

    del test_dataset
    gc.collect()

    total_loss = 0.0
    total_sample = 0  # number of samples seen
    all_real_labels = []
    all_pred_labels = []

    for batch in test_dataloader:
        model.eval()
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels': batch[3]
            }
            outputs = model(**inputs)
            loss, logits = outputs[0], outputs[1]

            # weight the batch-mean loss by the batch size so that dividing by
            # total_sample below gives a true per-sample average
            total_loss += loss.item() * batch[0].shape[0]
            logits = logits.detach().cpu().numpy()
            label_ids = batch[3].to('cpu').numpy()

            total_sample += batch[0].shape[0]  # track the number of samples

            #pred = logits.argmax(dim=-1).tolist()  # alternative: predicted labels as a list
            pred = np.argmax(logits, axis=1).flatten()

            all_pred_labels.extend(pred)
            all_real_labels.extend(batch[3].view(-1).tolist())

    loss = total_loss / total_sample
    question_acc, label_acc = calc_acc(all_real_labels, all_pred_labels)

    print("avg_loss", loss)
    print("question_acc", question_acc)
    print("label_acc", label_acc)
コード例 #25
0
# Create the DataLoader for our validation set.
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)

"""# 4. Train Our Classification Model"""

from transformers import BertForSequenceClassification, AdamW, BertConfig

# Load BertForSequenceClassification, the pretrained BERT model with a single 
# linear classification layer on top. 
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = 2, # The number of output labels--2 for binary classification.
                    # You can increase this for multi-class tasks.   
    output_attentions = False, # Whether the model returns attention weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)

# Tell pytorch to run this model on the GPU.
model.cuda()

# Get all of the model's parameters as a list of tuples.
params = list(model.named_parameters())

print('The BERT model has {:} different named parameters.\n'.format(len(params)))

print('==== Embedding Layer ====\n')

for p in params[0:5]:
    # show the name and shape of the first few (embedding-layer) parameters
    print("{:<55} {:>12}".format(p[0], str(tuple(p[1].size()))))
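
As the comment above notes, BertForSequenceClassification is the pretrained encoder with a single linear classification layer on top. Roughly, the head corresponds to the following computation (a simplified sketch for illustration only; the real implementation lives in transformers):

# Simplified sketch of the classification head on top of the BERT encoder.
import torch.nn as nn

class SketchClassifier(nn.Module):
    def __init__(self, bert, hidden_size=768, num_labels=2):
        super().__init__()
        self.bert = bert                      # the base BertModel
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None):
        outputs = self.bert(input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        pooled = outputs[1]                   # pooled [CLS] representation
        return self.classifier(self.dropout(pooled))   # logits over num_labels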
コード例 #26
0
def main():
    args = parse_args()
    use_cuda = torch.cuda.is_available()
    torch.backends.cudnn.benchmark = True
    random.seed(1337)
    torch.manual_seed(1337)
    if not use_cuda:
        print("warning, the experiments would take ages to run on cpu")

    hyperparams = vars(args)

    heuristic = get_heuristic(hyperparams['heuristic'],
                              hyperparams['shuffle_prop'])

    model = BertForSequenceClassification.from_pretrained(
        pretrained_model_name_or_path=hyperparams["model"])
    tokenizer = BertTokenizer.from_pretrained(
        pretrained_model_name_or_path=hyperparams["model"])

    # In this example we run the tokenizer once at the beginning, which makes
    # the whole process faster. Alternatively, the tokenizer can also be passed
    # to the trainer.
    active_set, test_set = get_datasets(hyperparams['initial_pool'], tokenizer)

    # change dropout layer to MCDropout
    model = patch_module(model)

    if use_cuda:
        model.cuda()

    init_weights = deepcopy(model.state_dict())

    training_args = TrainingArguments(
        output_dir='/app/baal/results',  # output directory
        num_train_epochs=hyperparams['learning_epoch'],  # total # of training epochs
        per_device_train_batch_size=16,  # batch size per device during training
        per_device_eval_batch_size=64,  # batch size for evaluation
        weight_decay=0.01,  # strength of weight decay
        logging_dir='/app/baal/logs',  # directory for storing logs
    )

    # We wrap the huggingface Trainer to create an Active Learning Trainer
    model = BaalTransformersTrainer(model=model,
                                    args=training_args,
                                    train_dataset=active_set,
                                    eval_dataset=test_set,
                                    tokenizer=None)

    logs = {}
    logs['epoch'] = 0

    # In this case, NLP data is fast to process and we do not need a smaller batch_size.
    active_loop = ActiveLearningLoop(active_set,
                                     model.predict_on_dataset,
                                     heuristic,
                                     hyperparams.get('n_data_to_label', 1),
                                     iterations=hyperparams['iterations'])

    for epoch in tqdm(range(args.epoch)):
        # We use the default HuggingFace training setup (e.g. one epoch); the
        # setup is adjustable when the BaalTransformersTrainer is defined.
        model.train()

        # Validation!
        eval_metrics = model.evaluate()

        # We reorder the unlabelled pool at the frequency of learning_epoch
        # This helps with speed while not changing the quality of uncertainty estimation.
        should_continue = active_loop.step()

        # We reset the model weights to relearn from the new trainset.
        model.load_state_dict(init_weights)
        model.lr_scheduler = None
        if not should_continue:
            break
        active_logs = {"epoch": epoch,
                       "labeled_data": active_set._labelled,
                       "Next Training set size": len(active_set)}

        logs = {**eval_metrics, **active_logs}
        print(logs)
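
patch_module() replaces the model's Dropout layers with MC-Dropout so they stay stochastic at inference time; the heuristic then ranks unlabelled samples using several stochastic forward passes. A rough sketch of that idea, independent of baal's API:

# Rough sketch of MC-Dropout uncertainty estimation (not baal's implementation).
import torch

def mc_dropout_probs(model, inputs, n_passes=10):
    model.train()  # keep dropout active at inference time
    with torch.no_grad():
        probs = torch.stack([
            torch.softmax(model(**inputs).logits, dim=-1)
            for _ in range(n_passes)
        ])
    return probs.mean(dim=0), probs.var(dim=0)  # predictive mean and variance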
コード例 #27
0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# path to all the files that will be used for inference
path = f"./app/models/"

# self.model_path = self.path + "traced_bert_epoch_1.pt"
model_path = path + "custom_trained_model.bin"

#tokenizer_path = "./app/models/bert-large-portuguese-cased"
tokenizer_path = "neuralmind/bert-large-portuguese-cased"

# self.model = torch.jit.load(self.model_path)
model = BertForSequenceClassification.from_pretrained(
    tokenizer_path,
    num_labels=17,
    #local_files_only=True
)
model.load_state_dict(torch.load(model_path, map_location=device))

tokenizer = BertTokenizer.from_pretrained(
    tokenizer_path,
    do_lower_case=True,
    torchscript=True,
)

LABELS = {0: "A", 1: "B", 2: "C"}


class ClassificationProcessor:
    def __init__(self):
コード例 #28
0
# Find GPU
device = torch.device("cuda")

# BERT constants:
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
EPOCHS = 16
pretrained_weights = 'bert-base-uncased'

# Set these paths to train BERT
MODEL_PATH = '../models/bert_mnli/bert_model_mnli.pt'
TRAIN_LOSS_PATH = '../models/bert_mnli/train_loss_per_batch.npy'
VAL_LOSS_PATH = '../models/bert_mnli/val_loss_per_epoch.npy'

# Initialize Model and Optimizer
model = BertForSequenceClassification.from_pretrained(pretrained_weights,
                                                      num_labels=3)
model.cuda()
optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, correct_bias=False)

# Load data
train_data = dl.SemEvalDataset("../data/preprocessed/bert_mnli_train.npy")
val_data = dl.SemEvalDataset("../data/preprocessed/bert_mnli_val.npy")

train_loader = DataLoader(train_data,
                          batch_size=BATCH_SIZE,
                          num_workers=0,
                          shuffle=True)
val_loader = DataLoader(val_data,
                        batch_size=BATCH_SIZE,
                        num_workers=0,
                        shuffle=True)
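
The training loop for this setup is not included in the snippet. A minimal sketch of one epoch, assuming each batch is a (input_ids, attention_mask, labels) tuple, could look like this:

# Minimal training-loop sketch for the setup above (the batch layout is an assumption).
model.train()
for input_ids, attention_mask, labels in train_loader:
    input_ids = input_ids.to(device)
    attention_mask = attention_mask.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
    loss = outputs[0]                      # cross-entropy loss over the 3 MNLI labels
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
    optimizer.step()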
コード例 #29
0
        'recall': recall
    }

def send_inputs_to_device(inputs, device):
    return {key:tensor.to(device) for key, tensor in inputs.items()}

# Creating Data Loader

train_loader = torch.utils.data.DataLoader(dataset['train'], batch_size=16, collate_fn=DataCollatorWithPadding(tokenizer))
validation_loader = torch.utils.data.DataLoader(dataset['validation'], batch_size=32, collate_fn=DataCollatorWithPadding(tokenizer))
test_loader = torch.utils.data.DataLoader(dataset['test'], batch_size=32, collate_fn=DataCollatorWithPadding(tokenizer))

num_epochs = 4
num_warmup_steps = 5000

model = BertForSequenceClassification.from_pretrained("neuralmind/bert-base-portuguese-cased")

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model.train().to(device)

optimizer = AdamW(model.parameters(), lr=5e-6)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps, num_epochs*len(train_loader))

def predict(model, validation_loader, device):
    with torch.no_grad():
        model.eval()
        preds = []
        labels = []
        validation_losses = []
        for inputs in validation_loader:
コード例 #30
0
File: sst_embeds.py    Project: siduojiang/BERTVision
def main():
    # training settings
    def get_args():
        parser = ArgumentParser(description='SST')
        parser.add_argument('--name',
                            type=str,
                            default='SST',
                            metavar='S',
                            help="Model name")
        parser.add_argument('--checkpoint',
                            type=str,
                            default='bert-base-uncased',
                            metavar='S',
                            help="e.g., bert-base-uncased, etc")
        parser.add_argument('--model',
                            type=str,
                            default='bert-base-uncased',
                            metavar='S',
                            help="e.g., bert-base-uncased, etc")
        parser.add_argument('--batch-size',
                            type=int,
                            default=32,
                            metavar='N',
                            help='input batch size for training (default: 32)')
        parser.add_argument('--epochs',
                            type=int,
                            default=1,
                            metavar='N',
                            help='number of epochs to train (default: 1)')
        parser.add_argument('--lr',
                            type=float,
                            default=1e-5,
                            metavar='LR',
                            help='learning rate (default: 1e-5)')
        parser.add_argument('--seed',
                            type=int,
                            default=1,
                            metavar='S',
                            help='random seed (default: 1)')
        parser.add_argument('--num-workers',
                            type=int,
                            default=0,
                            metavar='N',
                            help='number of CPU cores (default: 0)')
        parser.add_argument('--num-labels',
                            type=int,
                            default=2,
                            metavar='N',
                            help='number of labels to classify (default: 2)')
        parser.add_argument('--l2',
                            type=float,
                            default=0.01,
                            metavar='LR',
                            help='l2 regularization weight (default: 0.01)')
        parser.add_argument(
            '--max-seq-length',
            type=int,
            default=66,
            metavar='N',
            help='max sequence length for encoding (default: 66)')
        parser.add_argument('--warmup-proportion',
                            type=float,
                            default=0.1,
                            metavar='N',
                            help='Warmup proportion (default: 0.1)')
        parser.add_argument('--embed-batch-size',
                            type=int,
                            default=1,
                            metavar='N',
                            help='Embedding batch size emission; (default: 1)')
        args = parser.parse_args()
        return args

    args = get_args()

    # set seeds and determinism
    torch.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    # note: autocast is a context manager; constructing it here and discarding it
    # has no global effect (mixed precision applies where `with autocast():` is used)
    torch.cuda.amp.autocast(enabled=True)

    # set device
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # build ds
    train_ds = SST(type='train', transform=Tokenize_Transform(args, logger))

    # build ds
    dev_ds = SST(type='dev', transform=Tokenize_Transform(args, logger))

    # create training dataloader
    train_dataloader = DataLoader(train_ds,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  drop_last=False)

    # create embed dataloader
    train_embed_dataloader = DataLoader(train_ds,
                                        batch_size=args.embed_batch_size,
                                        shuffle=True,
                                        num_workers=args.num_workers,
                                        drop_last=False)

    # create embed dataloader
    dev_embed_dataloader = DataLoader(dev_ds,
                                      batch_size=args.embed_batch_size,
                                      shuffle=True,
                                      num_workers=args.num_workers,
                                      drop_last=False)

    # load the model
    model = BertForSequenceClassification.from_pretrained(
        args.checkpoint, num_labels=args.num_labels).to(device)

    # create gradient scaler for mixed precision
    scaler = GradScaler()

    # set optimizer
    param_optimizer = list(model.named_parameters())

    # exclude these from regularization
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    # apply l2 regularization to any parameter not matched by the no_decay list;
    # apply no l2 regularization to bias parameters or LayerNorm bias/weight
    optimizer_grouped_parameters = [{
        'params':
        [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay':
        args.l2
    }, {
        'params':
        [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay':
        0.0
    }]

    # set optimizer (the per-group weight_decay values above take precedence
    # over the global weight_decay argument)
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.lr,
                      correct_bias=False,
                      weight_decay=args.l2)

    num_train_optimization_steps = int(
        len(train_ds) / args.batch_size) * args.epochs

    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_training_steps=num_train_optimization_steps,
        num_warmup_steps=args.warmup_proportion * num_train_optimization_steps)

    # set epochs
    epochs = args.epochs

    # set location and make if necessary
    if args.checkpoint == 'bert-base-uncased':
        checkpoint_location = 'C:\\w266\\data\\embed_checkpoints\\'
    elif args.checkpoint == 'bert-large-uncased':
        checkpoint_location = 'C:\\w266\\data\\embed_checkpoints\\bert_large\\'
    os.makedirs(checkpoint_location, exist_ok=True)

    # train
    best_loss = np.inf
    for epoch in range(1, epochs + 1):
        train_log = train(model, train_dataloader, scaler, optimizer,
                          scheduler, device, args)
        logs = dict(train_log)
        if logs['loss'] < best_loss:
            # torch save
            torch.save(
                model.state_dict(),
                checkpoint_location + args.name + '_epoch_{}.pt'.format(epoch))
            best_loss = logs['loss']
        show_info = f'\nEpoch: {epoch} - ' + "-".join(
            [f' {key}: {value:.4f} ' for key, value in logs.items()])
        print(show_info)

    # now proceed to emit embeddings
    model = BertForSequenceClassification.from_pretrained(
        args.checkpoint, num_labels=args.num_labels,
        output_hidden_states=True).to(device)
    # load the weights saved after epoch 1
    model.load_state_dict(
        torch.load(checkpoint_location + args.name + '_epoch_1.pt'))

    # export embeddings
    emit_train_embeddings(train_embed_dataloader, train_ds, model, device,
                          args)
    emit_dev_embeddings(dev_embed_dataloader, dev_ds, model, device, args)
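
The train() helper that consumes the GradScaler created above is not shown. A minimal sketch of a single mixed-precision training step matching this setup (batch keys are an assumption) might look like this:

# Hypothetical mixed-precision step using the scaler/optimizer/scheduler defined above.
from torch.cuda.amp import autocast

def train_step(model, batch, scaler, optimizer, scheduler, device):
    model.train()
    optimizer.zero_grad()
    batch = {k: v.to(device) for k, v in batch.items()}
    with autocast():                      # run the forward pass in mixed precision
        outputs = model(**batch)          # expects input_ids / attention_mask / labels keys
        loss = outputs[0]
    scaler.scale(loss).backward()         # scale the loss to avoid fp16 underflow
    scaler.step(optimizer)                # unscale gradients, then take an optimizer step
    scaler.update()
    scheduler.step()
    return loss.item()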