Example #1
def train_model_bert(args):
    # Rebuild the config with a device option so training can run on a different CUDA device
    config = BertConfig.from_pretrained(args.folder_model)

    config = config.to_dict()
    config.update({"device": args.device})
    config.update({"use_pooler": args.use_pooler})
    config.update({"weight_class": args.weight_class})
    config.update({"output_hidden_states": args.output_hidden_states})
    config = BertConfig.from_dict(config)

    tokenizer = BertTokenizer.from_pretrained(args.folder_model)
    model = BERTQa.from_pretrained(args.folder_model, config=config)
    model = model.to(args.device)
    train_squad(args, tokenizer, model)
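A note on the to_dict/from_dict round trip above: standard config fields such as output_hidden_states can also be overridden via keyword arguments to from_pretrained, and custom fields can simply be assigned as attributes (as Example #2 below does). A minimal sketch of that shorter variant, assuming the same args object:

config = BertConfig.from_pretrained(args.folder_model,
                                    output_hidden_states=args.output_hidden_states)
config.device = args.device              # custom field read by BERTQa
config.use_pooler = args.use_pooler      # custom field
config.weight_class = args.weight_class  # custom field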
Example #2
    def __init__(self, config, bertmodel):
        super(Parser, self).__init__()

        self.config = config

        # build and load BERT G2G model
        bertconfig = BertConfig.from_pretrained(
                config.main_path+"/model"+"/model_"+config.modelname+'/config.json')

        bertconfig.num_hidden_layers = config.n_attention_layer
        bertconfig.label_size = config.n_rels
        bertconfig.layernorm_value = config.layernorm_value
        bertconfig.layernorm_key = config.layernorm_key

        if self.config.input_graph:
            self.bert = BertGraphModel(bertconfig)
        else:
            self.bert = BertBaseModel(bertconfig)
        
        self.bert.load_state_dict(bertmodel.state_dict(), strict=False)
        self.mlp = Classifier(3 * bertconfig.hidden_size, bertconfig.hidden_size, config.n_trans)
        self.mlp_rel = Classifier(2 * bertconfig.hidden_size, bertconfig.hidden_size, config.n_rels)

        self.pad_index = config.pad_index
        self.unk_index = config.unk_index
Example #3
def convert_tf_checkpoint_to_pytorch(tf_checkpoint_path, bert_config_file, pytorch_dump_path):
  # Build a PyTorch model from the JSON config, copy in the TF checkpoint weights, and save the state dict.
  config = BertConfig.from_json_file(bert_config_file)
  model = BertForPreTraining(config)

  load_tf_weights_in_bert(model, config, tf_checkpoint_path)

  torch.save(model.state_dict(), pytorch_dump_path)
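A usage sketch for the converter above; the paths are placeholders, not taken from the original project:

if __name__ == '__main__':
  # Illustrative paths only; point these at a real TF checkpoint, its config, and a target file.
  convert_tf_checkpoint_to_pytorch(tf_checkpoint_path='/tmp/bert/bert_model.ckpt',
                                   bert_config_file='/tmp/bert/bert_config.json',
                                   pytorch_dump_path='/tmp/bert/pytorch_model.bin')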
Example #4
 def get_model():
     bert_config = BertConfig.from_json_file(self.config_path)
     bert_config.type_vocab_size = 3
     bert_config.eos_token_id = self.tokenizer.token_to_id('[SEP]')
     model = GenLM(bert_config)
     if not is_predict:
         load_tf_weights_in_bert(model, self.checkpoint_path)
     # model = keras.models.Model(model.inputs, model.outputs)
     return model
Example #5
def predict(predict_model_name_or_path, pre_data, pre_dataloader):

    print('Running prediction')
    pro = processer()
    labellist = pro.get_labels()

    # ***** Load the model *****
    print('Loading model')
    model = BertForSequenceClassification
    config = BertConfig.from_pretrained(predict_model_name_or_path,
                                        num_labels=len(labellist))
    model = model.from_pretrained(predict_model_name_or_path, config=config)

    print('Moving model to GPU or CPU')
    # Use the GPU if one is available, otherwise fall back to the CPU
    if torch.cuda.is_available():
        # single-GPU computation
        torch.cuda.set_device(0)
        device = torch.device('cuda', 0)  # set the GPU device index
    else:
        device = torch.device('cpu')
    model.to(device)

    print('******** Running prediction ********')
    print("  Num examples = %d", len(pre_data))

    preds = None
    pbar = ProgressBar(n_total=len(pre_dataloader), desc="Predicting")

    # *** Run prediction ***
    for step, batch in enumerate(pre_dataloader):
        model.eval()
        batch = tuple(t.to(device) for t in batch)
        with torch.no_grad():
            inputs = {
                'input_ids': batch[0],
                'token_type_ids': batch[2],
                'attention_mask': batch[1],
                'labels': batch[3]
            }
            outputs = model(**inputs)
            _, logits = outputs[:2]

        # *** Accumulate the predictions from each batch ***
        if preds is None:
            preds = logits.softmax(-1).detach().cpu().numpy()
        else:
            preds = np.append(preds,
                              logits.softmax(-1).detach().cpu().numpy(),
                              axis=0)
        pbar(step)

    predict_label = np.argmax(preds, axis=1)
    print(preds)

    print(predict_label)
    return preds, predict_label
Example #6
 def __init__(self):
     config = BertConfig.from_json_file(join(BERT_PATH, 'bert_config.json'))
     self.tokenizer = BertTokenizer(vocab_file=join(BERT_PATH, 'vocab.txt'))
     self.model = BertModel(config, add_pooling_layer=False)
     load_tf_weights_in_bert(self.model,
                             tf_checkpoint_path=join(
                                 BERT_PATH, 'bert_model.ckpt'),
                             strip_bert=True)
     self.model.to(PT_DEVICE)
     self.model.eval()
Example #7
 def __init__(self, config=None, *args, **kwargs):
     super().__init__(*args, **kwargs)
     if config is None:
         from transformers.configuration_bert import BertConfig
         config = BertConfig.from_pretrained('bert-base-uncased')
         assert config.hidden_size == self.in_dim
     from transformers.modeling_bert import BertPredictionHeadTransform
     self.module = nn.Sequential(
         nn.Dropout(config.hidden_dropout_prob),
         BertPredictionHeadTransform(config),
         nn.Linear(self.in_dim, self.out_dim),
     )
Example #8
    def __init__(self, embeddings, device):
        super(Model, self).__init__()
        self.device = device
        
        cid_emb_size = embeddings[0].shape[1]
        creative_id_embedding = nn.Embedding(embeddings[0].shape[0], cid_emb_size)
        creative_id_embedding.weight.data.copy_(torch.from_numpy(embeddings[0]))
        creative_id_embedding.weight.requires_grad = False
        self.creative_id_embedding = creative_id_embedding

        aid_emb_size = embeddings[1].shape[1]
        ad_id_embedding = nn.Embedding(embeddings[1].shape[0], aid_emb_size)
        ad_id_embedding.weight.data.copy_(torch.from_numpy(embeddings[1]))
        ad_id_embedding.weight.requires_grad = False
        self.ad_id_embedding = ad_id_embedding
        
        adv_emb_size = embeddings[2].shape[1]
        advertiser_id_embedding = nn.Embedding(embeddings[2].shape[0], adv_emb_size)
        advertiser_id_embedding.weight.data.copy_(torch.from_numpy(embeddings[2]))
        advertiser_id_embedding.weight.requires_grad = False
        self.advertiser_id_embedding = advertiser_id_embedding
        
        pid_emb_size = embeddings[3].shape[1]
        product_id_embedding = nn.Embedding(embeddings[3].shape[0], pid_emb_size)
        product_id_embedding.weight.data.copy_(torch.from_numpy(embeddings[3]))
        product_id_embedding.weight.requires_grad = False
        self.product_id_embedding = product_id_embedding
            
        hidden_size = cid_emb_size + aid_emb_size + adv_emb_size + pid_emb_size
        
        # transformer
        config = BertConfig(num_hidden_layers=3,
                            num_attention_heads=8,
                            hidden_size=hidden_size,
                            layer_norm_eps=1e-12,
                            hidden_dropout_prob=0.2,
                            attention_probs_dropout_prob=0.2,
                            hidden_act='mish')
        self.config = config
        self.bert_encoder = BertEncoder(config)
        
        # DNN layers
        self.linears = nn.Sequential(nn.Linear(config.hidden_size, 1024), Mish(), nn.BatchNorm1d(1024),
                                     nn.Linear(1024, 256), Mish(), nn.BatchNorm1d(256), 
                                     nn.Linear(256, 64), Mish(), nn.BatchNorm1d(64),
                                     nn.Linear(64, 16), Mish(), nn.BatchNorm1d(16), 
                                     nn.Dropout(0.1))

        # output layers
        self.age_output = nn.Linear(16, 10)
        self.gender_output = nn.Linear(16, 2)
Example #9
 def __init__(self, is_predict=False):
     super().__init__()
     config = BertConfig.from_json_file(join(BERT_PATH, 'bert_config.json'))
     self.bert = BertModel(config, add_pooling_layer=True)
     self.tokenizer = self.get_tokenizer()
     if not is_predict:
         load_tf_weights_in_bert(self.bert,
                                 tf_checkpoint_path=join(
                                     BERT_PATH, 'bert_model.ckpt'),
                                 strip_bert=True)
     self.cls = torch.nn.Linear(768, 2)
     self.save_dir = join(MODEL_PATH, 'consistent')
     if not os.path.isdir(self.save_dir):
         os.makedirs(self.save_dir)
     self.save_path = join(self.save_dir, 'trained.pt')
Example #10
    def __init__(self, in_dim=768, out_dim=2, config=None, *args, **kwargs):
        super().__init__()
        from transformers.models.bert.modeling_bert import BertPredictionHeadTransform

        if config is None:
            from transformers.configuration_bert import BertConfig

            config = BertConfig.from_pretrained("bert-base-uncased")

        assert config.hidden_size == in_dim

        self.module = nn.Sequential(
            nn.Dropout(config.hidden_dropout_prob),
            BertPredictionHeadTransform(config),
            nn.Linear(in_dim, out_dim),
        )
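The head above is just dropout, BertPredictionHeadTransform, then a linear layer. A minimal standalone sketch of the same stack applied to a pooled BERT output, assuming a transformers 4.x import path (the older path appears in Example #7):

import torch
from torch import nn
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertPredictionHeadTransform

config = BertConfig()  # hidden_size defaults to 768
head = nn.Sequential(
    nn.Dropout(config.hidden_dropout_prob),
    BertPredictionHeadTransform(config),
    nn.Linear(config.hidden_size, 2),
)
pooled = torch.randn(4, config.hidden_size)  # stand-in for BERT's pooled output
logits = head(pooled)                        # shape: (4, 2)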
Example #11
    def __init__(self, device, serial_model_path, par_model_path):
        self.device = device

        pretrained_path = 'cl-tohoku/bert-base-japanese-whole-word-masking'
        self.tokenizer = BertTokenizer.from_pretrained(pretrained_path,
                                                       do_lower_case=False)
        config = BertConfig.from_pretrained(pretrained_path)
        config.num_labels = 4
        self.serial_model = BertForSequenceClassification(config)
        config.num_labels = 2
        self.par_model = BertForSequenceClassification(config)

        self.serial_model.load_state_dict(torch.load(serial_model_path))
        self.serial_model.to(self.device)
        self.serial_model.eval()
        self.par_model.load_state_dict(torch.load(par_model_path))
        self.par_model.to(self.device)
        self.par_model.eval()
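A sketch of how the two classifiers above might be queried at inference time. The instance name clf and the .logits attribute (a transformers 4.x ModelOutput field) are assumptions, not code from the original fragment:

# clf is an instance of the class above (its name is not shown in this fragment).
text = '彼はパンを買って、牛乳も買った。'
enc = clf.tokenizer(text, return_tensors='pt').to(clf.device)
with torch.no_grad():
    serial_label = clf.serial_model(**enc).logits.argmax(dim=-1).item()  # 4-way head
    par_label = clf.par_model(**enc).logits.argmax(dim=-1).item()        # 2-way head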
Example #12
def classify(fname: str, verbose: bool = False):
    '''
    Returns a 1-dimensional numpy array of predictions.
    The labels 0, -1, 1 are indexed at 0, 1, 2, so in the returned array:
    0 = 'Neutral', 1 = 'Deny', 2 = 'Favor'
    '''
    tokenizer = BertTokenizer('../models/BERT-vocab1.dms')
    config = BertConfig.from_json_file('../models/BERT-config0.json')
    model = TFBertForSequenceClassification.from_pretrained(
        '../models/BERT-transfer1/', config=config)

    # BATCH_SIZE = 64
    feat_spec = {
        'idx': tf.io.FixedLenFeature([], tf.int64),
        'sentence': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64)
    }

    def parse_ex(ex_proto):
        return tf.io.parse_single_example(ex_proto, feat_spec)

    tweets = tf.data.TFRecordDataset(fname)
    tweets = tweets.map(parse_ex)

    # with open('data/tweet_info.json')as j_file:
    #     data_info = json.load(j_file)
    #     num_samples = data_info['DF_length']

    eval_df = glue_convert_examples_to_features(examples=tweets,
                                                tokenizer=tokenizer,
                                                max_length=128,
                                                task='sst-2',
                                                label_list=['0', '-1', '1'])
    eval_df = eval_df.batch(64)

    y_preds = model.predict(eval_df, use_multiprocessing=True, verbose=verbose)
    y_preds_sm = tf.nn.softmax(y_preds)
    y_preds_argmax = tf.math.argmax(y_preds_sm, axis=1)
    return y_preds_argmax.numpy()
Example #13
    def __init__(self,
                 pretrained_model_dir,
                 num_classes,
                 segment_len=200,
                 overlap=50,
                 dropout_p=0.5):
        super(BertLSTMWithOverlap, self).__init__()

        self.seg_len = segment_len
        self.overlap = overlap

        self.config = BertConfig.from_json_file(pretrained_model_dir +
                                                'bert_config.json')
        self.bert = BertModel.from_pretrained(pretrained_model_dir,
                                              config=self.config)

        if feature_extract:
            for p in self.bert.parameters():  # transfer learning: use BERT as a frozen feature extractor
                p.requires_grad = False

        d_model = self.config.hidden_size  # 768

        self.bi_lstm2 = torch.nn.LSTM(input_size=d_model,
                                      hidden_size=d_model // 2,
                                      bidirectional=True,
                                      batch_first=True)
        self.attn_weights2 = torch.nn.Sequential(
            torch.nn.Linear(
                d_model,
                d_model),  # sent_attn_energy [b,num_seg,768]=>[b,num_seg,768]
            torch.nn.Tanh(),
            torch.nn.Linear(
                d_model, 1, bias=False
            ),  # sent_attn_weights [b,num_seg,768]=>[b,num_seg,1]
            torch.nn.Softmax(dim=1),  # [b,num_seg,1]
        )

        self.fc = torch.nn.Sequential(torch.nn.Dropout(p=dropout_p),
                                      torch.nn.Linear(d_model, num_classes))
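The constructor above only stores seg_len and overlap; the forward pass (not shown) presumably splits long inputs into overlapping segments before feeding BERT. A purely illustrative sketch of that kind of splitting, not taken from the original code:

def split_with_overlap(token_ids, seg_len=200, overlap=50):
    # Slide a window of seg_len tokens with a stride of (seg_len - overlap).
    stride = seg_len - overlap
    segments = []
    for start in range(0, max(len(token_ids) - overlap, 1), stride):
        segments.append(token_ids[start:start + seg_len])
    return segments

# e.g. a 500-token document with seg_len=200 and overlap=50 yields segments
# starting at tokens 0, 150 and 300, the last one running through token 499.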
Example #14
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--data_dir",
                        default='./data/input/',
                        type=str,
                        required=True,
                        help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
    parser.add_argument("--bert_model", default='bert-base-chinese', type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                             "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--config_file", default='bert-base-chinese', type=str, required=True,
                        help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
                             "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument("--task_name",
                        default='xgfy',
                        type=str,
                        required=True,
                        help="The name of the task to train.")
    parser.add_argument("--vacab_root",
                        default='./data/model/',
                        type=str,
                        required=True,
                        help="The directory where the vocab file is saved.")
                        
    parser.add_argument("--output_dir",
                        default='./data/output/',
                        type=str,
                        required=True,
                        help="The output directory where the model predictions and checkpoints will be written.")
    parser.add_argument("--weight_name",
                        default='net_weight_1.bin',
                        type=str,
                        )
    parser.add_argument("--config_name",
                        default='config_name_1.bin',
                        type=str,
                        )
    # Other parameters
    parser.add_argument("--cache_dir",
                        default="./data/model/",
                        type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument("--max_seq_length",
                        default=128,
                        type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. \n"
                             "Sequences longer than this will be truncated, and sequences shorter \n"
                             "than this will be padded.")
    parser.add_argument("--do_train",
                        action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_lower_case",
                        action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=8,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--log_frq",
                        default=50,
                        type=int)
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=1.0,
                        type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--n_warmup",
                        default=1000,
                        type=int,
                        help="step of training to perform linear learning rate warmup for.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument('--fp16',
                        action='store_true',
                        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument('--parall',
                        action='store_true')
    parser.add_argument('--loss_scale',
                        type=float, default=0,
                        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
                             "0 (default value): dynamic loss scaling.\n"
                             "Positive power of 2: static loss scaling value.\n")
    args = parser.parse_args()


    # COVID-19 (xgfy) task
    processors = {
        "xgfy": SimProcessor
    }

    num_labels_task = {
        "xgfy": 2,
    }

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda:0" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        # torch.distributed.init_process_group(backend='nccl')
    logger.info("device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".format(
        device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
            args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError("At least one of `do_train` or `do_eval` must be True.")

    # if os.path.exists(args.output_dir) and os.listdir(args.output_dir) and args.do_train:
    #     raise ValueError("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    # if not os.path.exists(args.output_dir):
    #     os.makedirs(args.output_dir)

    task_name = args.task_name.lower()

    if task_name not in processors:
        raise ValueError("Task not found: %s" % (task_name))

    processor = processors[task_name]
    num_labels = num_labels_task[task_name]
    label_list = processor.get_labels()

    tokenizer = BertTokenizer.from_pretrained(args.vocab_root, do_lower_case=args.do_lower_case)

    train_examples = None
    num_train_optimization_steps = None
    if args.do_train:
        train_examples = processor.get_train_examples(args.data_dir)
        num_train_optimization_steps = int(
            len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
        if args.local_rank != -1:
            num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()

    # Prepare model
    cache_dir = args.cache_dir if args.cache_dir else os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),
                                                                   'distributed_{}'.format(args.local_rank))
    config = BertConfig.from_pretrained(args.config_file, num_labels=num_labels)
    model = BertForSequenceClassification.from_pretrained(args.bert_model,
                                                          config=config,
                                                          cache_dir=cache_dir)
    if args.fp16:
        model.half()
    model.to(device)
    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        model = DDP(model)
    elif n_gpu > 1 and args.parall:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    if args.fp16:
        try:
            from apex.optimizers import FP16_Optimizer
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False,
                              max_grad_norm=1.0)
        if args.loss_scale == 0:
            optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
        else:
            optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)

    else:
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate)


    global_step = 0
    nb_tr_steps = 0
    tr_loss = 0
    if args.do_train:
        train_features = convert_examples_to_features(
            train_examples, label_list, args.max_seq_length, tokenizer)
        logger.info("***** Running training *****")
        logger.info("  Num examples = %d", len(train_examples))
        logger.info("  Batch size = %d", args.train_batch_size)
        logger.info("  Num steps = %d", num_train_optimization_steps)
        all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
        train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        if args.local_rank == -1:
            train_sampler = RandomSampler(train_data)
        else:
            train_sampler = DistributedSampler(train_data)
        train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)
        
        
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=args.n_warmup, num_training_steps=t_total
        )
        model.train()
        for _ in trange(int(args.num_train_epochs), desc="Epoch"):
            tr_loss = 0
            nb_tr_examples, nb_tr_steps = 0, 0
            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                batch = tuple(t.to(device) for t in batch)
                input_ids, input_mask, segment_ids, label_ids = batch
                loss, _ = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)
                if n_gpu > 1 and args.parall:
                    loss = loss.mean()  # mean() to average on multi-gpu.
                if args.gradient_accumulation_steps > 1:
                    loss = loss / args.gradient_accumulation_steps

                if args.fp16:
                    optimizer.backward(loss)
                else:
                    loss.backward()

                tr_loss += loss.item()
                nb_tr_examples += input_ids.size(0)
                nb_tr_steps += 1
                if (step + 1) % args.gradient_accumulation_steps == 0:
                    if args.fp16:
                        # modify the learning rate with the special warm up BERT uses;
                        # when fp16 is off, AdamW plus the scheduler handle this automatically
                        lr_this_step = scheduler.get_lr()[0]
                        for param_group in optimizer.param_groups:
                            param_group['lr'] = lr_this_step
                    optimizer.step()
                    scheduler.step()
                    optimizer.zero_grad()
                    global_step += 1
                    if global_step % args.log_frq == 0:
                        logger.info("TrLoss: {:.2f} | Loss: {:.2f} | Lr: {:.2e}".format(tr_loss, loss.item(), scheduler.get_lr()[0]))

    if args.do_train:
        # Save a trained model and the associated configuration
        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
        output_model_file = os.path.join(args.output_dir, args.weight_name)
        torch.save(model_to_save.state_dict(), output_model_file)
        output_config_file = os.path.join(args.output_dir, args.config_name)
        with open(output_config_file, 'w') as f:
            f.write(model_to_save.config.to_json_string())

        # Load a trained model and config that you have fine-tuned
        config = BertConfig.from_json_file(output_config_file)
        model = BertForSequenceClassification(config)
        model.load_state_dict(torch.load(output_model_file))
    else:
        output_model_file = os.path.join(args.output_dir, args.weight_name)
        output_config_file = os.path.join(args.output_dir, args.config_name)
        config = BertConfig.from_json_file(output_config_file)
        model = BertForSequenceClassification(config)
        model.load_state_dict(torch.load(output_model_file))
        # model = BertForSequenceClassification.from_pretrained(args.bert_model)
    model.to(device)

    if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        eval_examples = processor.get_dev_examples(args.data_dir)
        eval_features = convert_examples_to_features(
            eval_examples, label_list, args.max_seq_length, tokenizer)
        logger.info("***** Running evaluation *****")
        logger.info("  Num examples = %d", len(eval_examples))
        logger.info("  Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
        all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
        all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
        all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)
        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0

        for input_ids, input_mask, segment_ids, label_ids in tqdm(eval_dataloader, desc="Evaluating"):
            input_ids = input_ids.to(device)
            input_mask = input_mask.to(device)
            segment_ids = segment_ids.to(device)
            label_ids = label_ids.to(device)

            with torch.no_grad():
                tmp_eval_loss, logits = model(input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids)

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            tmp_eval_accuracy = accuracy(logits, label_ids)

            eval_loss += tmp_eval_loss.mean().item()
            eval_accuracy += tmp_eval_accuracy

            nb_eval_examples += input_ids.size(0)
            nb_eval_steps += 1

        eval_loss = eval_loss / nb_eval_steps
        eval_accuracy = eval_accuracy / nb_eval_examples
        loss = tr_loss / nb_tr_steps if args.do_train else None
        result = {'eval_loss': eval_loss,
                  'eval_accuracy': eval_accuracy,
                  'global_step': global_step,
                  'loss': loss}
        logger.info(result)
        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
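The fragment ends right after building output_eval_file without writing to it. A sketch of the usual follow-up, modeled on the classic run_classifier examples and therefore an assumption about intent rather than code from the original:

        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(result.keys()):
                logger.info("  %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))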
Example #15
        outputs = (
            start_logits,
            end_logits,
        ) + outputs[2:]
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions.clamp_(0, ignored_index)
            end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2
            outputs = (total_loss, ) + outputs

        return outputs  # (loss), start_logits, end_logits, (hidden_states), (attentions)


if __name__ == '__main__':
    from transformers import BertConfig
    config = BertConfig()
    config.bitW = 8
    config.layer_name_list = []
    encoder = BertEncoder(config=config)
Example #16
 def init_encoder(cls, args, dropout: float = 0.1):
     cfg = BertConfig.from_pretrained("bert-base-uncased")
     if dropout != 0:
         cfg.attention_probs_dropout_prob = dropout
         cfg.hidden_dropout_prob = dropout
     return cls.from_pretrained("bert-base-uncased", config=cfg)
Example #17
def train(args, model_name_or_path, train_data, train_dataloader, valid_data,
          valid_dataloader):

    pro = processer()
    labellist = pro.get_labels()
    trainloss = TrainLoss()

    # ***** Load the model *****
    model = BertForSequenceClassification
    config = BertConfig.from_pretrained(model_name_or_path,
                                        num_labels=len(labellist))
    model = model.from_pretrained(model_name_or_path, config=config)

    # ***** Move the model to the device *****
    if torch.cuda.is_available():
        # single-GPU computation
        torch.cuda.set_device(0)
        device = torch.device('cuda', 0)  # set the GPU device index
    else:
        device = torch.device('cpu')
    model.to(device)

    # ***** Optimizer and schedule *****
    t_total = len(train_dataloader
                  ) // args.gradient_accumulation_steps * args.num_train_epochs
    warmup_steps = int(t_total * args.warmup_proportion)

    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      eps=args.adam_epsilon)
    scheduler = WarmupLinearSchedule(optimizer,
                                     warmup_steps=warmup_steps,
                                     t_total=t_total)

    # ***** Training setup info *****
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_data))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d",
                args.train_batch_size)
    logger.info("  Gradient Accumulation steps = %d",
                args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    # ***** Start training *****
    tr_loss, logging_loss = 0.0, 0.0

    model.zero_grad()
    seed_everything(args.seed)

    for num in range(args.num_train_epochs):
        train_all_steps = 0
        train_steps = []
        train_losses = []

        global_step = 0
        logger.info(f'****************Train epoch-{num}****************')
        pbar = ProgressBar(n_total=len(train_dataloader), desc='Train')
        for step, batch in enumerate(train_dataloader):
            # *** Record the step for plotting the loss curve ***
            train_all_steps += 1
            train_steps.append(train_all_steps)

            model.train()

            # *** Forward pass ***
            batch = tuple(t.to(device) for t in batch)
            inputs = {
                'input_ids': batch[0],
                'attention_mask': batch[1],
                'token_type_ids': batch[2],
                'labels': batch[3]
            }
            outputs = model(**inputs)  # the model already applies the loss to the outputs and labels, so outputs contains the loss value

            # *** Backpropagate the loss ***
            loss = outputs[0]
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           args.max_grad_norm)  # gradient clipping

            # *** Record the loss for plotting the loss curve ***
            train_losses.append(loss.detach().cpu().numpy())

            # *** Optimizer step ***
            pbar(step, {'loss': loss.item()})
            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()  # optimizer update
                scheduler.step()  # learning-rate schedule update
                model.zero_grad()
                global_step += 1

        # save a model checkpoint after each training epoch
        output_dir = os.path.join(args.output_dir,
                                  f'model_checkpoint_epoch_{num}')
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        print('')  # avoid printing everything on the same line
        # logger.info(f'save model checkpoint-{global_step} to {output_dir} ')
        model.save_pretrained(output_dir)  # save the model

        # *** Plot a loss curve after each training epoch ***
        trainloss.train_loss(steps=train_steps,
                             losses=train_losses,
                             epoch=num,
                             args=args,
                             type='train',
                             max_step=train_all_steps)

        # ***** Validate after each training epoch *****
        print('')
        logger.info(f'****************Valid epoch-{num}****************')
        logger.info("  Num examples = %d", len(valid_data))
        logger.info("  Batch size = %d", args.valid_batch_size)
        valid_steps, valid_losses, valid_all_steps = valid(
            args=args,
            model=model,
            device=device,
            valid_data=valid_data,
            valid_dataloader=valid_dataloader)
        trainloss.train_loss(steps=valid_steps,
                             losses=valid_losses,
                             epoch=num,
                             args=args,
                             type='valid',
                             max_steps=valid_all_steps)

        # clear the CUDA cache after each epoch
        if 'cuda' in str(device):
            torch.cuda.empty_cache()
Example #18
# In[ ]:

import numpy as np
import json
from sklearn.model_selection import train_test_split

import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification, glue_convert_examples_to_features

from transformers.configuration_bert import BertConfig

# In[ ]:

tokenizer = BertTokenizer('../models/BERT-vocab1.dms')

config = BertConfig.from_json_file('../models/BERT-config0.json')

model = TFBertForSequenceClassification.from_pretrained(
    '../models/BERT-transfer1', config=config)

# In[ ]:

fname = '../data/prelabeled/test47_even.tfrecord'
# BATCH_SIZE = 64
feat_spec = {
    'idx': tf.io.FixedLenFeature([], tf.int64),
    'sentence': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.int64)
}
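The notebook fragment stops after defining feat_spec; the remaining cells presumably mirror the classify() helper in Example #12, which uses the same tokenizer, config and model paths. A sketch of that continuation, where the label_list and batch size are assumptions carried over from Example #12:

# In[ ]:

def parse_ex(ex_proto):
    return tf.io.parse_single_example(ex_proto, feat_spec)

tweets = tf.data.TFRecordDataset(fname).map(parse_ex)

eval_df = glue_convert_examples_to_features(examples=tweets,
                                            tokenizer=tokenizer,
                                            max_length=128,
                                            task='sst-2',
                                            label_list=['0', '-1', '1'])
eval_df = eval_df.batch(64)

y_preds = model.predict(eval_df)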

Example #19
def main():
    parser = ArgumentParser()
    parser.add_argument("--model_name", type=str, required=True,
                        choices=["GMMBert", "LogBert", "ExpBert", "FlowBert", "DisBert"])

    
    parser.add_argument("--dataset", type=str, required=True,
                        choices=["fin-all", "fin-dol", "sci-doc"])
    
    parser.add_argument('--saved_checkpoint', type=str, default=None, required=False)

    parser.add_argument("--bert_model", type=str, default='bert-base-uncased', 
                            help="Bert pre-trained model selected in the list: bert-base-uncased, "
                             "bert-large-uncased, bert-base-cased, bert-base-multilingual, bert-base-chinese.")

    parser.add_argument('--do_lower_case', type=str_to_bool, default=True, help="Lower case the text and model.")

    parser.add_argument('--do_pretrain', type=str_to_bool, default=True, help="Use a pretrained Bert Parameters.")
    parser.add_argument('--do_pretrain_wpe', type=str_to_bool, default=True, help="Use a pretrained Bert Parameters only for wpe embeddings")
    

    parser.add_argument('--log_criterion', type=str, default='L1',  choices=["L1", "L2", ''], help="Loss function to use for LogBert")

    parser.add_argument('--do_gmm', type=str_to_bool, default=False, help="Use the Gaussian mixture model components.")
    parser.add_argument('--do_log', type=str_to_bool, default=False, help="Do L2 over the numbers in logspace")
    parser.add_argument('--do_dis', type=str_to_bool, default=False, help="Discriminative baseline")
    parser.add_argument('--do_anomaly', type=str_to_bool, default=True, help="Do anomaly evaluation")

    parser.add_argument('--do_exp', type=str_to_bool, default=False, help="Latent Exponent Model")
    parser.add_argument('--exp_truncate', type=str_to_bool, default=True, help="Use a truncated normal distribution.")
    
    
    parser.add_argument('--do_flow', type=str_to_bool, default=False, help="Do flow over the numbers in logspace")
    parser.add_argument('--flow_criterion', type=str, default='L1',  choices=["L1", "L2", ''], help="Loss function to use for 'Flow'Bert")
    parser.add_argument('--flow_v', type=str, default='',  choices=['1a', '1b', '2a', '2b', ''], help="Mode for 'Flow'Bert")
    parser.add_argument('--flow_fix_mu', type=str_to_bool, default=False, help="Use a fixed mu for flow model")
    parser.add_argument("--flow_scale", type=float, default=10.0)

    parser.add_argument("--exp_logvar_scale", type=float, default=-5.0)
    parser.add_argument("--exp_logvar", type=str_to_bool, default=False)

    parser.add_argument("--drop_rate", type=float, default=0.0, help='Droprate of 0 is no droprate')

    parser.add_argument("--do_eval", type=str_to_bool, default=False)
    parser.add_argument("--do_test", type=str_to_bool, default=False)

    parser.add_argument("--reduce_memory", action="store_true",
                        help="Store training data as on-disc memmaps to massively reduce memory usage")

    parser.add_argument("--patience", type=int, default=3, help="Number of early stop epochs patience ")
    parser.add_argument("--epochs", type=int, default=10, help="Number of epochs to train for")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--gradient_accumulation_steps',
                        type=int,
                        default=1,
                        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--train_batch_size",
                        default=32,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=512,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--warmup_steps",
                        default=0,
                        type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--adam_epsilon",
                        default=1e-8,
                        type=float,
                        help="Epsilon for Adam optimizer.")
    
    parser.add_argument("--lr_bert", default=3e-5, type=float, help="The initial learning rate for Adam for bert params")
    parser.add_argument("--lr_mlp", default=3e-5, type=float)

    parser.add_argument("--weight_decay",
                        default=0.01,
                        type=float,
                        help="Adam's weight l2 regularization")
    parser.add_argument("--clip_grad",
                        default=5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")


    parser.add_argument('--gmm_crossentropy', type=str_to_bool, default=False, help="GMM Crossentropy.")
    parser.add_argument('--gmm_exponent', type=str_to_bool, default=True, help="Instead of Kernels use powers of 10")
    parser.add_argument('--gmm_nmix',
                        type=int,
                        default=31,
                        help="number of mixtures used only for gmm. [1,3,7,15,31,63,127,255,511]")
    parser.add_argument('--optim', type=str, default='sgd',  choices=['sgd', 'adam'], help="Loss function to use for LogBert")
    
    parser.add_argument('--min_exponent', type=int, default=-1, help="min exponent size")
    parser.add_argument('--max_exponent', type=int, default=16, help="max exponent size")
    parser.add_argument('--n_exponent', type=int, default=17, help="sum of min and max")
    
    parser.add_argument('--embed_exp', type=str_to_bool, default=False, help="Learn an input exponent embedding")
    parser.add_argument('--embed_exp_opt', type=str, default='high', choices=['low', 'high', ''], help="high or low learning rate for embeddings")

    parser.add_argument('--embed_digit', type=str_to_bool, default=False, help="Learn in input embedding of numbers using LSTM over digits")
    parser.add_argument('--output_embed_exp', type=str_to_bool, default=False, help="Learn in input embedding and attach after Bert")
    parser.add_argument('--zero_init', type=str_to_bool, default=False, help="Start non pretrained embeddings at zero")

    
    parser.add_argument("--n_digits", type=int, default=14, help="Size of digit vocab includes e.+-")
    parser.add_argument("--ez_digits", type=int, default=32, help="Digit embedding size")


    args = parser.parse_args()

    args.pregenerated_data = Path(PREGENERATED_DATA[args.dataset])
    args.output_dir = Path(f'{CHECKPOINT_PATH}/{args.dataset}')
    
    sanity_check(args)

    args.savepath = args.output_dir
    
    if args.saved_checkpoint is not None:
        args.output_dir = Path(args.saved_checkpoint)
        args.run_name = args.output_dir.stem
        num_data_epochs = 1
    else:
        args.output_dir, args.run_name = build_savepath(args)

    print('dataset', args.dataset)
    print('output_dir', args.output_dir)
    print('pregenerated_data', args.pregenerated_data)
    print('run_name', args.run_name)
    
    wandb.init(project="mnm-paper", name=f'{args.run_name}')
    wandb.config.update(args, allow_val_change=True)

    samples_per_epoch = []
    for i in range(args.epochs):
        epoch_file = args.pregenerated_data / f"train_epoch_{i}.json"
        metrics_file = args.pregenerated_data / f"train_epoch_{i}_metrics.json"
        if epoch_file.is_file() and metrics_file.is_file():
            metrics = json.loads(metrics_file.read_text())
            samples_per_epoch.append(metrics['num_training_examples'])
        else:
            if i == 0:
                print(f'epoch_file:{epoch_file}')
                exit("No training data was found!")
            print(f"Warning! There are fewer epochs of pregenerated data ({i}) than training epochs ({args.epochs}).")
            print("This script will loop over the available data, but training diversity may be negatively impacted.")
            num_data_epochs = i
            break
    else:
        num_data_epochs = args.epochs

    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    n_gpu = torch.cuda.device_count()
    
    logging.info("device: {} n_gpu: {}".format(
        device, n_gpu))

    
    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


    if args.output_dir.is_dir() and list(args.output_dir.iterdir()):
        logging.warning(f"Output directory ({args.output_dir}) already exists and is not empty!")
    args.output_dir.mkdir(parents=True, exist_ok=True)


    total_train_examples = 0
    for i in range(args.epochs):
        # The modulo takes into account the fact that we may loop over limited epochs of data
        total_train_examples += samples_per_epoch[i % len(samples_per_epoch)]

    num_train_optimization_steps = int(
        total_train_examples / args.train_batch_size / args.gradient_accumulation_steps)

    # Prepare model
    NumberBertModel = get_model(args)


    if args.do_test:
        best_model, tokenizer, best_path = load_best(args)    
        global_step = 0
        train_numbers = get_numbers_from_split(args, tokenizer, device, num_data_epochs, split='train')
        train_mean, train_median = np.mean(train_numbers), np.median(train_numbers)
        
        best_model.to(device)
        best_model.eval()
        
        if args.do_dis:
            test_metrics = evaluate_discriminative(args, best_model, tokenizer, device, global_step, 'test', train_mean, train_median, train_numbers)
        else:
            test_metrics = evaluation(args, best_model, tokenizer, device, global_step, 'test', train_mean, train_median, train_numbers)
        save_results(best_path, test_metrics)
        save_args(best_path, args)
        return

    early_stopper = EarlyStopping('valid_one_loss', min_delta=0.0,
                                patience=args.patience, monitor_mode='min')

    if args.saved_checkpoint is not None:
        print('args.saved_checkpoint', args.saved_checkpoint)
        tokenizer = BertNumericalTokenizer.from_pretrained(args.saved_checkpoint)
        model = NumberBertModel.from_pretrained(args.saved_checkpoint, args=args)
        #uncomment this
        train_numbers = get_numbers_from_split(args, tokenizer, device, num_data_epochs, split='train')
    else:
        tokenizer = BertNumericalTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case)
        train_numbers = get_numbers_from_split(args, tokenizer, device, num_data_epochs, split='train')
        # old_save_dir = None

        if args.do_pretrain:
            model = NumberBertModel.from_pretrained(args.bert_model, args=args)
        else:
            config = BertConfig.from_json_file('./bert-base-uncased-config.json')
            model = NumberBertModel(config, args)

            if args.do_pretrain_wpe:
                pre_model = NumberBertModel.from_pretrained(args.bert_model, args=args)
                # pretrained_dict = 
                pretrained_dict = pre_model.state_dict()
                # print('pretrained_dict', pretrained_dict)
                
                pretrained_dict = {k: v for k, v in pretrained_dict.items() if 'embedding' in k}

                model_dict = model.state_dict()

                pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
                # 2. overwrite entries in the existing state dict
                model_dict.update(pretrained_dict) 
                # 3. load the new state dict
                model.load_state_dict(model_dict)

        
        if args.do_gmm:
            kernel_locs, kernel_scales = get_gmm_components(args, train_numbers)
            model.set_kernel_locs(kernel_locs, kernel_scales)

        special_tokens_dict = {'additional_special_tokens': ('[UNK_NUM]',)}
        num_added_toks = tokenizer.add_special_tokens(special_tokens_dict)
        print('We have added', num_added_toks, 'tokens')
        model.resize_token_embeddings(len(tokenizer))
        # model.set_params(args)

    def set_dropout(model, drop_rate):
        for name, child in model.named_children():
            if isinstance(child, torch.nn.Dropout):
                child.p = drop_rate
            set_dropout(child, drop_rate=drop_rate)
    set_dropout(model, drop_rate=args.drop_rate)


    wandb.watch(model, log="all")
    model.to(device)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())

    optimizer_grouped_parameters = set_lr(args, param_optimizer)
    
    if args.optim == 'sgd':
        optimizer = torch.optim.SGD(optimizer_grouped_parameters, lr=args.lr_bert)
    elif args.optim == 'adam':
        optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=args.lr_bert, eps=args.adam_epsilon)
    
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=args.warmup_steps,
                                                 num_training_steps=num_train_optimization_steps)

    global_step = 0
    logging.info("***** Running training *****")
    logging.info(f"  Num examples = {total_train_examples}")
    logging.info("  Batch size = %d", args.train_batch_size)
    logging.info("  Num steps = %d", num_train_optimization_steps)

    train_mean, train_median = np.mean(train_numbers), np.median(train_numbers)
    
    if args.do_eval:
        model.eval()
        if args.do_dis:
            train_epoch_metrics = evaluate_discriminative(args, model, tokenizer, device, global_step, 'train', train_mean, train_median, train_numbers)
            valid_epoch_metrics = evaluate_discriminative(args, model, tokenizer, device, global_step, 'valid', train_mean, train_median, train_numbers)
        else:
            # evaluation(args, model, tokenizer, device, global_step, 'train', train_mean, train_median, train_numbers)
            # valid_epoch_metrics = evaluation(args, model, tokenizer, device, global_step, 'valid', train_mean, train_median, train_numbers)
            
            #EMNLP FINAL
            test_metrics = evaluation(args, model, tokenizer, device, global_step, 'test', train_mean, train_median, train_numbers)
        return



    model.train()
    global_step = train_loop(args, model, optimizer, scheduler, tokenizer, device, optimizer_grouped_parameters, early_stopper,
        train_numbers, train_mean, train_median, global_step, n_gpu,
        num_data_epochs)

    del model
    best_model, tokenizer, best_path = load_best(args)    
    best_model.to(device)
    best_model.eval()
    if args.do_dis:
        test_metrics = evaluate_discriminative(args, best_model, tokenizer, device, global_step, 'test', train_mean, train_median, train_numbers)
    else:
        test_metrics = evaluation(args, best_model, tokenizer, device, global_step, 'test', train_mean, train_median, train_numbers)
    save_results(best_path, test_metrics)
    save_args(best_path, args)

    #flush check
    wandb.log({})
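Several arguments above use type=str_to_bool, a helper that is not shown in this fragment. A common implementation looks like the sketch below; this is an assumption, not the repository's own code:

import argparse

def str_to_bool(value):
    # Accept the usual truthy/falsy spellings on the command line.
    if isinstance(value, bool):
        return value
    if value.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if value.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')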
Example #20
        end_logits = end_logits.squeeze(-1)

        outputs = (
            start_logits,
            end_logits,
        ) + outputs[2:]
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions.clamp_(0, ignored_index)
            end_positions.clamp_(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2
            outputs = (total_loss, ) + outputs

        return outputs  # (loss), start_logits, end_logits, (hidden_states), (attentions)


if __name__ == '__main__':
    from transformers import BertConfig
    config = BertConfig()

    encoder = BertEncoder(config=config)
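BertEncoder operates on a batch of hidden states rather than token ids, so it can be exercised on random tensors. A minimal sketch, assuming a transformers 4.x import path (older releases expose it under transformers.modeling_bert) and indexing the output since the return type varies across versions:

import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertEncoder

config = BertConfig(num_hidden_layers=2, num_attention_heads=8, hidden_size=256)
encoder = BertEncoder(config)

hidden_states = torch.randn(2, 16, config.hidden_size)  # (batch, seq_len, hidden)
outputs = encoder(hidden_states)
print(outputs[0].shape)  # torch.Size([2, 16, 256])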
Example #21
val_clean_ds = val_parse_ds
test_clean_ds = test_parse_ds

with open('data/info.json') as json_file:
    data_info = json.load(json_file)
train_examples = data_info['train_length']
valid_examples = data_info['validation_length']
test_examples = data_info['test_length']

USE_XLA = False
USE_AMP = False
tf.config.optimizer.set_jit(USE_XLA)
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": USE_AMP})

tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
config = BertConfig("bert_config.json")
model = TFBertForSequenceClassification.from_pretrained('bert-base-cased',
                                                        config=config)

#Training Dataset
train_dataset = glue_convert_examples_to_features(examples=tr_clean_ds,
                                                  tokenizer=tokenizer,
                                                  max_length=128,
                                                  task='sst-2',
                                                  label_list=['1', '3'])
train_dataset = train_dataset.shuffle(train_examples).batch(BATCH_SIZE).repeat(
    -1)

#Validation Dataset
valid_dataset = glue_convert_examples_to_features(examples=val_clean_ds,
                                                  tokenizer=tokenizer,
Example #22
        path_input_test_data = "../module_dataset/dataset/dataset_preprocess/pair_sequence/test_data/" \
                               "private_test_pair_without_punc.csv"

        no_cuda = False
        n_gpu = 1
        device = "cuda:0"
        seed = 42

        max_seq_length = 400
        max_query_length = 64
        weight_class = [1, 1]

    args = Args()

    device = torch.device(args.device)
    tokenizer = BertTokenizer.from_pretrained(args.folder_model,
                                              do_lower_case=args.do_lower_case)

    config = BertConfig.from_pretrained(args.folder_model)

    # Customize some parameters for the custom BERT
    config = config.to_dict()
    config.update({"device": args.device})
    config = BertConfig.from_dict(config)

    model = BERTQa.from_pretrained(args.folder_model, config=config)

    model = model.to(device)
    get_predict_dl(model, tokenizer, args)
Example #23
    def loss(self, input_ids, attention_mask, token_type_ids, label):
        target = label

        final_output = self.compute(input_ids, attention_mask, token_type_ids)
        if self.use_pooler:
            logits = self.qa_outputs(final_output)
        else:
            logits = self.qa_outputs_cat(final_output)

        class_weights = torch.FloatTensor(self.weight_class).to(self.device)
        loss = F.cross_entropy(logits, target, weight=class_weights)

        predict_value = torch.max(logits, 1)[1]
        list_predict = predict_value.cpu().numpy().tolist()
        list_target = target.cpu().numpy().tolist()

        return loss, list_predict, list_target


if __name__ == '__main__':
    from transformers.configuration_bert import BertConfig

    config = BertConfig.from_pretrained("bert-base-multilingual-uncased",
                                        cache_dir="../resources/cache_model")
    config = config.to_dict()
    config.update({"weight_class": [1, 1]})
    config = BertConfig.from_dict(config)
    # model = BERTQa.from_pretrained("bert-base-multilingual-uncased",
    #                                cache_dir="../resources/cache_model", config=config)
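The loss method above wraps a weighted cross-entropy over the QA head's logits. A tiny self-contained sketch of that core computation on dummy tensors, with shapes chosen purely for illustration:

import torch
import torch.nn.functional as F

logits = torch.randn(8, 2)                 # (batch, num_classes), stand-in for the qa_outputs head
target = torch.randint(0, 2, (8,))         # gold labels
class_weights = torch.FloatTensor([1, 1])  # weight_class from the config

loss = F.cross_entropy(logits, target, weight=class_weights)
predict_value = torch.max(logits, 1)[1]    # predicted class per example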
Example #24
#%%
import torch
from torch.optim import Adam
from transformers.configuration_albert import AlbertConfig
from transformers.configuration_bert import BertConfig
from src.dataloader.Dataset import EETaskDataloader
from src.dataloader.utils import load_data
from src.model.AlbertCRF import AlbertCrfForNer
from src.model.BertCRF import BertCrfForNer
from src.model.BertSoftMax import BertSoftmaxForNer
from src.util.EETaskRun import Run
from src.util.extract_arguments import extract_arguments_crf, extract_arguments_softmax
from src.util.utils import lcs
#%%
config = BertConfig.from_pretrained(
    r"/home/longred/lic2020_baselines/chinese_L-12_H-768_A-12/bert-base-chinese-config.json"
)
config.pretrained_path = r"/home/longred/lic2020_baselines/chinese_L-12_H-768_A-12/bert-base-chinese-pytorch_model.bin"
config.vocab_path = r"/home/longred/lic2020_baselines/chinese_L-12_H-768_A-12/vocab.txt"
config.train_data_path = r"/home/longred/EETask/data/train.json"
config.batch_size = 32
config.event_schema_path = r"/home/longred/EETask/data/event_schema.json"

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
EE = EETaskDataloader(config)
train_loader = EE.get_train_data_loader()
config.num_labels = EE.num_labels
config.label2id = EE.label2id
data = load_data("/home/longred/EETask/data/dev.json")
model = BertCrfForNer.from_pretrained(
    pretrained_model_name_or_path=config.pretrained_path,