Example #1
class Predictor(object):
    def __init__(self, args):
        pretrain_name = 'bert-base-cased'
        if args.model_info.bert_path:
            pretrain_name = args.model_info.bert_path
        self.tokenizer = BertTokenizer.from_pretrained(pretrain_name)
        print(f"Tokenizer from:{pretrain_name}")
        train_conf = args.train_info
        model_conf = args.model_info
        self.device = train_conf.device
        self.class_num = model_conf.class_num
        self.model = BertClassifier(model_conf)
        self.model.load_state_dict(
            torch.load(train_conf.model_path,
                       map_location=torch.device(self.device)))
        self.model.to(self.device)
        self.lr = train_conf.lr
        self.max_len = train_conf.max_seq_len
        self.conf = args
        with open(args.label_map_path) as f:
            self.label_map = json.load(f)
        self.id2label = {i: label_str
                         for label_str, i in self.label_map.items()}
        self.softmax = Softmax(dim=1)

    def predict(self, sens):
        d_loader = self.sen_2_dl(sens)
        y_pred = list()
        self.model.eval()  # make sure dropout/batch-norm run in inference mode
        with torch.no_grad():
            for batch in d_loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                logits = self.model(input_ids, attention_mask)
                logits = torch.sigmoid(logits)  # convert logits to per-label scores
                y_pred.append(logits)
        y_pred = torch.cat(y_pred)
        y_pred = y_pred.cpu().numpy()
        res = list()
        for y in y_pred:
            res.append(self._score_2_dict(y))
        return res

    def _score_2_dict(self, single_pred):
        res = dict()
        for i, score in enumerate(single_pred):
            label_str = self.id2label[i]
            res[label_str] = float(score)
        return res

    def sen_2_dl(self, sens):
        texts = [i.strip() for i in sens]
        # dummy labels (one per text): the dataset requires a labels argument
        # even though they are unused at inference time
        labels = [999] * len(texts)
        ds = SentimentDataset(self.tokenizer, texts, labels, self.max_len)
        _loader = dataloader.DataLoader(
            ds, batch_size=self.conf.train_info.batch_size, shuffle=False)
        return _loader
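A minimal usage sketch for the Predictor above, assuming `args` is the same parsed configuration object used during training and the inputs are plain strings:

predictor = Predictor(args)  # args: parsed config with model_info / train_info / label_map_path
scores = predictor.predict(['great movie, would watch again', 'terrible customer service'])
for sentence_scores in scores:
    # each entry maps a label string to its score for one input sentence
    print(sentence_scores)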
Example #2
class Evaluator(object):
    def __init__(self, args):
        pretrain_name = 'bert-base-cased'
        if args.model_info.bert_path:
            pretrain_name = args.model_info.bert_path
        print(f"Tokenizer from:{pretrain_name}")
        train_conf = args.train_info
        model_conf = args.model_info
        self.model_type = model_conf.model
        if self.model_type == 'bert_seq':
            self.model = BertClassifier(model_conf)
            self.tokenizer = BertTokenizer.from_pretrained(pretrain_name)
            self.ds = SentimentDataset
        elif self.model_type == 'GPT2':
            self.model = GPT2Classifier(model_conf)
            self.tokenizer = GPT2Tokenizer.from_pretrained(pretrain_name)
            self.ds = GPT2Dataset
        else:
            raise ValueError(f"Unsupported model type: {self.model_type}")
        self.device = train_conf.device
        self.class_num = model_conf.class_num
        self.model.load_state_dict(
            torch.load(train_conf.model_path,
                       map_location=torch.device(self.device)))
        self.model.to(self.device)
        self.lr = train_conf.lr
        self.max_len = train_conf.max_seq_len
        self.conf = args
        with open(args.label_map_path) as f:
            self.label_map = json.load(f)
        self.id2label = {i: label_str
                         for label_str, i in self.label_map.items()}

    def run(self, batch_size=64):
        test_path = self.conf.train_info.test_path
        test_loader = self.get_data_loader(test_path, batch_size)
        acc, recall, f1_score, cm, report, res = self.evaluate(test_loader)
        print(f"Accuracy score of the model is {acc}")
        print(f"Recall score of the model is {recall}")
        print(f"F1 score of the model is {f1_score}")
        print(f"Confusion matrix of the model is {cm}")
        print(report)
        dir_ = os.path.dirname(test_path)
        dir_ = os.path.dirname(dir_)
        dir_ = os.path.split(dir_)[0]
        new_path = os.path.join(dir_, 'logs', 'bad_case.json')
        with open(new_path, 'w') as f:
            for i in res:
                print(json.dumps(i, ensure_ascii=False), file=f)

    def evaluate(self, _loader):
        self.model.eval()
        y_true = list()
        y_pred = list()
        res = []
        with torch.no_grad():
            for batch in _loader:
                input_ids = batch['input_ids'].to(self.device)
                attention_mask = batch['attention_mask'].to(self.device)
                y = batch['labels']
                y = torch.squeeze(y, 1)
                y = y.to(self.device)
                logits = self.model(input_ids, attention_mask)
                y_true.append(y)
                y_pred.append(logits)
                pred_labels = torch.argmax(logits, dim=1)
                preds = pred_labels.cpu().numpy()
                true = y.cpu().numpy()  # y is already squeezed to shape (batch,)
                if len(true) < 1:
                    continue
                for i, c_y in enumerate(true):
                    if c_y != preds[i]:
                        tmp_dict = {
                            'true_label': self.id2label[c_y],
                            'pred_label': self.id2label[preds[i]],
                            'text': batch['text'][i]
                        }
                        res.append(tmp_dict)
            y_true = torch.cat(y_true)
            y_pred = torch.cat(y_pred)
        cm = metrics.cal_cm(y_true, y_pred)
        acc_score = metrics.cal_accuracy(y_true, y_pred)
        recall = metrics.cal_recall(y_true, y_pred)
        f1_score = metrics.cal_f1(y_true, y_pred)
        label_range = [i for i in range(len(self.label_map))]
        target_name = [
            x[0] for x in sorted(self.label_map.items(), key=lambda x: x[1])
        ]
        report = metrics.get_classification_report(y_true, y_pred, label_range,
                                                   target_name)
        return acc_score, recall, f1_score, cm, report, res

    def get_data_loader(self, f_path, batch_size):
        np.random.seed(14)
        texts, labels = prepare(f_path, self.label_map)
        ds = self.ds(self.tokenizer, texts, labels, self.max_len)
        return dataloader.DataLoader(ds,
                                     batch_size=batch_size,
                                     num_workers=self.conf.num_workers,
                                     shuffle=True)
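A sketch of how the evaluator might be driven, assuming `args` is the same parsed configuration object expected by the constructor above:

evaluator = Evaluator(args)
evaluator.run(batch_size=32)  # prints metrics and writes misclassified samples to logs/bad_case.json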
Example #3
        total_count += gt.shape[0]
        total_loss.append(criterion(preds, labels).item())

    loss, acc = np.array(total_loss).mean(), total_correct / total_count
    print("Average Loss: {:.6f}, Accuracy: {:.6f}".format(loss, acc))
    return loss, acc


device = 'cuda' if torch.cuda.is_available() else 'cpu'
epochs = 30
best_acc = 0.0
eval_losses, eval_accs = [], []
train_losses, train_accs = [], []

model = BertClassifier(freeze_bert=False)
model = model.to(device)
# model = nn.DataParallel(model)

train_dataset = EmojiDataset('../../data/train_bert_sentences.npy',
                             '../../data/train_bert_labels.npy')
train_dataloader = DataLoader(train_dataset,
                              batch_size=64,
                              shuffle=False,
                              collate_fn=collate_fn)

test_dataset = EmojiDataset('../../data/test_bert_sentences.npy',
                            '../../data/test_bert_labels.npy')
test_dataloader = DataLoader(test_dataset,
                             batch_size=128,
                             shuffle=False,
                             collate_fn=collate_fn)
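The epoch loop itself is not shown in this excerpt. A minimal sketch of how the objects above are typically driven, assuming a hypothetical train_one_epoch helper and that the evaluation function whose tail appears at the top of this example is named evaluate and returns (loss, acc); the checkpoint filename is also an assumption:

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)

for epoch in range(epochs):
    # train_one_epoch is a hypothetical helper returning (loss, acc) for one pass
    train_loss, train_acc = train_one_epoch(model, train_dataloader, criterion, optimizer, device)
    eval_loss, eval_acc = evaluate(model, test_dataloader, criterion, device)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    eval_losses.append(eval_loss)
    eval_accs.append(eval_acc)
    if eval_acc > best_acc:  # keep the best checkpoint seen so far
        best_acc = eval_acc
        torch.save(model.state_dict(), 'best_bert_classifier.pt')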
Example #4
import pandas as pd

# category labels: sports, entertainment, home, real estate, education,
# fashion, politics, games, technology, finance
labels = ['体育', '娱乐', '家居', '房产', '教育', '时尚', '时政', '游戏', '科技', '财经']

bert_config = BertConfig.from_pretrained('chinese_wwm_pytorch')
bert_config.num_labels = len(labels)
model = BertClassifier(bert_config)
model.load_state_dict(
    torch.load('./best_model_on_trainset.pkl',
               map_location=torch.device('cpu')))

tokenizer = BertTokenizer(vocab_file='chinese_wwm_pytorch/vocab.txt')

device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
model = torch.nn.DataParallel(model, device_ids=[2])
model.to(device)


def predict_text(text):
    input_id, attention_mask, token_type_id = get_bert_input(text, tokenizer)

    input_id = torch.tensor([input_id], dtype=torch.long)
    attention_mask = torch.tensor([attention_mask], dtype=torch.long)
    token_type_id = torch.tensor([token_type_id], dtype=torch.long)

    predicted = model(
        input_id,
        attention_mask,
        token_type_id,
    )
    pred_label = torch.argmax(predicted, dim=1)
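    # The original example is truncated here; a plausible final step (an
    # assumption, not shown in the source) is to map the predicted index
    # back to its label string:
    return labels[pred_label.item()]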
Example #5
    attention_mask = (x != 0).float().to(config.DEVICE).long()
    outputs = MODEL(x, attention_mask=attention_mask)
    return outputs.cpu().detach().numpy()


@app.route('/predict')
def predict():
    comment = request.args.get('comment')
    start_time = time.time()
    prediction = comment_prediction(comment)
    response = {
        'response': {
            label: str(prob)
            for label, prob in zip(config.CLASS_COLS, prediction[0])
        }
    }
    response['response']['comment'] = comment
    response['response']['time_taken'] = str(time.time() - start_time)

    return flask.jsonify(response)


if __name__ == '__main__':
    bert_config = BertConfig.from_pretrained(config.BERT_NAME)
    bert_config.num_labels = config.NUM_CLASSES
    MODEL = BertClassifier(bert_config)
    MODEL.load_state_dict(
        torch.load(config.TRAINED_MODEL_PATH,
                   map_location=torch.device(config.DEVICE)))
    MODEL.to(config.DEVICE)
    MODEL.eval()
    app.run(host=config.HOST, port=config.PORT)
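Once the service is running, the route can be exercised with a plain GET request. A sketch using the requests library; the host and port below are placeholders for whatever config.HOST and config.PORT are set to:

import requests

resp = requests.get('http://localhost:5000/predict',
                    params={'comment': 'This product exceeded my expectations!'})
print(resp.json()['response'])  # per-label probabilities plus the echoed comment and timing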