Example #1
 def __init__(self, bert, freeze_bert=False):
     super(RegressionBert, self).__init__()
     self.bert = BertModel.from_pretrained(bert)
     if freeze_bert:
         for p in self.bert.parameters():
             p.requires_grad = False
     self.fc0 = torch.nn.Linear(768, 1)
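A minimal forward pass to pair with the constructor above, as a hedged sketch (not the original author's code; it assumes a transformers version whose BertModel returns an output object with pooler_output):

def forward(self, input_ids, attention_mask=None):
    # encode the batch and take the pooled [CLS] summary vector
    outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    pooled = outputs.pooler_output                 # shape: [batch, 768]
    # project to a single regression score per example
    return self.fc0(pooled).squeeze(-1)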
Example #2
    def __init__(self): 
        super(Bert, self).__init__()

        self.tokenizer = BertTokenizer.from_pretrained(os.path.join(config.get('model_config')['language_model_path'], 'bert-base-uncased-vocab.txt'))
        modelConfig = BertConfig.from_pretrained(os.path.join(config.get('model_config')['language_model_path'], 'bert_config.json'))
        self.textExtractor = BertModel.from_pretrained(
            os.path.join(config.get('model_config')['language_model_path'], 'pytorch_model.bin'), config=modelConfig)
Example #3
    def __init__(self, opt):
        self.opt = opt

        if 'bert' in opt.model_name:
            tokenizer = Tokenizer4Bert(opt.max_seq_len,
                                       opt.pretrained_bert_name)
            bert = BertModel.from_pretrained(opt.pretrained_bert_name)
            self.pretrained_bert_state_dict = bert.state_dict()
            self.model = opt.model_class(bert, opt).to(opt.device)
        else:
            tokenizer = build_tokenizer(
                fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
                max_seq_len=opt.max_seq_len,
                dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
            embedding_matrix = build_embedding_matrix(
                word2idx=tokenizer.word2idx,
                embed_dim=opt.embed_dim,
                dat_fname='{0}_{1}_embedding_matrix.dat'.format(
                    str(opt.embed_dim), opt.dataset))
            self.model = opt.model_class(embedding_matrix, opt).to(opt.device)

        self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
        self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)

        if opt.device.type == 'cuda':
            logger.info('cuda memory allocated: {}'.format(
                torch.cuda.memory_allocated(device=opt.device.index)))
        self._print_args()
Example #4
def createCsvData():
    config = BertConfig.from_pretrained('bert-base-uncased')
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased')  # pretrained weights; BertModel(config) alone would be randomly initialized
    with Cd("lemmadata"):
        with open("id_to_sent.json") as sent_id_dict_file:
            sent_id_dict = json.load(sent_id_dict_file)
        for dir_item in os.listdir():
            if os.path.isfile(dir_item):
                if dir_item.endswith(".json") and dir_item != "id_to_sent.json":
                    print(dir_item)
                    with open(dir_item, "r") as f:
                        lemma_data = json.load(f)
                    with Cd("vectors"):
                        with open(dir_item[:-5]+".csv", "w") as vector_file:
                            writer = csv.writer(vector_file, delimiter=",")
                            for instance in lemma_data:
                                inst_sent_id = instance["sent_id"]
                                inst_sense = instance["sense"]
                                inst_sent = sent_id_dict[str(inst_sent_id)]
                                if(len(inst_sent) > 511):
                                    continue 
                                vector = vectorizeWordInContext(inst_sent, instance["pos"], tokenizer, model)
                                vec_list = vector.detach().tolist()
                                row_data = [inst_sent_id, instance["pos"], inst_sense] + vec_list
                                writer.writerow(row_data)
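The helper vectorizeWordInContext used above is not shown. A hedged reconstruction of what such a helper might do, assuming the sentence arrives as a list of whitespace tokens and the hidden state of the word's first WordPiece is wanted (names and pooling are illustrative):

import torch

def vectorizeWordInContext(sent_tokens, word_pos, tokenizer, model):
    # build the WordPiece sequence while tracking where the target word lands
    pieces, target_idx = ["[CLS]"], 1
    for i, word in enumerate(sent_tokens):
        if i == word_pos:
            target_idx = len(pieces)
        pieces.extend(tokenizer.tokenize(word))
    pieces.append("[SEP]")
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(pieces)])
    with torch.no_grad():
        last_hidden = model(input_ids)[0]          # [1, seq_len, 768]
    # hidden state of the target word's first sub-token
    return last_hidden[0, target_idx]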
Example #5
def get_bert(bert_type='bert'):
    tokenizer, model = None, None
    if (bert_type == 'bert'):
        ######## bert ###########

        tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
        model = BertModel.from_pretrained('bert-base-cased')

        ########################

    if (bert_type == 'biobert'):
        #### Bio BERT #########

        model = bm.from_pretrained('biobert_v1.1_pubmed')
        tokenizer = BertTokenizer(vocab_file="biobert_v1.1_pubmed/vocab.txt", do_lower_case=False)

        #### Bio BERT #########

    if (bert_type == 'scibert'):
        #### sci bert #########


        config = AutoConfig.from_pretrained('allenai/scibert_scivocab_cased', output_hidden_states=False)
        tokenizer = AutoTokenizer.from_pretrained('allenai/scibert_scivocab_cased', do_lower_case=False)
        model = AutoModel.from_pretrained('allenai/scibert_scivocab_cased', config=config)

        #######################

    return tokenizer, model
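A hedged usage sketch for get_bert (the sentence and mean-pooling choice are illustrative; it assumes a transformers version with a callable tokenizer and an output object on the model):

import torch

tokenizer, model = get_bert('bert')
model.eval()
inputs = tokenizer("BERT produces contextual token embeddings.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
# mean-pool the last hidden layer into a single sentence vector
sentence_vec = outputs.last_hidden_state.mean(dim=1)   # shape: [1, 768]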
Example #6
    def __init__(self, opt):
        self.opt = opt
        if 'bert' in opt.model_name:
            tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
            bert = BertModel.from_pretrained(opt.pretrained_bert_name)
            self.model = opt.model_class(bert, opt).to(opt.device)
        else:
            tokenizer = build_tokenizer(
                fnames=[opt.dataset_file['train'], opt.dataset_file['test']],
                max_seq_len=opt.max_seq_len,
                dat_fname='{0}_tokenizer.dat'.format(opt.dataset))
            embedding_matrix = build_embedding_matrix(
                word2idx=tokenizer.word2idx,
                embed_dim=opt.embed_dim,
                dat_fname='{0}_{1}_embedding_matrix.dat'.format(str(opt.embed_dim), opt.dataset))
            self.model = opt.model_class(embedding_matrix, opt).to(opt.device)

        # self.trainset = ABSADataset(opt.dataset_file['train'], tokenizer)
        self.testset = ABSADataset(opt.dataset_file['test'], tokenizer)
        # assert 0 <= opt.valset_ratio < 1
        # if opt.valset_ratio > 0:
        #     valset_len = int(len(self.trainset) * opt.valset_ratio)
        #     self.trainset, self.valset = random_split(self.trainset, (len(self.trainset) - valset_len, valset_len))
        # else:
        #     self.valset = self.testset
        #
        # if opt.device.type == 'cuda':
        #     logger.info('cuda memory allocated: {}'.format(torch.cuda.memory_allocated(device=opt.device.index)))

        model_path = 'saved/'+opt.model_name+'.hdf5'
        self.model.load_state_dict(torch.load(model_path))
Example #7
    def __init__(self, args, device, checkpoint=None, bert_from_extractive=None):
        super(AbsSummarizer, self).__init__()
        self.args = args
        self.device = device
        self.bert = Bert(args.large, args.temp_dir, args.finetune_bert)

        if bert_from_extractive is not None:
            self.bert.model.load_state_dict(
                dict([(n[11:], p) for n, p in bert_from_extractive.items() if n.startswith('bert.model')]), strict=True)

        if (args.encoder == 'baseline'):
            bert_config = BertConfig(self.bert.model.config.vocab_size, hidden_size=args.enc_hidden_size,
                                     num_hidden_layers=args.enc_layers, num_attention_heads=8,
                                     intermediate_size=args.enc_ff_size,
                                     hidden_dropout_prob=args.enc_dropout,
                                     attention_probs_dropout_prob=args.enc_dropout)
            self.bert.model = BertModel(bert_config)

        if args.max_pos > 512:
            my_pos_embeddings = nn.Embedding(args.max_pos, self.bert.model.config.hidden_size)
            my_pos_embeddings.weight.data[:512] = self.bert.model.embeddings.position_embeddings.weight.data
            my_pos_embeddings.weight.data[512:] = self.bert.model.embeddings.position_embeddings.weight.data[-1][None,:].repeat(args.max_pos-512,1)
            self.bert.model.embeddings.position_embeddings = my_pos_embeddings
        self.vocab_size = self.bert.model.config.vocab_size
        tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
        if (self.args.share_emb):
            tgt_embeddings.weight = copy.deepcopy(self.bert.model.embeddings.word_embeddings.weight)

        self.decoder = TransformerDecoder(
            self.args.dec_layers,
            self.args.dec_hidden_size, heads=self.args.dec_heads,
            d_ff=self.args.dec_ff_size, dropout=self.args.dec_dropout, embeddings=tgt_embeddings)

        self.generator = get_generator(self.vocab_size, self.args.dec_hidden_size, device, self.args.task)
        self.generator[0].weight = self.decoder.embeddings.weight

        if checkpoint is not None:
            print("Abstractor is loading.")
            self.load_state_dict(checkpoint['model'], strict=True)
        else:
            for module in self.decoder.modules():
                if isinstance(module, (nn.Linear, nn.Embedding)):
                    module.weight.data.normal_(mean=0.0, std=0.02)
                elif isinstance(module, nn.LayerNorm):
                    module.bias.data.zero_()
                    module.weight.data.fill_(1.0)
                if isinstance(module, nn.Linear) and module.bias is not None:
                    module.bias.data.zero_()
            for p in self.generator.parameters():
                if p.dim() > 1:
                    xavier_uniform_(p)
                else:
                    p.data.zero_()
            if args.use_bert_emb:
                tgt_embeddings = nn.Embedding(self.vocab_size, self.bert.model.config.hidden_size, padding_idx=0)
                tgt_embeddings.weight = copy.deepcopy(self.bert.model.embeddings.word_embeddings.weight)
                self.decoder.embeddings = tgt_embeddings
                self.generator[0].weight = self.decoder.embeddings.weight
        self.to(device)
Example #8
def aspectSentiment_api():
    data = request.json

    opt = get_parameters()
    opt.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    tokenizer = Tokenizer4Bert(opt.max_seq_len, opt.pretrained_bert_name)
    bert = BertModel.from_pretrained(opt.pretrained_bert_name)
    model = AEN_BERT(bert, opt).to(opt.device)

    print('loading model {0} ...'.format(opt.model_name))
    model.load_state_dict(
        torch.load('aen_bert_restaurant_val_acc0.8098',
                   map_location=opt.device))
    model.eval()
    torch.autograd.set_grad_enabled(False)

    out = []
    for entity, sentences in data.items():
        for sentence in sentences:
            sentence_d = {'aspect': '', 'sentiment': '', 'sentence': ''}
            sentiment_d = {-1: 'Negative', 0: 'Neutral', 1: 'Positive'}

            left = sentence['left']
            aspect = sentence['aspect']
            right = sentence['right']
            sentence = left + aspect + right
            text_bert_indices, bert_segments_ids, text_raw_bert_indices, aspect_bert_indices = prepare_data(
                left, aspect, right, tokenizer)

            text_bert_indices = torch.tensor([text_bert_indices],
                                             dtype=torch.int64).to(opt.device)
            bert_segments_ids = torch.tensor([bert_segments_ids],
                                             dtype=torch.int64).to(opt.device)
            text_raw_bert_indices = torch.tensor([text_raw_bert_indices],
                                                 dtype=torch.int64).to(
                                                     opt.device)
            aspect_bert_indices = torch.tensor([aspect_bert_indices],
                                               dtype=torch.int64).to(
                                                   opt.device)

            inputs = [text_raw_bert_indices, aspect_bert_indices]
            outputs = model(inputs)
            t_probs = F.softmax(outputs, dim=-1).cpu().numpy()
            aspect_sentiment_n = t_probs.argmax(axis=-1) - 1
            print(aspect_sentiment_n)
            aspect_sentiment = sentiment_d[aspect_sentiment_n[0]]

            sentence_d['aspect'] = aspect
            sentence_d['sentiment'] = aspect_sentiment
            sentence_d['sentence'] = sentence
            out.append(sentence_d)
    dic = absa_chapter_combined_s(out)
    absaChapterCombinedS = absa_chapter_to_react(dic)
    returnJson = {
        'sentimentTableData': absaChapterCombinedS,
        'absaChapter': dic
    }
    return returnJson
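The endpoint above expects a JSON body mapping each entity to a list of sentence fragments split around the aspect term. A hedged example request (URL, port, and field values are illustrative, not taken from the original service):

import requests

payload = {
    "battery": [
        {"left": "The ", "aspect": "battery life", "right": " of this laptop is excellent."}
    ]
}
resp = requests.post("http://localhost:5000/aspectSentiment", json=payload)
print(resp.json()["sentimentTableData"])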
Example #9
 def __init__(self, config, num_labels):
     super(BERTCRF, self).__init__(config)
     self.num_labels = num_labels  # ent label num
     self.bert = BertModel(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.fc = nn.Linear(config.hidden_size, num_labels)
     self.crf = CRF(num_labels, batch_first=True)
     self.apply(self.init_bert_weights)
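A hedged sketch of a forward pass for this tagger (it assumes a pytorch-crf style CRF, where calling the CRF returns the log-likelihood and decode returns the best tag paths, and the older BertModel API that returns (sequence_output, pooled_output)):

def forward(self, input_ids, attention_mask, labels=None):
    sequence_output, _ = self.bert(input_ids, attention_mask=attention_mask,
                                   output_all_encoded_layers=False)
    emissions = self.fc(self.dropout(sequence_output))   # [batch, seq_len, num_labels]
    mask = attention_mask.bool()
    if labels is not None:
        # training: negative log-likelihood of the gold tag sequence
        return -self.crf(emissions, labels, mask=mask, reduction='mean')
    # inference: Viterbi-decode the best tag sequence per sentence
    return self.crf.decode(emissions, mask=mask)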
Example #10
 def __init__(self, bert_config, num_choices):
     super(BertCloze, self).__init__(bert_config)
     self.num_choices = num_choices
     self.bert = BertModel(bert_config)
     self.idiom_embedding = nn.Embedding(len(config.idiom2index),
                                         bert_config.hidden_size)
     self.my_fc = nn.Sequential(nn.Dropout(config.hidden_dropout_prob),
                                nn.Linear(bert_config.hidden_size, 1))
Example #11
    def __init__(self, config):
        super(BertForLes, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)

        self.apply(self.init_weights)
Example #12
 def __init__(self):
     super().__init__()
     self.bert = BertModel.from_pretrained(
         Config.pretrained_model_name_or_path)
     self.soft_max = nn.Softmax(dim=-1)
     self.fc = nn.Linear(768 * 3, 2)
     self.dropout = nn.Dropout(0.8)
     self.loss = nn.CrossEntropyLoss()
Example #13
 def __init__(self):
     self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased', do_lower_case=False)
     self.model = BertModel.from_pretrained('bert-base-cased', output_hidden_states=True)
     self.model.eval()
     # read special-token ids from the tokenizer directly (encode() may add [CLS]/[SEP])
     self.pad_token_id = self.tokenizer.pad_token_id
     self.cls_token_id = self.tokenizer.cls_token_id
     self.sep_token_id = self.tokenizer.sep_token_id
     self.device = 'cpu'
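A hedged sketch of how this wrapper might produce an embedding (torch is assumed to be imported; the method name, the last-four-layer average, and indexing the hidden states as outputs[2] are illustrative assumptions):

def embed(self, text):
    ids = self.tokenizer.encode(text, add_special_tokens=True)
    input_ids = torch.tensor([ids]).to(self.device)
    with torch.no_grad():
        outputs = self.model(input_ids)
    hidden_states = outputs[2]                  # 13 tensors: embedding layer + 12 encoder layers
    # average the last four layers and take the [CLS] position as a sentence vector
    return torch.stack(hidden_states[-4:]).mean(dim=0)[0, 0]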
Example #14
 def __init__(self, tokenizing_data, device, embedding_path=None):
     # Load pretrained model/tokenizer
     self.device = device
     self.bert_model = BertModel.from_pretrained(
         'bert-base-uncased', output_hidden_states=True).to(self.device)
     self.tokenizing_data = tokenizing_data
     self.embedding_path = embedding_path
     self.songs_features = torch.tensor([])
Example #15
    def __init__(self, opt):
        super(BertEmbedding, self).__init__()
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                                       do_lower_case=True)

        # Load pre-trained model (weights)
        self.bert_model = BertModel.from_pretrained('bert-base-uncased',
                                                    output_hidden_states=True)
Example #16
    def __init__(self, vocab_size, config):
        super(LM, self).__init__()
        self.model_name = 'LM'
        self.lm_name = config.lm.lm_file
        self.out_dim = config.relation_type

        self.lm = BertModel.from_pretrained(self.lm_name)
        self.fc = nn.Linear(768, self.out_dim)
Example #17
    def __init__(self, hidden_dim, n_layers, tagset_size):
        super(BertLSTM, self).__init__()
        config = BertConfig.from_pretrained('bert-base-multilingual-cased')
        self.model = BertModel(config)

        self.decoder = nn.LSTM(768, hidden_dim, n_layers)

        self.hiddentotag = nn.Linear(hidden_dim, tagset_size)
Example #18
 def __init__(self, config):
     super(ABSABert, self).__init__(config)
     self.bert = BertModel(config)
     self.num_labels = config.num_labels
     self.classifier = torch.nn.Linear(config.hidden_size, self.num_labels)
     self.cls = DepBertPreTrainingHeads(config)
     self.domain_cls = DomainPredictionHead(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
Example #19
def prepare_models():
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertModel.from_pretrained('bert-base-uncased',
                                      output_attentions=True)
    model.eval()
    mask_model = BertForMaskedLM.from_pretrained('bert-base-uncased')
    mask_model.eval()
    return tokenizer, model, mask_model
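A hedged usage sketch for prepare_models, showing how the returned attention maps can be inspected (the sentence is illustrative and the access pattern assumes a transformers version whose model output exposes .attentions):

import torch

tokenizer, model, mask_model = prepare_models()
inputs = tokenizer("The cat sat on the mat.", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
attentions = outputs.attentions            # tuple of 12 tensors, one per layer
print(attentions[-1].shape)                # torch.Size([1, 12, seq_len, seq_len])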
Example #20
def main():
    torch.cuda.empty_cache()
    parser = setup_parser()
    args = parser.parse_args()
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory already exists and is not empty.")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.n_gpu = torch.cuda.device_count()
    args.device = device
    set_seed(args)
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: {}".format(args.task_name))
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()
    num_labels = len(label_list)

    ##Load Models
    config = BertConfig.from_pretrained(args.config_name)
    tokenizer = BertTokenizer.from_pretrained(args.text_encoder_checkpoint,
                                              do_lower_case=args.do_lower_case)
    text_encoder = BertModel.from_pretrained(args.text_encoder_checkpoint,
                                             config=config)
    graph_encoder = GraphEncoder(args.n_hidden, args.min_score)
    if args.graph_encoder_checkpoint:
        graph_encoder.gcnnet.load_state_dict(
            torch.load(args.graph_encoder_checkpoint))

    medsts_classifier = PairClassifier(config.hidden_size + args.n_hidden, 1)
    medsts_c_classifier = PairClassifier(config.hidden_size + args.n_hidden, 5)
    medsts_type_classifier = PairClassifier(config.hidden_size + args.n_hidden,
                                            4)
    model = MedstsNet(text_encoder, graph_encoder, medsts_classifier,
                      medsts_c_classifier, medsts_type_classifier)
    model.to(args.device)

    args.n_gpu = 1

    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                args.task_name,
                                                tokenizer,
                                                evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info('global step = {}, average loss = {}'.format(
            global_step, tr_loss))
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        logger.info("saving model checkpoint to {}".format(args.output_dir))
        model_to_save = model.module if hasattr(model, 'module') else model
        # model_to_save.save_pretrained(args.output_dir)
        torch.save(model_to_save.state_dict(),
                   os.path.join(args.output_dir, 'saved_model.pth'))
        tokenizer.save_pretrained(args.output_dir)
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
Example #21
def main():
    torch.cuda.empty_cache()
    parser = setup_parser()
    args = parser.parse_args()
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir
    ) and args.do_train and not args.overwrite_output_dir:
        raise ValueError("Output directory already exists and is not empty.")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    args.n_gpu = torch.cuda.device_count()
    args.device = device
    set_seed(args)
    args.task_name = args.task_name.lower()
    if args.task_name not in processors:
        raise ValueError("Task not found: {}".format(args.task_name))
    processor = processors[args.task_name]()
    args.output_mode = output_modes[args.task_name]
    label_list = processor.get_labels()

    ##Load Models
    config = BertConfig.from_pretrained(args.config_name)
    tokenizer = BertTokenizer.from_pretrained(args.text_encoder_checkpoint,
                                              do_lower_case=args.do_lower_case)
    text_encoder = BertModel.from_pretrained(args.text_encoder_checkpoint,
                                             config=config)
    graph_encoder = GraphEncoder(args.n_hidden, args.min_score)

    medsts_classifier = PairClassifier(config.hidden_size + args.n_hidden, 1)
    medsts_c_classifier = PairClassifier(config.hidden_size + args.n_hidden, 5)
    medsts_c2_classifier = PairClassifier(config.hidden_size + args.n_hidden,
                                          2)
    medsts_type_classifier = PairClassifier(config.hidden_size + args.n_hidden,
                                            4)
    model = MedstsNet(text_encoder, graph_encoder, medsts_classifier,
                      medsts_c_classifier, medsts_c2_classifier,
                      medsts_type_classifier)
    if args.text_only:
        medsts_classifier = PairClassifier(config.hidden_size, 1)
        medsts_c_classifier = PairClassifier(config.hidden_size, 5)
        medsts_c2_classifier = PairClassifier(config.hidden_size, 2)
        medsts_type_classifier = PairClassifier(config.hidden_size, 4)
        model = MedstsNet_Textonly(text_encoder, medsts_classifier,
                                   medsts_c_classifier, medsts_c2_classifier,
                                   medsts_type_classifier)

    model.to(args.device)

    args.n_gpu = 1

    if args.do_train:
        train_dataset = load_and_cache_examples(args,
                                                args.task_name,
                                                tokenizer,
                                                evaluate=False,
                                                reverse=True)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info('global step = {}, average loss = {}'.format(
            global_step, tr_loss))
Example #22
    def __init__(self,
                 *,
                 pretrained_model_name=None,
                 config_filename=None,
                 vocab_size=None,
                 hidden_size=768,
                 num_hidden_layers=12,
                 num_attention_heads=12,
                 intermediate_size=3072,
                 hidden_act="gelu",
                 max_position_embeddings=512,
                 random_init=False,
                 **kwargs):
        TrainableNM.__init__(self, **kwargs)

        # Check that only one of pretrained_model_name, config_filename, and
        # vocab_size was passed in
        total = 0

        if pretrained_model_name is not None:
            total += 1
        if config_filename is not None:
            total += 1
        if vocab_size is not None:
            total += 1

        if total != 1:
            raise ValueError(
                "Only one of pretrained_model_name, vocab_size, " +
                "or config_filename should be passed into the " +
                "BERT constructor.")

        if vocab_size is not None:
            config = BertConfig(
                vocab_size_or_config_json_file=vocab_size,
                hidden_size=hidden_size,
                num_hidden_layers=num_hidden_layers,
                num_attention_heads=num_attention_heads,
                intermediate_size=intermediate_size,
                hidden_act=hidden_act,
                max_position_embeddings=max_position_embeddings)
            model = BertModel(config)
        elif pretrained_model_name is not None:
            model = BertModel.from_pretrained(pretrained_model_name)
        elif config_filename is not None:
            config = BertConfig.from_json_file(config_filename)
            model = BertModel(config)
        else:
            raise ValueError(
                "One of pretrained_model_name, vocab_size, or config_filename "
                "must be passed into the BERT constructor.")

        model.to(self._device)

        self.add_module("bert", model)
        self.config = model.config

        if random_init:
            self.apply(
                lambda module: transformer_weights_init(module, xavier=False))
Example #23
    def __init__(self, bert_model_path, num_classes):
        super(Bert, self).__init__()
        self.bert = BertModel.from_pretrained(bert_model_path)
        # keep BERT trainable (set requires_grad = False here to freeze BERT instead)
        for param in self.bert.parameters():
            param.requires_grad = True

        self.fc = nn.Linear(self.bert.config.to_dict()['hidden_size'],
                            num_classes)
Example #24
    def __init__(self):
        super(BERTClassifier, self).__init__()

        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.fc = nn.Sequential(
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(768, 2)
        )
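A minimal forward sketch to pair with this classifier, under the assumption that BertModel returns an output object with pooler_output (the original forward is not shown):

def forward(self, input_ids, attention_mask=None):
    outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
    # pooled [CLS] vector -> ReLU -> Dropout -> Linear gives the 2-class logits
    return self.fc(outputs.pooler_output)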
Example #25
    def __init__(self, code_length):  # code_length: output dimension of the fc projection
        super(TextNet, self).__init__()

        modelConfig = BertConfig.from_pretrained(
            './data/bert-base-uncased-config.json')
        self.textExtractor = BertModel.from_pretrained(
            './data/bert-base-uncased-pytorch_model.bin', config=modelConfig)
        # self.textExtractor.eval()
        embedding_dim = self.textExtractor.config.hidden_size
Example #26
 def __init__(self, dataset, model_path):
     super(Model, self).__init__()
     self.config = config.IMDBConfig()
     model = BertModel.from_pretrained(self.config.BERT_MODEL)
     self.model = ModelTrainer(model, 2)
     self.model.load_state_dict(torch.load(model_path))
     self.model.eval()
     self.dataset = dataset
     self.tokenizer = BertTokenizer.from_pretrained(self.config.BERT_MODEL)
Example #27
    def __init__(self, config):
        super(BertForSequenceClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.config.num_labels)

        self.init_weights()
Example #28
 def __init__(self):
     super(BSME_model, self).__init__()
     self.tokenizer = BertTokenizer.from_pretrained(
         'bert-base-multilingual-cased', do_lower_case=False)
     self.model = BertModel.from_pretrained(
         'bert-base-multilingual-cased',
         output_hidden_states=True).to('cuda')  # do_lower_case is a tokenizer option, not a model one
     self.classifier = DropoutClassifier(768 * 2, 4).to('cuda')
Example #29
    def __init__(self, config):
        super(NoHiddenLayerClassification, self).__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.fc1 = nn.Linear(in_features=768 * 5, out_features=2)

        self.init_weights()
Example #30
    def __init__(self, config, num_choices=1, num_docs_rank=30):
        super(BertForMultipleChoice, self).__init__(config)

        self.num_choices = num_choices
        self.bert = BertModel(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, num_choices)

        self.init_weights()