Example #1
import torch.nn as nn
from transformers import BertConfig, BertForSequenceClassification


class HateBert(nn.Module):  # base class not shown in the original snippet; nn.Module assumed
    def __init__(self, args):
        super(HateBert, self).__init__()
        self.args = args
        # Request attention weights in the model outputs.
        configuration = BertConfig()
        configuration.output_attentions = True
        self.bert = BertForSequenceClassification.from_pretrained(
            args.bert_model, config=configuration)
        # Freeze the BERT encoder so only the classification head is trained.
        if args.fine_tune:
            for param in self.bert.bert.parameters():
                param.requires_grad = False
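
A minimal usage sketch for the module above. The args namespace, its bert_model and fine_tune fields, and the 'bert-base-uncased' checkpoint name are illustrative assumptions, not part of the original snippet:

from types import SimpleNamespace

# Hypothetical arguments; any BERT checkpoint name could be used here.
args = SimpleNamespace(bert_model='bert-base-uncased', fine_tune=True)
model = HateBert(args)

# Because output_attentions=True, the wrapped model returns one attention tensor
# per layer, each of shape (batch_size, num_heads, seq_len, seq_len).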
Example #2
import json

from transformers import AutoTokenizer, BertConfig, BertModel


class BertExtractor(object):  # base class not shown in the original snippet
    def __init__(
        self,
        pretrained_bert_model,
        language,
        name,
        prediction_type,
        output_hidden_states,
        output_attentions,
        attention_length_before=1,
        attention_length_after=1,
        config_path=None,
        max_length=512,
        number_of_sentence=1,
        number_of_sentence_before=0,
        number_of_sentence_after=0,
        seed=1111,
        hidden_dropout_prob=0.,
        attention_probs_dropout_prob=0.,
        stop_attention_at_sent_before=None,
        stop_attention_before_sent=0,
    ):
        super(BertExtractor, self).__init__()
        # Load pre-trained model tokenizer (vocabulary)
        # Crucially, do not do basic tokenization; PTB is tokenized. Just do wordpiece tokenization.
        if config_path is None:
            # Build the configuration in code: set the dropout rates and ask the
            # model to return hidden states and attention weights.
            configuration = BertConfig()
            configuration.hidden_dropout_prob = hidden_dropout_prob
            configuration.attention_probs_dropout_prob = attention_probs_dropout_prob
            configuration.output_hidden_states = output_hidden_states
            configuration.output_attentions = output_attentions
            self.model = BertModel.from_pretrained(
                pretrained_bert_model, config=configuration)
        else:
            # Load with the checkpoint's default configuration; the JSON file at
            # config_path is merged into self.config further below.
            self.model = BertModel.from_pretrained(pretrained_bert_model)
        self.tokenizer = AutoTokenizer.from_pretrained(pretrained_bert_model)

        self.language = language
        self.attention_length_before = attention_length_before
        self.attention_length_after = attention_length_after
        self.pretrained_bert_model = pretrained_bert_model
        self.NUM_HIDDEN_LAYERS = self.model.config.num_hidden_layers
        self.FEATURE_COUNT = self.model.config.hidden_size
        self.NUM_ATTENTION_HEADS = self.model.config.num_attention_heads
        self.name = name
        self.config = {
            'max_length': max_length,
            'seed': seed,
            'number_of_sentence': number_of_sentence,
            'number_of_sentence_before': number_of_sentence_before,
            'number_of_sentence_after': number_of_sentence_after,
            'attention_length_before': attention_length_before,
            'attention_length_after': attention_length_after,
            'stop_attention_at_sent_before': stop_attention_at_sent_before,
            'stop_attention_before_sent': stop_attention_before_sent,
            'output_hidden_states': output_hidden_states,
            'output_attentions': output_attentions,
            'model_type': 'bert',
            'hidden_size': self.model.config.hidden_size,
            'hidden_act': self.model.config.hidden_act,
            'initializer_range': self.model.config.initializer_range,
            'vocab_size': self.model.config.vocab_size,
            'hidden_dropout_prob': self.model.config.hidden_dropout_prob,
            'num_attention_heads': self.model.config.num_attention_heads,
            'type_vocab_size': self.model.config.type_vocab_size,
            'max_position_embeddings': self.model.config.max_position_embeddings,
            'num_hidden_layers': self.model.config.num_hidden_layers,
            'intermediate_size': self.model.config.intermediate_size,
            'attention_probs_dropout_prob': self.model.config.attention_probs_dropout_prob,
        }
        if config_path is not None:
            with open(config_path, 'r') as f:
                self.config.update(json.load(f))

        self.prediction_type = prediction_type  # ['sentence', 'token-level']
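
A minimal instantiation sketch for the extractor above. All argument values ('bert-base-uncased', 'english', the name and prediction_type strings) are illustrative assumptions; only the parameter names come from the original signature:

# With config_path=None, the constructor builds a BertConfig in code, so the
# checkpoint must match the default config; 'bert-base-uncased' does.
extractor = BertExtractor(
    pretrained_bert_model='bert-base-uncased',
    language='english',
    name='bert-base-uncased',
    prediction_type='sentence',
    output_hidden_states=True,
    output_attentions=True,
)
print(extractor.NUM_HIDDEN_LAYERS, extractor.FEATURE_COUNT, extractor.NUM_ATTENTION_HEADS)
# e.g. 12 768 12 for a BERT-base checkpoint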