def model_fn(features, labels, mode, params):
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    # `config` (a BertConfig) is assumed to come from the enclosing scope;
    # is_training is hard-coded to True here.
    model = BertModel(config, True, input_ids, input_mask, segment_ids)
    final_hidden = model.get_sequence_output()
    return final_hidden

def __init__(self):
    bert_pretrained_dir = args.pretrain_models_path + args.bert_model_name
    self.do_lower_case = args.bert_model_name.startswith('uncased')
    self.vocab_file = os.path.join(bert_pretrained_dir, 'vocab.txt')
    self.config_file = os.path.join(bert_pretrained_dir, 'bert_config.json')
    self.tokenizer = FullTokenizer(vocab_file=self.vocab_file,
                                   do_lower_case=self.do_lower_case)
    self.input_id = tf.placeholder(tf.int64, [None, None], 'input_ids')
    self.input_mask = tf.placeholder(tf.int64, [None, None], 'input_mask')
    self.segment_ids = tf.placeholder(tf.int64, [None, None], 'segment_ids')
    bert_config = BertConfig.from_json_file(self.config_file)
    model = BertModel(config=bert_config,
                      is_training=False,
                      input_ids=self.input_id,
                      input_mask=self.input_mask,
                      token_type_ids=self.segment_ids,
                      use_one_hot_embeddings=True,
                      scope='bert')
    self.output_layer = model.get_sequence_output()
    self.embedding_layer = model.get_embedding_output()
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    self.session = tf.Session(config=config)
    saver.restore(self.session, bert_pretrained_dir + '/bert_model.ckpt')

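# Hedged usage sketch (not part of the original source): feeds one tokenized
# sentence through the extractor above. `extractor` is an assumed instance
# name, and the input text is illustrative only.
tokens = ['[CLS]'] + extractor.tokenizer.tokenize('an example sentence') + ['[SEP]']
ids = extractor.tokenizer.convert_tokens_to_ids(tokens)
feed = {
    extractor.input_id: [ids],
    extractor.input_mask: [[1] * len(ids)],
    extractor.segment_ids: [[0] * len(ids)],
}
sequence_embeddings = extractor.session.run(extractor.output_layer,
                                            feed_dict=feed)
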
def _bert_model(self, input_ids, input_tag_embeddings, input_masks,
                bert_config, bert_checkpoint_file, is_training=False):
    """Creates the Bert model.

    Args:
      input_ids: A [batch, max_seq_len] int tensor.
      input_tag_embeddings: A float tensor of tag embeddings, passed through
        when use_tag_embeddings is set.
      input_masks: A [batch, max_seq_len] int tensor.
      bert_config: A BertConfig instance.
      bert_checkpoint_file: Path to the BERT checkpoint to restore from.
      is_training: Whether the model is built for training.

    Returns:
      A [batch, hidden_size] float tensor, the pooled output.
    """
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=input_ids,
                           input_mask=input_masks,
                           use_tag_embeddings=True,
                           tag_embeddings=input_tag_embeddings)

    # Restore from checkpoint.
    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.global_variables(), bert_checkpoint_file)
    if 'global_step' in assignment_map:
        assignment_map.pop('global_step')
    tf.compat.v1.train.init_from_checkpoint(bert_checkpoint_file,
                                            assignment_map)

    return bert_model.get_pooled_output()

def _bert_model(self, input_ids, input_tag_features, input_masks):
    """Creates the Bert model.

    Args:
      input_ids: A [batch, max_seq_len] int tensor.
      input_tag_features: A float tensor of tag features, passed through
        when use_tag_embeddings is set.
      input_masks: A [batch, max_seq_len] int tensor.

    Returns:
      A [batch, hidden_size] float tensor, the pooled output.
    """
    is_training = self._is_training
    options = self._model_proto

    bert_config = BertConfig.from_json_file(options.bert_config_file)
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=input_ids,
                           input_mask=input_masks,
                           use_tag_embeddings=True,
                           tag_features=input_tag_features)

    # Restore from checkpoint.
    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.global_variables(), options.bert_checkpoint_file)
    if 'global_step' in assignment_map:
        assignment_map.pop('global_step')
    tf.compat.v1.train.init_from_checkpoint(options.bert_checkpoint_file,
                                            assignment_map)

    return bert_model.get_pooled_output()

def __init__(self, path, training=False, max_seq_length=512):
    self.max_seq_length = max_seq_length
    self.graph = tf.Graph()
    with self.graph.as_default():
        self.input_ids = tf.compat.v1.placeholder(
            tf.int32, shape=(None, self.max_seq_length))
        self.input_mask = tf.compat.v1.placeholder(
            tf.int32, shape=(None, self.max_seq_length))
        self.segment_ids = tf.compat.v1.placeholder(
            tf.int32, shape=(None, self.max_seq_length))
        self.bert_config = BertConfig.from_json_file(path + '/bert_config.json')
        self.bert_module = BertModel(config=self.bert_config,
                                     is_training=training,
                                     input_ids=self.input_ids,
                                     input_mask=self.input_mask,
                                     token_type_ids=self.segment_ids,
                                     use_one_hot_embeddings=False)
        assignment_map, initialized_variable_names = \
            get_assignment_map_from_checkpoint(
                tf.trainable_variables(), path + '/bert_model.ckpt')
        tf.train.init_from_checkpoint(path + '/bert_model.ckpt', assignment_map)
        self.sess = tf.compat.v1.Session()
        self.sess.run(
            tf.group(tf.compat.v1.global_variables_initializer(),
                     tf.compat.v1.tables_initializer()))
        self.bert_outputs = {
            'sequence_output': self.bert_module.get_sequence_output(),
            'pooled_output': self.bert_module.get_pooled_output(),
        }
    self.tok = tokenization.FullTokenizer(vocab_file=path + '/vocab.txt',
                                          do_lower_case=True)

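# Hedged usage sketch (not part of the original source): assumes an instance
# `encoder` of the wrapper above. Inputs are padded to max_seq_length because
# the placeholders have a fixed second dimension.
tokens = ['[CLS]'] + encoder.tok.tokenize('an example sentence') + ['[SEP]']
ids = encoder.tok.convert_tokens_to_ids(tokens)
pad = encoder.max_seq_length - len(ids)
outputs = encoder.sess.run(encoder.bert_outputs, feed_dict={
    encoder.input_ids: [ids + [0] * pad],
    encoder.input_mask: [[1] * len(ids) + [0] * pad],
    encoder.segment_ids: [[0] * encoder.max_seq_length],
})
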
def get_bert_embeddings(self, flattened_input_ids, flattened_input_mask,
                        is_training: bool):
    """Applies BERT to each sliding window and gathers the token embeddings
    corresponding to the real (non-padding) tokens.

    :param flattened_input_ids: [-1]
    :param flattened_input_mask: [-1]
    :param is_training:
    :return: (num_tokens, embed_size)
    """
    input_ids = tf.reshape(flattened_input_ids,
                           [-1, self.config.sliding_window_size])
    input_mask = tf.reshape(flattened_input_mask,
                            [-1, self.config.sliding_window_size])
    actual_mask = tf.cast(tf.not_equal(input_mask, self.config.pad_idx),
                          tf.int32)
    with tf.variable_scope('bert', reuse=tf.AUTO_REUSE):
        bert_model = BertModel(self.bert_config, is_training, input_ids,
                               actual_mask, scope='bert')
    # (num_windows, window_size, embed_size)
    bert_embeddings = bert_model.get_sequence_output()
    flattened_embeddings = tf.reshape(bert_embeddings,
                                      [-1, self.bert_config.hidden_size])
    # Positions with a mask value below zero are dropped from the output.
    flattened_mask = tf.greater_equal(flattened_input_mask, 0)
    output_embeddings = tf.boolean_mask(flattened_embeddings, flattened_mask)
    return output_embeddings

def get_bert(BERT_PT_PATH, bert_type, do_lower_case, no_pretraining):
    bert_config_file = os.path.join(BERT_PT_PATH,
                                    f'bert_config_{bert_type}.json')
    vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    init_checkpoint = os.path.join(BERT_PT_PATH,
                                   f'pytorch_model_{bert_type}.bin')

    bert_config = BertConfig.from_json_file(bert_config_file)
    tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file,
                                           do_lower_case=do_lower_case)
    bert_config.print_status()

    model_bert = BertModel(bert_config)
    # The pretrained-weight loading is disabled here, so `no_pretraining`
    # currently has no effect:
    # if no_pretraining:
    #     pass
    # else:
    #     model_bert.load_state_dict(torch.load(init_checkpoint, map_location='cpu'))
    #     print("Load pre-trained parameters.")
    # model_bert = torch.nn.DataParallel(model_bert, device_ids=[0, 4, 5])
    model_bert.to(device)  # `device` is assumed to be defined at module level
    # model_bert.cuda(2)

    return model_bert, tokenizer, bert_config

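# Hedged invocation sketch (not part of the original source); the path and
# model-type strings are illustrative placeholders only.
model_bert, tokenizer, bert_config = get_bert(
    BERT_PT_PATH='./bert_models', bert_type='uncased_L-12_H-768_A-12',
    do_lower_case=True, no_pretraining=False)
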
def qa_loop_body(i, starts, ends, labels, scores):
    # This loop body is a closure: flattened_input_ids, flattened_input_mask,
    # sentence_map, top_span_starts/ends, candidate_starts/ends, span_starts,
    # span_ends, cluster_ids, dropout, c and is_training come from the
    # enclosing scope.
    # (num_windows, window_size)
    input_ids = tf.reshape(flattened_input_ids,
                           [-1, self.config.sliding_window_size])
    input_mask = tf.reshape(flattened_input_mask,
                            [-1, self.config.sliding_window_size])
    # (num_windows, window_size)
    actual_mask = tf.cast(tf.not_equal(input_mask, self.config.pad_idx),
                          tf.int32)
    num_windows = tf.shape(actual_mask)[0]
    # (num_question_tokens,)
    question_tokens = self.get_question_token_ids(
        sentence_map, flattened_input_ids, flattened_input_mask,
        top_span_starts[i], top_span_ends[i])
    # (num_windows, num_ques_tokens)
    tiled_question = tf.tile(tf.expand_dims(question_tokens, 0),
                             [num_windows, 1])
    question_ones = tf.ones_like(tiled_question, dtype=tf.int32)
    question_zeros = tf.zeros_like(tiled_question, dtype=tf.int32)
    # (num_windows, num_ques_tokens + window_size)
    qa_input_ids = tf.concat([tiled_question, input_ids], 1)
    qa_input_mask = tf.concat([question_ones, actual_mask], 1)
    token_type_ids = tf.concat([question_zeros, actual_mask], 1)
    with tf.variable_scope('bert', reuse=tf.AUTO_REUSE):
        bert_model = BertModel(self.bert_config, is_training, qa_input_ids,
                               qa_input_mask, token_type_ids, scope='bert')
    # (num_windows, num_ques_tokens + window_size, embed_size)
    bert_embeddings = bert_model.get_sequence_output()
    flattened_embeddings = tf.reshape(bert_embeddings,
                                      [-1, self.bert_config.hidden_size])
    # Question positions get -1 so they are masked out of the outputs below.
    output_mask = tf.concat([-1 * question_ones, input_mask], 1)
    flattened_mask = tf.reshape(tf.greater_equal(output_mask, 0), [-1])
    # (num_tokens, embed_size)
    qa_embeddings = tf.boolean_mask(flattened_embeddings, flattened_mask)
    qa_scores, qa_indices, qa_starts, qa_ends, qa_embs = \
        self.filter_by_mention_scores(qa_embeddings, candidate_starts,
                                      candidate_ends, dropout, c)
    qa_cluster_ids = self.get_top_span_cluster_ids(
        candidate_starts, candidate_ends, span_starts, span_ends,
        cluster_ids, qa_indices)
    return (i + 1,
            tf.concat([starts, tf.expand_dims(qa_starts, axis=0)], axis=0),
            tf.concat([ends, tf.expand_dims(qa_ends, axis=0)], axis=0),
            tf.concat([labels, tf.expand_dims(qa_cluster_ids, axis=0)], axis=0),
            tf.concat([scores, tf.expand_dims(qa_scores, axis=0)], axis=0))

def predict(self, inputs, **kwargs):
    """Predicts the resulting tensors.

    Args:
      inputs: A dictionary of input tensors keyed by names.

    Returns:
      predictions: A dictionary of prediction tensors keyed by name.
    """
    is_training = self._is_training
    options = self._model_proto

    (answer_choices, answer_choices_len, answer_label) = (
        inputs[InputFields.answer_choices_with_question],
        inputs[InputFields.answer_choices_with_question_len],
        inputs[InputFields.answer_label])

    # Create model layers.
    token_to_id_layer = token_to_id.TokenToIdLayer(options.bert_vocab_file,
                                                   options.bert_unk_token_id)

    # Convert tokens into token ids.
    batch_size = answer_choices.shape[0]
    answer_choices_token_ids = token_to_id_layer(answer_choices)
    answer_choices_token_ids_reshaped = tf.reshape(
        answer_choices_token_ids, [batch_size * NUM_CHOICES, -1])

    answer_choices_mask = tf.sequence_mask(
        answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
    answer_choices_mask_reshaped = tf.reshape(
        answer_choices_mask, [batch_size * NUM_CHOICES, -1])

    # Bert prediction.
    bert_config = BertConfig.from_json_file(options.bert_config_file)
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=answer_choices_token_ids_reshaped,
                           input_mask=answer_choices_mask_reshaped)
    answer_choices_cls_feature_reshaped = bert_model.get_pooled_output()
    answer_choices_cls_feature = tf.reshape(
        answer_choices_cls_feature_reshaped, [batch_size, NUM_CHOICES, -1])

    assignment_map, _ = get_assignment_map_from_checkpoint(
        tf.global_variables(), options.bert_checkpoint_file)
    tf.compat.v1.train.init_from_checkpoint(options.bert_checkpoint_file,
                                            assignment_map)

    # Classification layer.
    output = tf.compat.v1.layers.dense(answer_choices_cls_feature,
                                       units=1,
                                       activation=None)
    output = tf.squeeze(output, axis=-1)
    return {FIELD_ANSWER_PREDICTION: output}

def convert(args):
    # Initialise PyTorch model
    config = BertConfig.from_json_file(args.bert_config_file)
    model = BertModel(config)

    # Load weights from TF model
    path = args.tf_checkpoint_path
    print("Converting TensorFlow checkpoint from {}".format(path))

    init_vars = tf.train.list_variables(path)
    names = []
    arrays = []
    for name, shape in init_vars:
        print("Loading {} with shape {}".format(name, shape))
        array = tf.train.load_variable(path, name)
        print("Numpy array shape {}".format(array.shape))
        names.append(name)
        arrays.append(array)

    for name, array in zip(names, arrays):
        name = name[5:]  # skip "bert/"
        print("Loading {}".format(name))
        name = name.split('/')
        # After stripping the first five characters, "cls/predictions" and
        # "cls/seq_relationship" become "redictions" / "eq_relationship";
        # both belong to pre-training heads and are skipped.
        if name[0] in ['redictions', 'eq_relationship']:
            print("Skipping")
            continue
        pointer = model
        for m_name in name:
            if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
                l = re.split(r'_(\d+)', m_name)
            else:
                l = [m_name]
            if l[0] == 'kernel':
                pointer = getattr(pointer, 'weight')
            else:
                # "global_step" likewise becomes "l_step" after the strip;
                # it has no counterpart in the PyTorch model.
                if l[0] != 'l_step':
                    pointer = getattr(pointer, l[0], name)
                else:
                    print(l[0])
            if len(l) >= 2:
                num = int(l[1])
                pointer = pointer[num]
        if m_name[-11:] == '_embeddings':
            pointer = getattr(pointer, 'weight')
        elif m_name == 'kernel':
            # TF stores dense kernels transposed relative to nn.Linear.
            array = np.transpose(array)
        try:
            assert pointer.shape == array.shape
        except AssertionError as e:
            e.args += (pointer.shape, array.shape)
            raise
        except AttributeError:
            continue
        pointer.data = torch.from_numpy(array)

    # Save pytorch-model
    torch.save(model.state_dict(), args.pytorch_dump_path)

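# Hedged invocation sketch (not part of the original source): wires `convert`
# to a minimal CLI exposing the three attributes it reads from `args`.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--tf_checkpoint_path', required=True)
parser.add_argument('--bert_config_file', required=True)
parser.add_argument('--pytorch_dump_path', required=True)
convert(parser.parse_args())
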
def build(self, data_iter, bert_config_file):
    # get the inputs
    with tf.variable_scope('inputs'):
        input_map = data_iter.get_next()
        usrid, prdid, input_x, input_y, doc_len = \
            (input_map['usr'], input_map['prd'], input_map['content'],
             input_map['rating'], input_map['doc_len'])
        input_x = tf.reshape(input_x, [-1, self.max_sen_len])
        sen_len = tf.count_nonzero(input_x, axis=-1)
        doc_len = doc_len // self.max_sen_len
        input_x = tf.cast(input_x, tf.int32)

        self.usr = lookup(self.embeddings['usr_emb'], usrid,
                          name='cur_usr_embedding')
        self.prd = lookup(self.embeddings['prd_emb'], prdid,
                          name='cur_prd_embedding')
        input_x = tf.reshape(input_x, [-1, self.max_sen_len])
        input_mask = tf.sequence_mask(sen_len, self.max_sen_len)
        input_mask = tf.cast(input_mask, tf.int32)
        bert_config = BertConfig.from_json_file(bert_config_file)
        bert = BertModel(bert_config,
                         is_training=False,
                         input_ids=input_x,
                         input_mask=input_mask,
                         token_type_ids=None,
                         use_one_hot_embeddings=False)
        # input_x = bert.get_sequence_output()
        input_x = bert.get_embedding_output()

    # build the process of model
    d_hat = self.nsc(input_x, self.max_sen_len,
                     self.max_doc_len // self.max_sen_len, sen_len, doc_len)
    prediction = tf.argmax(d_hat, 1, name='prediction')

    with tf.variable_scope("loss"):
        sce = tf.nn.softmax_cross_entropy_with_logits_v2
        self.loss = sce(logits=d_hat, labels=tf.one_hot(input_y, self.cls_cnt))
        regularizer = tf.zeros(1)
        params = tf.trainable_variables()
        for param in params:
            if param not in self.embeddings.values():
                regularizer += tf.nn.l2_loss(param)
        self.loss = tf.reduce_sum(self.loss) + self.l2_rate * regularizer

    with tf.variable_scope("metrics"):
        correct_prediction = tf.equal(prediction, input_y)
        # Note: despite the name, this is a sum of squared errors over the batch.
        mse = tf.reduce_sum(tf.square(prediction - input_y), name="mse")
        correct_num = tf.reduce_sum(tf.cast(correct_prediction, dtype=tf.int32),
                                    name="correct_num")
        accuracy = tf.reduce_sum(tf.cast(correct_prediction, "float"),
                                 name="accuracy")

    return self.loss, mse, correct_num, accuracy

def __init__(self, config, is_training, input_ids, input_mask=None,
             token_type_ids=None):
    self.model = BertModel(config=config,
                           is_training=is_training,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=token_type_ids)
    self.embeddings_table = self.model.get_embedding_table()

def __init__(self, config, output_hidden_size):
    super(BertForInteractSpanExtractAndClassification, self).__init__()
    # Shared Part
    self.bert = BertModel(config)
    # Private Part
    self.te_bilstm = nn.LSTM(input_size=config.hidden_size,
                             hidden_size=config.hidden_size,
                             batch_first=True,
                             bidirectional=True)
    self.tc_bilstm = nn.LSTM(input_size=config.hidden_size,
                             hidden_size=config.hidden_size,
                             batch_first=True,
                             bidirectional=True)
    self.te_dense = nn.Linear(config.hidden_size * 2, config.hidden_size)
    self.tc_dense = nn.Linear(config.hidden_size * 2, config.hidden_size)
    self.attention = nn.Linear(config.hidden_size * 2, 1)
    self.tc_output_layer = nn.Linear(config.hidden_size * 2,
                                     output_hidden_size)
    self.extraction = nn.Linear(config.hidden_size * 2, 2)
    self.classifier = nn.Linear(output_hidden_size, 5)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.activation = nn.Tanh()
    self.mse = nn.MSELoss(reduction="mean")

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, nn.LSTM):
            for name, param in module.named_parameters():
                if 'weight_ih' in name:
                    nn.init.xavier_normal_(param)
                elif 'weight_hh' in name:
                    nn.init.orthogonal_(param)
                elif 'bias' in name:
                    nn.init.constant_(param, 0.0)
                    # Set the forget-gate chunk of the bias to 1.
                    param.chunk(4)[1].fill_(1)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)

def _buildModel(self, input_ids, token_type_ids, input_mask):
    bert_model = BertModel(self.config, self.config.training, input_ids,
                           input_mask, token_type_ids,
                           self.config.use_one_hot_embeddings)
    bert_output = bert_model.get_pooled_output()
    output = tf.layers.dense(
        bert_output,
        self.config.output_dim,
        kernel_initializer=tf.truncated_normal_initializer(
            stddev=self.config.initializer_range),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(1.0),
        bias_regularizer=tf.contrib.layers.l2_regularizer(1.0),
        name='output')
    return output

def get_model(self):
    logging.info("get bert model")
    graph = tf.Graph()
    with graph.as_default():
        ph_input_ids = tf.placeholder(dtype=tf.int32,
                                      shape=[None, self._seq_length + 2],
                                      name="ph_input_ids")
        con = BertConfig.from_json_file(config.PROJECT_ROOT +
                                        "/bert_config.json")
        bert_model = BertModel(config=con,
                               is_training=False,
                               input_ids=ph_input_ids,
                               use_one_hot_embeddings=True)
        output = bert_model.get_sequence_output()
        init = tf.global_variables_initializer()
    sess = tf.Session(graph=graph)
    # Note: only the variable initializer is run; no checkpoint is restored
    # here, so the weights are randomly initialized.
    sess.run(init)
    return sess, ph_input_ids, output

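# Hedged usage sketch (not part of the original source): `model` is an
# assumed instance of the enclosing class and `ids_batch` an assumed padded
# int array of shape [batch, seq_length + 2].
sess, ph_input_ids, output = model.get_model()
sequence_output = sess.run(output, feed_dict={ph_input_ids: ids_batch})
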
def make_bert_graph(bert_config, max_seq_length, dropout_keep_prob_rate,
                    num_labels, tune=False):
    input_ids = tf.placeholder(tf.int32, [None, max_seq_length],
                               name='inputs_ids')
    input_mask = tf.placeholder(tf.int32, [None, max_seq_length],
                                name='input_mask')
    segment_ids = tf.placeholder(tf.int32, [None, max_seq_length],
                                 name='segment_ids')
    model = BertModel(config=bert_config,
                      is_training=tune,
                      input_ids=input_ids,
                      input_mask=input_mask,
                      token_type_ids=segment_ids)
    if tune:
        # Despite its name, `dropout_keep_prob_rate` acts as a drop rate:
        # the keep probability passed to tf.nn.dropout is one minus it.
        bert_embeddings_dropout = tf.nn.dropout(
            model.pooled_output, keep_prob=(1 - dropout_keep_prob_rate))
        label_ids = tf.placeholder(tf.int32, [None], name='label_ids')
    else:
        bert_embeddings_dropout = model.pooled_output
        label_ids = None
    logits = tf.contrib.layers.fully_connected(
        inputs=bert_embeddings_dropout,
        num_outputs=num_labels,
        activation_fn=None,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
        biases_initializer=tf.zeros_initializer())
    if tune:
        # loss layer
        CE = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=label_ids,
                                                            logits=logits)
        loss = tf.reduce_mean(CE)
        return input_ids, input_mask, segment_ids, label_ids, logits, loss
    else:
        # prob layer
        probs = tf.nn.softmax(logits, axis=-1, name='probs')
        return model, input_ids, input_mask, segment_ids, probs

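# Hedged usage sketch (not part of the original source): builds the inference
# graph and runs it on pre-encoded features; `bert_config` and the
# ids/mask/segment arrays are assumed to exist.
model, input_ids, input_mask, segment_ids, probs = make_bert_graph(
    bert_config, max_seq_length=128, dropout_keep_prob_rate=0.1,
    num_labels=2, tune=False)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_probs = sess.run(probs, feed_dict={input_ids: ids_batch,
                                             input_mask: mask_batch,
                                             segment_ids: seg_batch})
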
def __init__(self, config, params):
    super(NestedNERModel, self).__init__(config)
    self.params = params
    self.ner_label_limit = params["ner_label_limit"]
    self.thresholds = params["ner_threshold"]
    self.num_entities = params["mappings"]["nn_mapping"]["num_entities"]
    self.num_triggers = params["mappings"]["nn_mapping"]["num_triggers"]
    self.max_span_width = params["max_span_width"]
    self.bert = BertModel(config)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.entity_classifier = nn.Linear(config.hidden_size * 3,
                                       self.num_entities)
    self.trigger_classifier = nn.Linear(config.hidden_size * 3,
                                        self.num_triggers)
    self.register_buffer(
        "label_ids",
        torch.tensor(params["mappings"]["nn_mapping"]["mlb"].classes_,
                     dtype=torch.uint8),
    )
    self.apply(self.init_bert_weights)

def __init__(self, model_folder, max_length=256, lowercase=True):
    # 1. Create tokenizer
    self.max_length = max_length
    vocab_file = os.path.join(model_folder, 'vocab.txt')
    self.tokenizer = FullTokenizer(vocab_file, do_lower_case=lowercase)

    # 2. Read Config
    config_file = os.path.join(model_folder, 'bert_config.json')
    self.config = BertConfig.from_json_file(config_file)

    # 3. Create Model
    self.session = tf.Session()
    self.token_ids_op = tf.placeholder(tf.int32, shape=(None, max_length),
                                       name='token_ids')
    self.model = BertModel(config=self.config,
                           is_training=False,
                           input_ids=self.token_ids_op,
                           use_one_hot_embeddings=False)

    # 4. Restore Trained Model
    self.saver = tf.train.Saver()
    ckpt_file = os.path.join(model_folder, 'bert_model.ckpt')
    # RCS ckpt_file = os.path.join(model_folder, 'model.ckpt-1000000')
    self.saver.restore(self.session, ckpt_file)

    # Fetch the output of the final encoder layer by tensor name; this relies
    # on the internal graph layout of the BERT implementation.
    hidden_layers = self.config.num_hidden_layers
    self.embeddings_op = tf.get_default_graph().get_tensor_by_name(
        "bert/encoder/Reshape_{}:0".format(hidden_layers + 1))

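# Hedged usage sketch (not part of the original source): runs the embedder
# above on one padded sentence; `embedder` is an assumed instance name.
tokens = ['[CLS]'] + embedder.tokenizer.tokenize('an example sentence') + ['[SEP]']
ids = embedder.tokenizer.convert_tokens_to_ids(tokens)
ids = ids + [0] * (embedder.max_length - len(ids))
embeddings = embedder.session.run(embedder.embeddings_op,
                                  feed_dict={embedder.token_ids_op: [ids]})
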
def __init__(self, config, use_crf=False):
    super(BertForJointBIOExtractAndClassification, self).__init__()
    self.bert = BertModel(config)
    self.use_crf = use_crf
    # TODO check with Google if it's normal there is no dropout on the token
    # classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.bio_affine = nn.Linear(config.hidden_size, 3)
    self.cls_affine = nn.Linear(config.hidden_size, 5)
    if self.use_crf:
        self.cls_crf = ConditionalRandomField(5)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses
            # truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)

def get_bert(BERT_PT_PATH, bert_type, do_lower_case, my_pretrain_bert):
    # bert_config_file = os.path.join(BERT_PT_PATH, f'bert_config_{bert_type}.json')
    # vocab_file = os.path.join(BERT_PT_PATH, f'vocab_{bert_type}.txt')
    # init_checkpoint = os.path.join(BERT_PT_PATH, f'pytorch_model_{bert_type}.bin')
    # bert_config = BertConfig.from_json_file(bert_config_file)
    # tokenizer = tokenization.FullTokenizer(
    #     vocab_file=vocab_file, do_lower_case=do_lower_case)
    # bert_config.print_status()
    # model_bert = BertModel(bert_config)

    # Note: this ignores the `do_lower_case` parameter in favour of the
    # global `args.do_lower_case`.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased',
                                              do_lower_case=args.do_lower_case)
    # Unpacking two values assumes a BertModel.from_pretrained variant that
    # also returns the config; the stock library returns only the model.
    model_bert, bert_config = BertModel.from_pretrained('bert-base-uncased')
    if my_pretrain_bert:
        # `init_checkpoint` is only defined in the commented-out block above;
        # it must be set before this branch can run.
        model_bert.load_state_dict(
            torch.load(init_checkpoint, map_location='cpu'))
        print("Load pre-trained parameters.")
    else:
        pass
    model_bert.to(device)

    return model_bert, tokenizer, bert_config

def __init__(self, config):
    super(BertForCollapsedSpanAspectExtractionAndClassification,
          self).__init__()
    self.bert = BertModel(config)
    # TODO check with Google if it's normal there is no dropout on the token
    # classifier of SQuAD in the TF version
    # self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.neu_outputs = nn.Linear(config.hidden_size, 2)
    self.pos_outputs = nn.Linear(config.hidden_size, 2)
    self.neg_outputs = nn.Linear(config.hidden_size, 2)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses
            # truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)

def __init__(self,
             config,
             num_labels: int,
             num_pos: int,
             use_pos: bool,
             arc_representation_dim: int,
             arc_feedforward: FeedForward = None,
             use_mst_decoding_for_validation: bool = True,
             dropout: float = 0.) -> None:
    super(DistanceDependencyParser, self).__init__(config)
    self.bert = BertModel(config)
    self.apply(self.init_bert_weights)

    encoder_dim = config.hidden_size
    self.arc_feedforward = arc_feedforward or \
        FeedForward(encoder_dim, 1, arc_representation_dim,
                    Activation.by_name("linear")())
    self.arc_attention = DistanceAttention()
    self._dropout = InputVariationalDropout(dropout)
    self.use_mst_decoding_for_validation = use_mst_decoding_for_validation
    self._attachment_scores = UndirectedAttachmentScores()

def __init__(self, config, use_bert_ffn):
    super(BertForSequenceClassificationWithSelfAtt, self).__init__()
    self.bert = BertModel(config)
    if use_bert_ffn:
        self.rank_ffn = BertFeedForward(config, config.hidden_size,
                                        config.hidden_size, 2)
    else:
        self.rank_ffn = NormalFeedForward(config, config.hidden_size,
                                          config.hidden_size, 2)
    self.rank_affine = nn.Linear(config.hidden_size, 1)

    def init_weights(module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses
            # truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=config.initializer_range)
        elif isinstance(module, BERTLayerNorm):
            module.beta.data.normal_(mean=0.0, std=config.initializer_range)
            module.gamma.data.normal_(mean=0.0, std=config.initializer_range)
        if isinstance(module, nn.Linear):
            module.bias.data.zero_()

    self.apply(init_weights)

def __init__(self):
    super().__init__()
    self.bert = BertModel.from_pretrained('bert_base/')
    if args.bert_freeze:
        for param in self.bert.parameters():
            param.requires_grad = False
    self.lstm = BiLSTM(
        input_size=args.bert_hidden_size + args.cnn_output_size,
        hidden_size=args.rnn_hidden_size + args.cnn_output_size,
        num_layers=args.rnn_num_layers,
        num_dirs=args.rnn_num_dirs)
    self.lstm_dropout = nn.Dropout(p=args.rnn_dropout)
    self.cnn = CharCNN(embedding_num=len(CHAR_VOCAB),
                       embedding_dim=args.cnn_embedding_dim,
                       filters=eval(args.cnn_filters),
                       output_size=args.cnn_output_size)
    self.crf = CRF(target_size=len(VOCAB) + 2, use_cuda=args.crf_use_cuda)
    self.linear = nn.Linear(
        in_features=args.rnn_hidden_size + args.cnn_output_size,
        out_features=len(VOCAB) + 2)
    self.attn = MultiHeadAttention(
        model_dim=args.rnn_hidden_size + args.cnn_output_size,
        num_heads=args.attn_num_heads,
        dropout=args.attn_dropout)
    self.feat_dropout = nn.Dropout(p=args.feat_dropout)

def _predict_logits(self,
                    answer_choices,
                    answer_choices_len,
                    token_to_id_fn,
                    bert_config,
                    slim_fc_scope,
                    keep_prob=1.0,
                    is_training=False):
    """Predicts answer for a particular task.

    Args:
      answer_choices: A [batch, NUM_CHOICES, max_answer_len] string tensor.
      answer_choices_len: A [batch, NUM_CHOICES] int tensor.
      token_to_id_fn: A callable to convert the token tensor to an int tensor.
      bert_config: A BertConfig instance to initialize BERT model.
      slim_fc_scope: Slim FC scope.
      keep_prob: Keep probability of dropout layers.
      is_training: Whether the model is built for training.

    Returns:
      logits: A [batch, NUM_CHOICES] float tensor.
    """
    batch_size = answer_choices.shape[0]

    # Convert tokens into token ids.
    answer_choices_token_ids = token_to_id_fn(answer_choices)
    answer_choices_token_ids = tf.reshape(answer_choices_token_ids,
                                          [batch_size * NUM_CHOICES, -1])
    answer_choices_mask = tf.sequence_mask(
        answer_choices_len, maxlen=tf.shape(answer_choices)[-1])
    answer_choices_mask = tf.reshape(answer_choices_mask,
                                     [batch_size * NUM_CHOICES, -1])

    # Bert prediction.
    bert_model = BertModel(bert_config,
                           is_training,
                           input_ids=answer_choices_token_ids,
                           input_mask=answer_choices_mask)
    output = bert_model.get_pooled_output()

    # Classification layer.
    with slim.arg_scope(slim_fc_scope):
        output = slim.fully_connected(output,
                                      num_outputs=1,
                                      activation_fn=None,
                                      scope='logits')
    return tf.reshape(output, [batch_size, NUM_CHOICES])

def bertModel(*args, **kwargs):
    """
    BertModel is the basic BERT Transformer model with a layer of summed
    token, position and sequence embeddings followed by a series of identical
    self-attention blocks (12 for BERT-base, 24 for BERT-large).
    """
    model = BertModel.from_pretrained(*args, **kwargs)
    return model

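# Hedged usage sketch (not part of the original source): loads the pretrained
# base model through the wrapper above, using the standard
# pytorch-pretrained-bert model name.
model = bertModel('bert-base-uncased')
model.eval()
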
def bert(bert_config_file, mode, dim, input_ids, input_mask, input_type,
         activation, init_checkpoint=None):
    bert_config = BertConfig.from_json_file(bert_config_file)
    bert_model = BertModel(config=bert_config,
                           is_training=mode == tf.estimator.ModeKeys.TRAIN,
                           input_ids=input_ids,
                           input_mask=input_mask,
                           token_type_ids=input_type,
                           scope="bert_query")
    output = bert_model.get_pooled_output()
    if mode == tf.estimator.ModeKeys.TRAIN:
        output = tf.nn.dropout(output, keep_prob=0.9)
    sig = tf.layers.dense(output,
                          dim,
                          activation=activation,
                          kernel_initializer=tf.truncated_normal_initializer(
                              stddev=bert_config.initializer_range),
                          name="bert_query/query")
    tvars = tf.trainable_variables('bert_query')
    initialized_variable_names = {}
    if init_checkpoint:
        (assignment_map, initialized_variable_names) = \
            get_assignment_map_from_checkpoint(tvars, init_checkpoint)
        tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
    # for var in tvars:
    #     init_string = ""
    #     if var.name in initialized_variable_names:
    #         init_string = ", *INIT_FROM_CKPT*"
    #     tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
    #                     init_string)
    return sig

class BertEncoder(object):

    def __init__(self, config, is_training, input_ids, input_mask=None,
                 token_type_ids=None):
        self.model = BertModel(config=config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=token_type_ids)
        self.embeddings_table = self.model.get_embedding_table()

    def encode(self):
        # sequence_output shape: [batch_size, seq_length, hidden_size]
        output = self.model.get_sequence_output()
        states = ()
        for layer in self.model.get_all_encoder_layers():
            # Mean-pool each encoder layer over the sequence dimension.
            states += (tf.reduce_mean(layer, axis=1),)
        return output, states

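# Hedged usage sketch (not part of the original source): wires the encoder to
# a placeholder input; `bert_config` is an assumed, already-loaded BertConfig.
input_ids = tf.placeholder(tf.int32, [None, None], name='input_ids')
encoder = BertEncoder(bert_config, is_training=False, input_ids=input_ids)
sequence_output, layer_means = encoder.encode()
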
def __init__(self, config, num_labels, word_pool_type='mean'):
    super(BertForSequenceLabeling, self).__init__(config)
    if word_pool_type.lower() not in {'first', 'mean', 'sum'}:
        raise ValueError('No {} pooling methods!'.format(word_pool_type))
    if word_pool_type.lower() == 'sum':
        self.layer_norm = BertLayerNorm(config)
    self.word_pool_type = word_pool_type
    self.bert = BertModel(config)
    self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
    self.classifier = torch.nn.Linear(config.hidden_size, num_labels)
    self.crf = ConditionalRandomField(num_labels)
    self.apply(self.init_bert_weights)

def body(self, features, mode):
    """Body of the model, aka Bert

    Arguments:
        features {dict} -- feature dict, keys: input_ids, input_mask, segment_ids
        mode {mode} -- mode

    Returns:
        dict -- features extracted from bert. keys:
            'seq', 'pooled', 'all', 'embed', 'embed_table'
            seq: tensor, [batch_size, seq_length, hidden_size]
            pooled: tensor, [batch_size, hidden_size]
            all: list of tensor, num_hidden_layers * [batch_size, seq_length, hidden_size]
            embed: tensor, [batch_size, seq_length, hidden_size]
            embed_table: tensor, [vocab_size, hidden_size]
    """
    config = self.config
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    model = BertModel(config=config.bert_config,
                      is_training=is_training,
                      input_ids=input_ids,
                      input_mask=input_mask,
                      token_type_ids=segment_ids,
                      use_one_hot_embeddings=config.use_one_hot_embeddings)
    feature_dict = {}
    for logit_type in ['seq', 'pooled', 'all', 'embed', 'embed_table']:
        if logit_type == 'seq':
            # tensor, [batch_size, seq_length, hidden_size]
            feature_dict[logit_type] = model.get_sequence_output()
        elif logit_type == 'pooled':
            # tensor, [batch_size, hidden_size]
            feature_dict[logit_type] = model.get_pooled_output()
        elif logit_type == 'all':
            # list, num_hidden_layers * [batch_size, seq_length, hidden_size]
            feature_dict[logit_type] = model.get_all_encoder_layers()
        elif logit_type == 'embed':
            # for res connection
            feature_dict[logit_type] = model.get_embedding_output()
        elif logit_type == 'embed_table':
            feature_dict[logit_type] = model.get_embedding_table()
    return feature_dict

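# Hedged usage sketch (not part of the original source): shows how the
# feature dict returned by body() would typically be consumed inside a
# model_fn of the same class (`self` is assumed to be in scope).
features_out = self.body(features, tf.estimator.ModeKeys.PREDICT)
pooled = features_out['pooled']    # [batch_size, hidden_size]
sequence = features_out['seq']     # [batch_size, seq_length, hidden_size]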