def __init__(self, config):
    self.config = config
    self.max_segment_len = config['max_segment_len']
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.subtoken_maps = {}
    self.gold = {}
    self.eval_data = None  # Load eval data lazily.
    self.bert_config = modeling.BertConfig.from_json_file(config["bert_config_file"])
    self.tokenizer = tokenization.FullTokenizer(
        vocab_file=config['vocab_file'], do_lower_case=False)

    # Load the frequent spans.
    self.freq_spans = json.load(open("./data/freq_spans.json"))

    input_props = []
    input_props.append((tf.int32, [None, None]))  # input_ids.
    input_props.append((tf.int32, [None, None]))  # input_mask.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None, None]))  # Speaker IDs.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.
    input_props.append((tf.int32, [None]))  # Sentence map.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)

    # BERT checkpoint loading.
    tvars = tf.trainable_variables()
    # If you're using TF weights only, tf_checkpoint and init_checkpoint can be the same.
    # Get the assignment map from the TensorFlow checkpoint; depending on the file
    # extension, load the weights with TF or PyTorch.
    assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
        tvars, config['tf_checkpoint'])
    init_from_checkpoint = (tf.train.init_from_checkpoint
                            if config['init_checkpoint'].endswith('ckpt')
                            else load_from_pytorch_checkpoint)
    init_from_checkpoint(config['init_checkpoint'], assignment_map)
    print("**** Trainable Variables ****")
    for var in tvars:
        init_string = ""
        if var.name in initialized_variable_names:
            init_string = ", *INIT_FROM_CKPT*"
        print("  name = %s, shape = %s%s" % (var.name, var.shape, init_string))

    num_train_steps = int(self.config['num_docs'] * self.config['num_epochs'])
    num_warmup_steps = int(num_train_steps * 0.1)
    self.global_step = tf.train.get_or_create_global_step()
    self.train_op = optimization.create_custom_optimizer(
        tvars, self.loss, self.config['bert_learning_rate'],
        self.config['task_learning_rate'], num_train_steps, num_warmup_steps,
        False, self.global_step, freeze=-1,
        task_opt=self.config['task_optimizer'], eps=config['adam_eps'])
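# --- Usage sketch (illustrative; not part of the original file) ---
# The PaddingFIFOQueue above is typically driven by a background thread that
# tensorizes examples and feeds them through enqueue_op, while the main thread
# runs train_op (which dequeues one document per step). `model` is an instance
# of the class above; `tensorize_example` is an assumed helper that returns a
# tuple aligned with queue_input_tensors.
import random
import threading

def start_enqueue_thread(model, session, train_examples):
    def _enqueue_loop():
        while True:
            random.shuffle(train_examples)
            for example in train_examples:
                tensorized = model.tensorize_example(example, is_training=True)
                feed_dict = dict(zip(model.queue_input_tensors, tensorized))
                session.run(model.enqueue_op, feed_dict=feed_dict)
    thread = threading.Thread(target=_enqueue_loop, daemon=True)
    thread.start()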
def __init__(self, config):
    self.config = config
    self.max_segment_len = config['max_segment_len']
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    self.subtoken_maps = {}
    self.gold = {}
    self.eval_data = None  # Load eval data lazily.
    self.dropout = None
    self.bert_config = modeling.BertConfig.from_json_file(config["bert_config_file"])
    self.bert_config.hidden_dropout_prob = self.config["dropout_rate"]
    self.tokenizer = tokenization.FullTokenizer(
        vocab_file=config['vocab_file'], do_lower_case=False)
    self.bce_loss = tf.keras.losses.BinaryCrossentropy(
        reduction=tf.keras.losses.ReductionV2.NONE)

    input_props = []
    input_props.append((tf.int32, [None, None]))  # input_ids. (batch_size, seq_len)
    input_props.append((tf.int32, [None, None]))  # input_mask. (batch_size, seq_len)
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, [None, None]))  # Speaker IDs. (batch_size, seq_len)
    input_props.append((tf.int32, []))  # Genre. One genre for the whole batch, since a batch holds multiple segments of the same document.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts. Starts of all mentions in the whole document, not one start per instance.
    input_props.append((tf.int32, [None]))  # Gold ends. Ends of all mentions in the whole document.
    input_props.append((tf.int32, [None]))  # Cluster ids. Cluster id of every mention in the whole document.
    input_props.append((tf.int32, [None]))  # Sentence map. Which sentence each token of the document belongs to.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    # capacity=10 is the queue buffer size (pending examples), not the batch size.
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    # Dequeued tensors; these are new graph tensors, distinct from the
    # enqueue placeholders in self.queue_input_tensors.
    self.input_tensors = queue.dequeue()

    if self.config["run"] == "session":
        self.loss, self.pred_start_scores, self.pred_end_scores = self.get_mention_proposal_and_loss(
            *self.input_tensors)
        tvars = tf.trainable_variables()
        # If you're using TF weights only, tf_checkpoint and init_checkpoint can be the same.
        # Get the assignment map from the TensorFlow checkpoint; depending on the
        # extension, load the weights with TF or PyTorch.
        assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
            tvars, config['tf_checkpoint'])
        init_from_checkpoint = tf.train.init_from_checkpoint
        init_from_checkpoint(config['init_checkpoint'], assignment_map)
        print("**** Trainable Variables ****")
        for var in tvars:
            init_string = ""
            if var.name in initialized_variable_names:
                init_string = ", *INIT_FROM_CKPT*"
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)
            print("  name = %s, shape = %s%s" % (var.name, var.shape, init_string))

        num_train_steps = int(self.config['num_docs'] * self.config['num_epochs'])  # num_docs * num_epochs
        num_warmup_steps = int(num_train_steps * 0.1)  # Warm up over the first tenth of training.
        self.global_step = tf.train.get_or_create_global_step()
        # A different optimizer is built depending on the model configuration.
        self.train_op = optimization.create_custom_optimizer(
            tvars, self.loss, self.config['bert_learning_rate'],
            self.config['task_learning_rate'], num_train_steps, num_warmup_steps,
            False, self.global_step, freeze=-1,
            task_opt=self.config['task_optimizer'], eps=config['adam_eps'])
    # else:
    #     self.loss, self.pred_start_scores, self.pred_end_scores, self.pred_mention_scores = \
    #         self.get_mention_proposal_and_loss(*self.input_tensors)

    self.coref_evaluator = metrics.CorefEvaluator()
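# --- Illustrative sketch (assumption; the names below are hypothetical) ---
# Why reduction=NONE on the BCE loss above: it returns one loss value per
# position instead of a scalar, so padded positions can be masked out before
# averaging. The exact Reduction path varies across TF versions; the spelling
# here matches the file above.
import tensorflow as tf

def masked_bce_loss(gold_labels, pred_probs, mask):
    """gold_labels, pred_probs, mask: tensors of shape [batch, seq_len]."""
    bce = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.ReductionV2.NONE)
    # Expand the last axis so the per-sample mean inside the loss keeps the
    # sequence dimension: the result has shape [batch, seq_len].
    per_position = bce(tf.expand_dims(gold_labels, -1), tf.expand_dims(pred_probs, -1))
    mask = tf.cast(mask, tf.float32)
    # Zero out padded positions, then average over the real tokens only.
    return tf.reduce_sum(per_position * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)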
def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for TPUEstimator."""
    tf.logging.info("*** Features ***")
    for name in sorted(features.keys()):
        tf.logging.info("  name = %s, shape = %s" % (name, features[name].shape))

    is_training = (mode == tf.estimator.ModeKeys.TRAIN)
    predictions, total_loss = coref_model.forward(features, is_training)
    (doc_idx, subtoken_map, top_span_starts, top_span_ends,
     antecedent_starts, antecedent_ends, antecedent_scores) = predictions

    tvars = tf.trainable_variables()
    initialized_variables = {}
    scaffold_fn = None
    if init_checkpoint:
        assignment_map, initialized_variables = modeling.get_assignment_map_from_checkpoint(
            tvars, init_checkpoint)
        if config.use_tpu:
            def tpu_scaffold():
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
                return tf.train.Scaffold()
            scaffold_fn = tpu_scaffold
        else:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

    tf.logging.info("**** Trainable Variables ****")
    for var in tvars:
        init_string = ", *INIT_FROM_CKPT*" if var.name in initialized_variables else ""
        tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape, init_string)

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = create_custom_optimizer(total_loss, config)
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=total_loss, train_op=train_op, scaffold_fn=scaffold_fn)
    elif mode == tf.estimator.ModeKeys.EVAL:
        def metric_fn(loss):
            return {"eval_loss": tf.metrics.mean(loss)}
        eval_metrics = (metric_fn, [total_loss])
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode, loss=total_loss, eval_metrics=eval_metrics, scaffold_fn=scaffold_fn)
    else:
        output_spec = tf.contrib.tpu.TPUEstimatorSpec(
            mode=mode,
            predictions={
                "doc_idx": doc_idx,
                "subtoken_map": subtoken_map,
                "top_span_starts": top_span_starts,
                "top_span_ends": top_span_ends,
                "antecedent_starts": antecedent_starts,
                "antecedent_ends": antecedent_ends,
                "antecedent_scores": antecedent_scores,
                "loss": total_loss
            },
            scaffold_fn=scaffold_fn)
    return output_spec
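# --- Usage sketch (illustrative; `input_fn_builder`, `train_file`, and the
# config fields are assumptions, not from the original file) ---
# How a model_fn like the one above is typically wired into a TPUEstimator
# under TF 1.x; the real flag values live in the surrounding config.
run_config = tf.contrib.tpu.RunConfig(
    master=None,  # set to the TPU worker address when use_tpu=True
    model_dir=config.model_dir,
    save_checkpoints_steps=1000,
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=200))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=config.use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=1,  # one document per step
    eval_batch_size=1,
    predict_batch_size=1)

# estimator.train(input_fn=input_fn_builder(train_file, is_training=True),
#                 max_steps=num_train_steps)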
def __init__(self, config):
    self.config = config
    self.max_seq_len = config['max_seq_len']
    self.label2id = config['label2id']
    self.num_tags = len(self.label2id)
    self.bert_config = modeling.BertConfig.from_json_file(config["bert_config_file"])

    # Add placeholders for the model.
    self.input_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Input_ids")
    self.input_mask = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Input_mask")
    self.labels_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Labels_ids")
    self.input_lens = tf.placeholder(dtype=tf.int32, shape=[None], name="Input_lens")
    self.segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Segment_ids")
    self.a_input_ids = tf.placeholder(dtype=tf.int32, shape=[None, None, None], name="Aug_input_ids")
    self.a_input_mask = tf.placeholder(dtype=tf.int32, shape=[None, None, None], name="Aug_input_mask")
    self.a_labels_ids = tf.placeholder(dtype=tf.int32, shape=[None, None, None], name="Aug_labels_ids")
    self.a_input_lens = tf.placeholder(dtype=tf.int32, shape=[None, None], name="Aug_input_lens")
    # Named "Aug_segment_ids" to avoid colliding with the "Segment_ids" placeholder above.
    self.a_segment_ids = tf.placeholder(dtype=tf.int32, shape=[None, None, None], name="Aug_segment_ids")
    self.is_train = tf.placeholder(dtype=tf.bool, shape=[], name='is_train')

    self.logits = self.get_predictions(
        self.input_ids, self.input_mask, self.input_lens, self.segment_ids,
        self.a_input_ids, self.a_labels_ids, self.a_input_mask,
        self.a_input_lens, self.a_segment_ids, self.is_train)
    self.loss, self.trans = self.loss_layer(self.logits, self.labels_ids, self.input_lens)

    tvars = tf.trainable_variables()
    assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
        tvars, self.config['tf_checkpoint'])
    tf.train.init_from_checkpoint(self.config['init_checkpoint'], assignment_map)
    initialized_vars = [v for v in tvars if v.name in initialized_variable_names]
    not_initialized_vars = [v for v in tvars if v.name not in initialized_variable_names]
    for v in initialized_vars:
        print('--initialized: %s, shape = %s' % (v.name, v.shape))
    for v in not_initialized_vars:
        print('--not initialized: %s, shape = %s' % (v.name, v.shape))

    num_train_steps = math.ceil(
        self.config['train_examples_len'] / self.config["batch_size"]) * self.config["epochs"]
    num_warmup_steps = int(num_train_steps * self.config['warmup_proportion'])
    self.global_step = tf.train.get_or_create_global_step()
    self.train_op = optimization.create_custom_optimizer(
        tvars, self.loss, self.config['bert_learning_rate'],
        self.config['task_learning_rate'], num_train_steps, num_warmup_steps,
        False, self.global_step, freeze=-1,
        task_opt=self.config['task_optimizer'], eps=config['adam_eps'])
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
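# --- Usage sketch (illustrative; not part of the original file) ---
# Feeding the placeholders above in a plain TF 1.x session. `model` is an
# instance of this class; `batch` is assumed to come from the data pipeline,
# with keys matching the placeholder attributes.
import tensorflow as tf

def train_step(model, sess, batch, step_interval=100):
    feed_dict = {
        model.input_ids: batch["input_ids"],
        model.input_mask: batch["input_mask"],
        model.labels_ids: batch["labels_ids"],
        model.input_lens: batch["input_lens"],
        model.segment_ids: batch["segment_ids"],
        model.a_input_ids: batch["a_input_ids"],
        model.a_input_mask: batch["a_input_mask"],
        model.a_labels_ids: batch["a_labels_ids"],
        model.a_input_lens: batch["a_input_lens"],
        model.a_segment_ids: batch["a_segment_ids"],
        model.is_train: True,
    }
    _, step, loss = sess.run([model.train_op, model.global_step, model.loss],
                             feed_dict=feed_dict)
    # Checkpoint periodically with the Saver created in __init__ (path is hypothetical).
    if step % step_interval == 0:
        model.saver.save(sess, "checkpoints/model", global_step=step)
    return loss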