Example #1
  def __init__(self, config):
    self.config = config
    self.max_segment_len = config['max_segment_len']
    self.max_span_width = config["max_span_width"]
    self.genres = { g:i for i,g in enumerate(config["genres"]) }
    self.subtoken_maps = {}
    self.gold = {}
    self.eval_data = None # Load eval data lazily.
    self.bert_config = modeling.BertConfig.from_json_file(config["bert_config_file"])
    self.tokenizer = tokenization.FullTokenizer(
                vocab_file=config['vocab_file'], do_lower_case=False)
    ### loading the frequent spans
    self.freq_spans = json.load(open("./data/freq_spans.json"))
    

    input_props = []
    input_props.append((tf.int32, [None, None])) # input_ids.
    input_props.append((tf.int32, [None, None])) # input_mask
    input_props.append((tf.int32, [None])) # Text lengths.
    input_props.append((tf.int32, [None, None])) # Speaker IDs.
    input_props.append((tf.int32, [])) # Genre.
    input_props.append((tf.bool, [])) # Is training.
    input_props.append((tf.int32, [None])) # Gold starts.
    input_props.append((tf.int32, [None])) # Gold ends.
    input_props.append((tf.int32, [None])) # Cluster ids.
    input_props.append((tf.int32, [None])) # Sentence Map

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    # bert stuff
    tvars = tf.trainable_variables()
    # If you're using TF weights only, tf_checkpoint and init_checkpoint can be the same
    # Get the assignment map from the tensorflow checkpoint. Depending on the extension, use TF/Pytorch to load weights.
    assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(tvars, config['tf_checkpoint'])
    init_from_checkpoint = tf.train.init_from_checkpoint if config['init_checkpoint'].endswith('ckpt') else load_from_pytorch_checkpoint
    init_from_checkpoint(config['init_checkpoint'], assignment_map)
    print("**** Trainable Variables ****")
    for var in tvars:
      init_string = ""
      if var.name in initialized_variable_names:
        init_string = ", *INIT_FROM_CKPT*"
      # tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                      # init_string)
      print("  name = %s, shape = %s%s" % (var.name, var.shape, init_string))

    num_train_steps = int(
                    self.config['num_docs'] * self.config['num_epochs'])
    num_warmup_steps = int(num_train_steps * 0.1)
    self.global_step = tf.train.get_or_create_global_step()
    self.train_op = optimization.create_custom_optimizer(tvars,
                      self.loss, self.config['bert_learning_rate'], self.config['task_learning_rate'],
                      num_train_steps, num_warmup_steps, False, self.global_step, freeze=-1,
                      task_opt=self.config['task_optimizer'], eps=config['adam_eps'])
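A queue-based pipeline like the one above is typically driven from a plain tf.Session: a background thread tensorizes documents and pushes them into the PaddingFIFOQueue through enqueue_op while the main thread runs train_op. The following is a minimal sketch of that pattern, not part of the example; tensorize_example and train_examples are assumed names for the caller's own data helpers.

import threading
import tensorflow as tf

def start_enqueue_thread(model, train_examples, session):
  # Background producer: turn each document into the tensors expected by
  # input_props and push them into the padding queue.
  def _enqueue_loop():
    while True:
      for example in train_examples:
        tensorized = model.tensorize_example(example, is_training=True)  # assumed helper
        feed_dict = dict(zip(model.queue_input_tensors, tensorized))
        session.run(model.enqueue_op, feed_dict=feed_dict)
  thread = threading.Thread(target=_enqueue_loop)
  thread.daemon = True
  thread.start()

# with tf.Session() as session:
#   session.run(tf.global_variables_initializer())
#   start_enqueue_thread(model, train_examples, session)
#   while True:
#     loss_value, _, step = session.run([model.loss, model.train_op, model.global_step])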
Example #2
    def __init__(self, config):
        self.config = config
        self.max_segment_len = config['max_segment_len']
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.subtoken_maps = {}
        self.gold = {}
        self.eval_data = None  # Load eval data lazily.
        self.dropout = None
        self.bert_config = modeling.BertConfig.from_json_file(
            config["bert_config_file"])
        self.bert_config.hidden_dropout_prob = self.config["dropout_rate"]
        self.tokenizer = tokenization.FullTokenizer(
            vocab_file=config['vocab_file'], do_lower_case=False)
        self.bce_loss = tf.keras.losses.BinaryCrossentropy(
            reduction=tf.keras.losses.ReductionV2.NONE)

        input_props = []
        input_props.append(
            (tf.int32, [None, None]))  # input_ids. (batch_size, seq_len)
        input_props.append(
            (tf.int32, [None, None]))  # input_mask (batch_size, seq_len)
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append(
            (tf.int32, [None, None]))  # Speaker IDs.  (batch_size, seq_len)
        input_props.append(
            (tf.int32, []))  # Genre. Keeps the whole batch on one topic, since several segments of the same document can share a batch.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append(
            (tf.int32,
             [None]))  # Gold starts. Not one start per instance: the starts of all gold mentions in the document.
        input_props.append((tf.int32, [None]))  # Gold ends. The ends of all gold mentions in the document.
        input_props.append(
            (tf.int32, [None]))  # Cluster ids. The cluster id of every gold mention in the document.
        input_props.append(
            (tf.int32, [None]))  # Sentence map. Which sentence each token of the document belongs to.

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        dtypes, shapes = zip(*input_props)
        queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes,
                                    shapes=shapes)  # 10 is the queue capacity (how many items can be buffered), not the batch size.
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()  # Dequeued tensors; not the same objects as the placeholders in self.queue_input_tensors.
        self.bce_loss = tf.keras.losses.BinaryCrossentropy()  # Overrides the reduction=NONE instance created above.

        if self.config["run"] == "session":
            self.loss, self.pred_start_scores, self.pred_end_scores = self.get_mention_proposal_and_loss(
                *self.input_tensors)
            tvars = tf.trainable_variables()
            # If you're using TF weights only, tf_checkpoint and init_checkpoint can be the same
            # Get the assignment map from the tensorflow checkpoint.
            # Depending on the extension, use TF/Pytorch to load weights.
            assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(
                tvars, config['tf_checkpoint'])
            init_from_checkpoint = tf.train.init_from_checkpoint
            init_from_checkpoint(config['init_checkpoint'], assignment_map)
            print("**** Trainable Variables ****")
            for var in tvars:
                init_string = ""
                if var.name in initialized_variable_names:
                    init_string = ", *INIT_FROM_CKPT*"
                tf.logging.info("  name = %s, shape = %s%s", var.name,
                                var.shape, init_string)
                print("  name = %s, shape = %s%s" %
                      (var.name, var.shape, init_string))

            num_train_steps = int(self.config['num_docs'] *
                                  self.config['num_epochs'])  # number of documents * number of epochs
            num_warmup_steps = int(num_train_steps * 0.1)  # first 10% of steps are warm-up
            # A different optimizer is built depending on the model configuration.
            self.global_step = tf.train.get_or_create_global_step()
            self.train_op = optimization.create_custom_optimizer(
                tvars,
                self.loss,
                self.config['bert_learning_rate'],
                self.config['task_learning_rate'],
                num_train_steps,
                num_warmup_steps,
                False,
                self.global_step,
                freeze=-1,
                task_opt=self.config['task_optimizer'],
                eps=config['adam_eps'])

        # else:
        #    pass
        # self.loss, self.pred_start_scores, self.pred_end_scores, self.pred_mention_scores = self.get_mention_proposal_and_loss(*self.input_tensors)

        self.coref_evaluator = metrics.CorefEvaluator()
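A side note on the two BinaryCrossentropy objects in the example: with reduction NONE the loss keeps one value per position, so padded positions can be masked out before averaging, while the default reduction collapses everything into a scalar immediately. Below is a small illustration, not taken from the example; it uses tf.keras.losses.Reduction, the name current TensorFlow releases expose for the ReductionV2 alias used above, and needs eager execution or a session to evaluate the tensors.

import tensorflow as tf

labels = tf.constant([[1.0, 0.0, 1.0]])  # gold indicators for 3 token positions
probs = tf.constant([[0.9, 0.2, 0.4]])   # predicted probabilities
mask = tf.constant([[1.0, 1.0, 0.0]])    # last position is padding

# reduction=NONE keeps one loss value per position (shape [1, 3]),
# so padding can be masked out before averaging.
per_position = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE)(labels[..., None], probs[..., None])
masked_loss = tf.reduce_sum(per_position * mask) / tf.reduce_sum(mask)

# The default reduction returns a single scalar over all positions, padding included.
scalar_loss = tf.keras.losses.BinaryCrossentropy()(labels, probs)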
Example #3
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        tf.logging.info("*** Features ***")
        for name in sorted(features.keys()):
            tf.logging.info("  name = %s, shape = %s" %
                            (name, features[name].shape))

        is_training = (mode == tf.estimator.ModeKeys.TRAIN)

        predictions, total_loss = coref_model.forward(features, is_training)
        doc_idx, subtoken_map, top_span_starts, top_span_ends, antecedent_starts, antecedent_ends, antecedent_scores = predictions
        tvars = tf.trainable_variables()
        initialized_variables = {}
        scaffold_fn = None
        if init_checkpoint:
            assignment_map, initialized_variables = modeling.get_assignment_map_from_checkpoint(
                tvars, init_checkpoint)
            if config.use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(init_checkpoint,
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(init_checkpoint, assignment_map)

        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            init_string = ", *INIT_FROM_CKPT*" if var.name in initialized_variables else ""
            tf.logging.info("  name = %s, shape = %s%s", var.name, var.shape,
                            init_string)

        if mode == tf.estimator.ModeKeys.TRAIN:

            train_op = create_custom_optimizer(total_loss, config)

            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                train_op=train_op,
                scaffold_fn=scaffold_fn)
        elif mode == tf.estimator.ModeKeys.EVAL:

            def metric_fn(loss):
                return {"eval_loss": tf.metrics.mean(loss)}

            eval_metrics = (metric_fn, [total_loss])
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                loss=total_loss,
                eval_metrics=eval_metrics,
                scaffold_fn=scaffold_fn)
        else:
            output_spec = tf.contrib.tpu.TPUEstimatorSpec(
                mode=mode,
                predictions={
                    "doc_idx": doc_idx,
                    "subtoken_map": subtoken_map,
                    "top_span_starts": top_span_starts,
                    "top_span_ends": top_span_ends,
                    "antecedent_starts": antecedent_starts,
                    "antecedent_ends": antecedent_ends,
                    "antecedent_scores": antecedent_scores,
                    "loss": total_loss
                },
                scaffold_fn=scaffold_fn)
        return output_spec
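This model_fn is meant to be handed to tf.contrib.tpu.TPUEstimator. A rough sketch of the wiring is shown below; output_dir, train_input_fn, eval_input_fn, and num_train_steps stand in for the caller's own configuration and input pipeline and are not names from the example.

import tensorflow as tf

run_config = tf.contrib.tpu.RunConfig(
    master=None,
    model_dir=output_dir,  # assumed output directory
    save_checkpoints_steps=1000,
    tpu_config=tf.contrib.tpu.TPUConfig(iterations_per_loop=1000))

estimator = tf.contrib.tpu.TPUEstimator(
    use_tpu=config.use_tpu,
    model_fn=model_fn,
    config=run_config,
    train_batch_size=1,   # one document per step
    eval_batch_size=1,
    predict_batch_size=1)

# estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
# estimator.evaluate(input_fn=eval_input_fn)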
Example #4
    def __init__(self, config):

        self.config = config
        self.max_seq_len = config['max_seq_len']
        self.label2id = config['label2id']
        self.num_tags = len(self.label2id)
        self.bert_config = modeling.BertConfig.from_json_file(config["bert_config_file"])
        # add placeholders for the model
        self.input_ids = tf.placeholder(dtype=tf.int32,
                                        shape=[None,None],
                                        name="Input_ids")
        self.input_mask = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name="Input_mask")
        self.labels_ids = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None],
                                         name="Labels_ids")
        self.input_lens = tf.placeholder(dtype=tf.int32,
                                         shape=[None],
                                         name="Input_lens")
        self.segment_ids = tf.placeholder(dtype=tf.int32,
                                         shape=[None,None],
                                         name="Segment_ids")
        self.a_input_ids = tf.placeholder(dtype=tf.int32,
                                        shape=[None,None,None],
                                        name="Aug_input_ids")
        self.a_input_mask = tf.placeholder(dtype=tf.int32,
                                         shape=[None,None,None],
                                         name="Aug_input_mask")
        self.a_labels_ids = tf.placeholder(dtype=tf.int32,
                                         shape=[None, None,None],
                                         name="Aug_labels_ids")
        self.a_input_lens = tf.placeholder(dtype=tf.int32,
                                         shape=[None,None],
                                         name="Aug_input_lens")
        self.a_segment_ids = tf.placeholder(dtype=tf.int32,
                                            shape=[None, None, None],
                                            name="Aug_segment_ids")
        self.is_train = tf.placeholder(dtype=tf.bool, shape=[], name='is_train')

        self.logits = self.get_predictions(
            self.input_ids, self.input_mask, self.input_lens, self.segment_ids,
            self.a_input_ids, self.a_labels_ids, self.a_input_mask,
            self.a_input_lens, self.a_segment_ids, self.is_train)
        self.loss, self.trans = self.loss_layer(self.logits, self.labels_ids, self.input_lens)

        tvars = tf.trainable_variables()
        assignment_map, initialized_variable_names = modeling.get_assignment_map_from_checkpoint(tvars, self.config['tf_checkpoint'])
        tf.train.init_from_checkpoint(self.config['init_checkpoint'], assignment_map)
        initialized_vars = [v for v in tvars if v.name in initialized_variable_names]
        not_initialized_vars = [v for v in tvars if v.name not in initialized_variable_names]
        for v in initialized_vars:
            print('--initialized: %s, shape = %s' % (v.name, v.shape))
        for v in not_initialized_vars:
            print('--not initialized: %s, shape = %s' % (v.name, v.shape))

        num_train_steps = math.ceil(self.config['train_examples_len'] / self.config["batch_size"]) * self.config["epochs"]
        num_warmup_steps = int(num_train_steps * self.config['warmup_proportion'])
        
        self.global_step = tf.train.get_or_create_global_step()
        self.train_op = optimization.create_custom_optimizer(
            tvars, self.loss, self.config['bert_learning_rate'], self.config['task_learning_rate'],
            num_train_steps, num_warmup_steps, False, self.global_step, freeze=-1,
            task_opt=self.config['task_optimizer'], eps=config['adam_eps'])

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=5)
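Because this model is built on raw placeholders rather than a queue, training is a plain feed_dict loop. Below is a minimal sketch of one training step; the batch dict layout, train_batches iterator, and checkpoint_path are assumptions, not part of the example.

import tensorflow as tf

def run_train_step(session, model, batch):
    # `batch` is assumed to hold numpy arrays shaped like the placeholders above.
    feed_dict = {
        model.input_ids: batch['input_ids'],
        model.input_mask: batch['input_mask'],
        model.labels_ids: batch['labels_ids'],
        model.input_lens: batch['input_lens'],
        model.segment_ids: batch['segment_ids'],
        model.a_input_ids: batch['a_input_ids'],
        model.a_input_mask: batch['a_input_mask'],
        model.a_labels_ids: batch['a_labels_ids'],
        model.a_input_lens: batch['a_input_lens'],
        model.a_segment_ids: batch['a_segment_ids'],
        model.is_train: True,
    }
    loss_value, _, step = session.run(
        [model.loss, model.train_op, model.global_step], feed_dict=feed_dict)
    return loss_value, step

# with tf.Session() as session:
#     session.run(tf.global_variables_initializer())
#     for batch in train_batches:  # assumed data iterator
#         loss_value, step = run_train_step(session, model, batch)
#         if step % 1000 == 0:
#             model.saver.save(session, checkpoint_path, global_step=step)  # checkpoint_path assumed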