Example #1
    def add_model_specific_valuables(self, config):
        self.context_embeddings = util.EmbeddingDictionary(
            config["context_embeddings"])
        self.head_embeddings = util.EmbeddingDictionary(
            config["head_embeddings"], maybe_cache=self.context_embeddings)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.max_span_width = config["max_span_width"]
        if config["lm_path"]:
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        else:
            self.lm_file = None
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]
        self.eval_data = None  # Load eval data lazily.

        input_props = []
        input_props.append((tf.string, [None, None]))  # Tokens.
        input_props.append(
            (tf.float32, [None, None, self.context_embeddings.size
                          ]))  # Context embeddings.
        input_props.append(
            (tf.float32, [None, None,
                          self.head_embeddings.size]))  # Head embeddings.
        input_props.append(
            (tf.float32, [None, None, self.lm_size,
                          self.lm_layers]))  # LM embeddings.
        input_props.append((tf.int32, [None, None,
                                       None]))  # Character indices.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.

        return input_props
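The method above only declares (dtype, shape) pairs; the caller is expected to turn them into placeholders and a PaddingFIFOQueue, exactly as the next example does inline. A minimal sketch of that consumption, assuming a model object exposing the method and TensorFlow 1.x imported as tf:

input_props = model.add_model_specific_valuables(config)
queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
dtypes, shapes = zip(*input_props)
queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)  # pads the [None] dims per example
enqueue_op = queue.enqueue(queue_input_tensors)
input_tensors = queue.dequeue()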
Example #2
    def __init__(self, config):
        self.config = config
        self.embedding_info = [(emb["size"], emb["lowercase"])
                               for emb in config["embeddings"]]
        self.embedding_size = sum(size for size, _ in self.embedding_info)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.embedding_dicts = [
            util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
            for emb in config["embeddings"]
        ]
        self.max_mention_width = config["max_mention_width"]
        self.max_context_width = config["max_context_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.eval_data = None  # Load eval data lazily.

        input_props = []
        input_props.append(
            (tf.float32, [None, None,
                          self.embedding_size]))  # Text embeddings.
        input_props.append((tf.int32, [None, None,
                                       None]))  # Character indices.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.int32, [None]))  # Speaker IDs.
        input_props.append((tf.int32, []))  # Genre.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.
        input_props.append((tf.int32, [None]))  # Cluster ids.

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        dtypes, shapes = zip(*input_props)
        queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()

        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step = tf.assign(self.global_step, 0)
        learning_rate = tf.train.exponential_decay(
            self.config["learning_rate"],
            self.global_step,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)
        trainable_params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, trainable_params)
        gradients, _ = tf.clip_by_global_norm(gradients,
                                              self.config["max_gradient_norm"])
        optimizers = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }
        optimizer = optimizers[self.config["optimizer"]](learning_rate)
        self.train_op = optimizer.apply_gradients(zip(gradients,
                                                      trainable_params),
                                                  global_step=self.global_step)
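The placeholders in queue_input_tensors are only the producer half of this input pipeline; a background thread normally tensorizes training examples and pushes them through enqueue_op while the training loop dequeues. A minimal sketch of such a feeder, assuming a hypothetical tensorize_example helper that returns values in the same order as input_props:

import random
import threading

def start_enqueue_thread(model, session, examples):
    # Background feeder: tensorize each example and push it into the padding queue.
    # model.tensorize_example is a hypothetical helper matching input_props order.
    def worker():
        while True:
            random.shuffle(examples)
            for example in examples:
                feed_dict = dict(zip(model.queue_input_tensors,
                                     model.tensorize_example(example, is_training=True)))
                session.run(model.enqueue_op, feed_dict=feed_dict)
    thread = threading.Thread(target=worker)
    thread.daemon = True
    thread.start()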
Example #3
  def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"], maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.max_span_width = config["max_span_width"]
    self.genres = { g:i for i,g in enumerate(config["genres"]) }
    if config["lm_path"]:
      self.lm_file = h5py.File(self.config["lm_path"], "r")
    else:
      self.lm_file = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None # Load eval data lazily.

    self.scene_emb_size = self.config['scene_emb_size']
    if self.config['use_video']:
      self.scene_embedding = util.load_scene_embedding(config["scene_embedding_dir"])

    input_props = []
    input_props.append((tf.string, [None, None])) # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings.size])) # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size])) # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers])) # LM embeddings.
    input_props.append((tf.int32, [None, None, None])) # Character indices.
    input_props.append((tf.int32, [None])) # Text lengths.
    input_props.append((tf.int32, [None])) # Speaker IDs.
    input_props.append((tf.int32, [])) # Genre.
    input_props.append((tf.bool, [])) # Is training.
    input_props.append((tf.int32, [None])) # Gold starts.
    input_props.append((tf.int32, [None])) # Gold ends.
    input_props.append((tf.int32, [None])) # Cluster ids.
    input_props.append((tf.float32, [None, self.scene_emb_size])) # Video Scene Embedding
    input_props.append((tf.int32, [None])) # Token Genders
    input_props.append((tf.int32, [None])) # Token is First Pronoun

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"], staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
      "adam" : tf.train.AdamOptimizer,
      "sgd" : tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params), global_step=self.global_step)
Example #4
    def __init__(self, config):
        super(CorefModel, self).__init__()
        self.config = config
        self.embedding_info = [
            (emb["size"], emb["lowercase"]) for emb in config["embeddings"]
        ]  # [(300,false)(50,false)]
        self.embedding_size = sum(
            size for size, _ in self.embedding_info)  # 350 = 300+50
        self.char_embedding_size = config["char_embedding_size"]  # 8
        self.char_dict = util.load_char_dict(
            config["char_vocab_path"])  # all characters + <unk> size 115
        self.max_mention_width = config["max_mention_width"]  # 10
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.dropout = nn.Dropout(self.config["dropout_rate"])  # 0.2
        self.lexical_dropout = nn.Dropout(
            self.config["lexical_dropout_rate"])  # 0.5

        self.char_embeddings = nn.Embedding(115, 8)

        self.char_cnn = CNN()
        self.bilstm = nn.LSTM(input_size=500,
                              hidden_size=200,
                              num_layers=1,
                              dropout=0.2,
                              bidirectional=True)

        self._endpoint_span_extractor = EndpointSpanExtractor(
            800,
            combination="x,y",
            num_width_embeddings=10,
            span_width_embedding_dim=20,
            bucket_widths=False)
        self._attentive_span_extractor = SelfAttentiveSpanExtractor(
            input_dim=400)

        self.genre_emb = nn.Embedding(len(self.genres),
                                      self.config["feature_size"])
        # self.mention_width_emb = nn.Embedding(self.config["max_mention_width"], self.config["feature_size"])
        # self.head_scores = nn.Linear(400, 1)
        self.mention = SpanPruner(FFNNMention())
        self.same_speaker_emb = nn.Embedding(2, self.config["feature_size"])
        self.mention_distance_emb = nn.Embedding(10,
                                                 self.config["feature_size"])
        self.antecedent = FFNNAntecedent()

        self._mention_recall = MentionRecall()
        self._conll_coref_scores = ConllCorefScores()
        self._regularizer = None

        self.weights_init(self.char_cnn.parameters())
        self.hidden = self.bilstm_init(self.bilstm.hidden_size)
        self.weights_init(self.mention.parameters())
        self.weights_init(self.antecedent.parameters())
Example #5
  def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.context_embeddings_size = self.context_embeddings.size

    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])

    if self.config["lm_path"].lower() == "none":
      self.lm_file = None
    else:
      self.lm_file = h5py.File(self.config["lm_path"], "r")
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]

    self.eval_data = None  # Load eval data lazily.
    self.ner_types = self.config['ner_types']
    self.ner_maps = {ner: (i + 1) for i, ner in enumerate(self.ner_types)}
    self.num_types = len(self.ner_types)

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings_size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold NER Label

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
      "adam": tf.train.AdamOptimizer,
      "sgd": tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params), global_step=self.global_step)
Example #6
    def __init__(self, config):
        super(CorefModel, self).__init__()
        self.config = config
        self.embedding_info = [
            (emb["size"], emb["lowercase"]) for emb in config["embeddings"]
        ]  # [(300,false)(50,false)]
        self.embedding_size = sum(
            size for size, _ in self.embedding_info)  # 350 = 300+50
        self.char_embedding_size = config["char_embedding_size"]  # 8
        self.char_dict = util.load_char_dict(
            config["char_vocab_path"])  # all characters + <unk> size 115
        self.max_mention_width = config["max_mention_width"]  # 10
        self.genres = {g: i for i, g in enumerate(config["genres"])}

        self.char_embeddings = nn.Parameter(
            torch.randn(
                [len(self.char_dict), self.config["char_embedding_size"]]))
        self.char_cnn = CNN()
        # TODO check if the input to the BILSTM should be a pack(_padded)_sequence so that minibatches can be used
        self.bilstm = nn.LSTM(input_size=500,
                              hidden_size=200,
                              num_layers=1,
                              dropout=0.2,
                              bidirectional=True)
        self.genre_tensor = nn.Parameter(
            torch.randn([len(self.genres), self.config["feature_size"]]))
        self.mention_width_tensor = nn.Parameter(
            torch.randn([
                self.config["max_mention_width"], self.config["feature_size"]
            ]))
        self.head_scores = nn.Linear(400, 1)
        self.mention = FFNNMention()
        self.same_speaker_emb = nn.Parameter(
            torch.randn([2, self.config["feature_size"]]))
        self.mention_distance_emb = nn.Parameter(
            torch.zeros([10, self.config["feature_size"]]))
        self.antecedent = FFNNAntecedent()

        nn.init.xavier_uniform_(self.char_embeddings)
        self.weights_init(self.char_cnn.parameters())
        self.hidden = self.bilstm_init(self.bilstm.hidden_size)
        nn.init.xavier_uniform_(self.genre_tensor)
        nn.init.xavier_uniform_(self.mention_width_tensor)
        self.weights_init(self.mention.parameters())
        nn.init.xavier_uniform_(self.same_speaker_emb)
        nn.init.xavier_uniform_(self.mention_distance_emb)
        self.weights_init(self.antecedent.parameters())
Example #7
 def __init__(self, config, embedding_dicts, dataset="train"):
     self.config = config
     self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
     self.embedding_size = sum(size for size, _ in self.embedding_info)
     self.embedding_dicts = embedding_dicts
     self.char_dict = util.load_char_dict(config["char_vocab_path"])
     self.genres = {g: i for i, g in enumerate(config["genres"])}
     if dataset == "train":
         file_path = self.config["train_path"]
     elif dataset == "dev":
         file_path = self.config["dev_path"]
     elif dataset == "test":
         file_path = self.config["test_path"]
     else:
         raise ValueError("Unknown dataset: {}. Select from: train, dev or test".format(dataset))
     with open(file_path) as f:
         self.train_examples = [json.loads(jsonline) for jsonline in f.readlines()]
     random.shuffle(self.train_examples)
     self.length = len(self.train_examples)
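Because the examples are shuffled once at load time, serving them in order already yields randomized minibatches. A minimal sketch of a batch iterator over such a reader object (the reader argument and batch_size are assumptions, not part of the class above):

def get_batches(reader, batch_size):
    # Yield consecutive slices of the pre-shuffled examples as minibatches.
    for start in range(0, reader.length, batch_size):
        yield reader.train_examples[start:start + batch_size]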
Example #8
  def __init__(self, config):
    self.config = config
    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"]) for emb in config["embeddings"]]
    self.max_mention_width = config["max_mention_width"]
    self.genres = { g:i for i,g in enumerate(config["genres"]) }
    self.eval_data = None # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size])) # Text embeddings.
    input_props.append((tf.int32, [None, None, None])) # Character indices.
    input_props.append((tf.int32, [None])) # Text lengths.
    input_props.append((tf.int32, [None])) # Speaker IDs.
    input_props.append((tf.int32, [])) # Genre.
    input_props.append((tf.bool, [])) # Is training.
    input_props.append((tf.int32, [None])) # Gold starts.
    input_props.append((tf.int32, [None])) # Gold ends.
    input_props.append((tf.int32, [None])) # Cluster ids.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"], staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
      "adam" : tf.train.AdamOptimizer,
      "sgd" : tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params), global_step=self.global_step)
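Several of these examples share the same schedule: with staircase=True, tf.train.exponential_decay holds the learning rate constant for decay_frequency steps and then multiplies it by decay_rate. A plain-Python sketch of the value it produces at a given global step:

def decayed_learning_rate(base_rate, step, decay_frequency, decay_rate):
    # Same shape as tf.train.exponential_decay(..., staircase=True):
    # hold for decay_frequency steps, then multiply by decay_rate.
    return base_rate * decay_rate ** (step // decay_frequency)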
Example #9
    def __init__(self, config):
        self.config = config
        self.embedding_info = [
            (emb["size"], emb["lowercase"]) for emb in config["embeddings"]
        ]  #[(300,false)(50,false)]
        self.embedding_size = sum(
            size for size, _ in self.embedding_info)  #350 = 300+50
        self.char_embedding_size = config["char_embedding_size"]  #8
        self.char_dict = util.load_char_dict(
            config["char_vocab_path"])  #all characters + <unk> size 115
        self.embedding_dicts = [
            util.load_embedding_dict(emb["path"], emb["size"], emb["format"])
            for emb in config["embeddings"]
        ]  #dictionary [(43994?,300)(268822,50)]
        self.max_mention_width = config["max_mention_width"]  #10
        self.genres = {g: i
                       for i, g in enumerate(config["genres"])
                       }  #types of corpus documents
        #(news = nw, conversational telephone speech=tc, weblogs=wb, usenet newsgroups, broadcast=bc, talk shows)
        #[bc, bn, mz, nw, pt, tc, wb]
        self.eval_data = None  # Load eval data lazily.

        input_props = []
        input_props.append((tf.FloatTensor, [None, None, self.embedding_size
                                             ]))  # Text embeddings. [?,?,350]
        input_props.append((tf.IntTensor, [None, None,
                                           None]))  # Character indices.
        input_props.append((tf.IntTensor, [None]))  # Text lengths.
        input_props.append((tf.IntTensor, [None]))  # Speaker IDs.
        input_props.append((tf.IntTensor, []))  # Genre.
        input_props.append((tf.ByteTensor, []))  # Is training.
        input_props.append((tf.IntTensor, [None]))  # Gold starts.
        input_props.append((tf.IntTensor, [None]))  # Gold ends.
        input_props.append((tf.IntTensor, [None]))  # Cluster ids.
        self.queue_input_tensors = [
            tf.zeros(shape).type(dtype) for dtype, shape in input_props
        ]
        # dtypes, shapes = zip(*input_props)
        # queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
        # self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        # self.input_tensors = queue.dequeue()
        self.input_tensors = self.queue_input_tensors  # 9 items from input_props that are split when calling get_predictions_and_loss
        # this is the training step more or less
        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)

        self.global_step = tf.zeros(
        )  #.Variable(0, name="global_step", trainable=False)
        # self.reset_global_step = tf.assign(self.global_step, 0)

        # here you update parameters based on your predictions and loss
        trainable_params = autograd.Variable(
            0
        )  #this is equivalent to model.parameters() tf.trainable_variables()
        gradients = tf.gradients(
            self.loss, trainable_params)  #this is autograd backward pass
        # Constructs symbolic derivatives of sum of self.loss w.r.t. x in trainable_params
        gradients, _ = nn.utils.clip_grad_norm(
            gradients, self.config["max_gradient_norm"])
        optimizers = {
            "adam":
            optim.Adam(trainable_params,
                       lr=self.config["learning_rate"],
                       weight_decay=self.config["decay_rate"]),
            "sgd":
            optim.SGD(trainable_params,
                      lr=self.config["learning_rate"],
                      weight_decay=self.config["decay_rate"])
        }
        optimizer = optimizers[self.config["optimizer"]]

        learning_rate = optim.lr_scheduler.ExponentialLR(
            optimizer, gamma=self.config["decay_frequency"])
        learning_rate.step()
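This port still mixes TensorFlow calls with PyTorch ones. In PyTorch the optimizer takes the module's parameters directly, loss.backward() computes gradients in place, and nn.utils.clip_grad_norm_ returns a single total norm rather than a (gradients, norm) pair. A minimal sketch of the equivalent training setup, assuming the class is an nn.Module so model.parameters() works (build_optimizer and training_step are illustrative helpers, not part of the source):

import torch.nn as nn
import torch.optim as optim

def build_optimizer(model, config):
    # Counterpart of tf.trainable_variables() plus the optimizer lookup above.
    optimizers = {"adam": optim.Adam, "sgd": optim.SGD}
    optimizer = optimizers[config["optimizer"]](model.parameters(),
                                                lr=config["learning_rate"])
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=config["decay_rate"])
    return optimizer, scheduler

def training_step(model, optimizer, loss, max_gradient_norm):
    optimizer.zero_grad()
    loss.backward()  # replaces tf.gradients
    nn.utils.clip_grad_norm_(model.parameters(), max_gradient_norm)  # returns the norm only
    optimizer.step()  # replaces optimizer.apply_gradients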
Example #10
    def __init__(self, config):
        self.config = config
        self.context_embeddings = util.EmbeddingDictionary(
            config["context_embeddings"])
        self.head_embeddings = util.EmbeddingDictionary(
            config["head_embeddings"], maybe_cache=self.context_embeddings)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        if config["lm_path"]:
            self.config["lm_path"] = "/scratch/pp1953/dataset/elmo_cache.hdf5"
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        else:
            self.lm_file = None
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]
        self.eval_data = None  # Load eval data lazily.
        self.swag_train_dir = config["swag_train_dir"]
        self.swag_val_dir = config["swag_val_dir"]

        input_props = []
        input_props.append((tf.string, [None, None]))  # Tokens.
        input_props.append((tf.float32, [None, None,
                                         1024]))  # Context embeddings.
        input_props.append((tf.float32, [None, None,
                                         1024]))  # Head embeddings.
        input_props.append(
            (tf.float32, [None, None, self.lm_size,
                          self.lm_layers]))  # LM embeddings.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.
        input_props.append((tf.int32, [None]))  # Cluster ids.

        # SWAG
        input_props.append((tf.float32, [None, None,
                                         1024]))  # sentence embeddings
        input_props.append((tf.int32, [None]))  # text length
        input_props.append((tf.int32, [1, 5]))  # the label.

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        self.swag_embeddings = iter([
            f for f in listdir(self.swag_train_dir)
            if isfile(join(self.swag_train_dir, f))
        ])
        self.swag_test_embeddings = iter([
            f for f in listdir(self.swag_val_dir)
            if isfile(join(self.swag_val_dir, f))
        ])
        dtypes, shapes = zip(*input_props)
        queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()

        self.swag_predictions, self.multitask_loss1 = self.get_predictions_and_loss_cm(
            *self.input_tensors)
        self.multitask_loss1 = self.multitask_loss1 / 10
        self.global_step1 = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step1 = tf.assign(self.global_step1, 0)
        learning_rate1 = tf.train.exponential_decay(
            self.config["learning_rate"],
            self.global_step1,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)
        optimizers1 = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }
        optimizer1 = optimizers1[self.config["optimizer"]](learning_rate1)

        self.predictions2, self.loss2 = self.get_predictions_and_loss(
            *self.input_tensors)
        self.loss = self.loss2 + self.multitask_loss1
        trainable_params1 = tf.trainable_variables()
        gradients1 = tf.gradients(self.loss, trainable_params1)
        gradients1, _ = tf.clip_by_global_norm(
            gradients1, self.config["max_gradient_norm"])

        self.train_op = optimizer1.apply_gradients(
            zip(gradients1, trainable_params1), global_step=self.global_step1)
Example #11
    def __init__(self, config):
        self.config = config
        self.context_embeddings = util.EmbeddingDictionary(
            config["context_embeddings"])
        self.head_embeddings = util.EmbeddingDictionary(
            config["head_embeddings"], maybe_cache=self.context_embeddings)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])

        self.lm_file = None
        self.lm_hub = None
        self.lm_layers = 0  # TODO: Remove these.
        self.lm_size = 0
        if config["lm_path"]:
            if "tfhub" in config["lm_path"]:
                print "Using tensorflow hub:", config["lm_path"]
                self.lm_hub = hub.Module(config["lm_path"].encode("utf-8"),
                                         trainable=False)
            else:
                self.lm_file = h5py.File(self.config["lm_path"], "r")
            self.lm_layers = self.config["lm_layers"]
            self.lm_size = self.config["lm_size"]

        self.adjunct_roles, self.core_roles = split_srl_labels(
            config["srl_labels"], config["include_c_v"])
        self.srl_labels_inv = [""] + self.adjunct_roles + self.core_roles
        self.srl_labels = {l: i for i, l in enumerate(self.srl_labels_inv)}

        # IO Stuff.
        # Need to make sure they are in the same order as input_names + label_names
        self.input_props = [
            (tf.string, [None]),  # String tokens.
            (tf.float32, [None, self.context_embeddings.size
                          ]),  # Context embeddings.
            (tf.float32, [None,
                          self.head_embeddings.size]),  # Head embeddings.
            (tf.float32, [None, self.lm_size,
                          self.lm_layers]),  # LM embeddings.
            (tf.int32, [None, None]),  # Character indices.
            (tf.int32, []),  # Text length.
            (tf.int32, []),  # Document ID.
            (tf.bool, []),  # Is training.
            (tf.int32, [None]),  # Gold predicate ids (for input).
            (tf.int32, []),  # Num gold predicates (for input).
            (tf.int32, [None]),  # Predicate ids (length=num_srl_relations).
            (tf.int32, [None]),  # Argument starts.
            (tf.int32, [None]),  # Argument ends.
            (tf.int32, [None]),  # SRL labels.
            (tf.int32, [])  # Number of SRL relations.
        ]
        self.input_names = _input_names
        self.label_names = _label_names
        self.predict_names = _predict_names
        self.batch_size = self.config["batch_size"]
        dtypes, shapes = zip(*self.input_props)
        if self.batch_size > 0 and self.config["max_tokens_per_batch"] < 0:
            # Use fixed batch size if number of words per batch is not limited (-1).
            self.queue_input_tensors = [
                tf.placeholder(dtype, shape)
                for dtype, shape in self.input_props
            ]
            queue = tf.PaddingFIFOQueue(capacity=self.batch_size * 2,
                                        dtypes=dtypes,
                                        shapes=shapes)
            self.enqueue_op = queue.enqueue(self.queue_input_tensors)
            self.input_tensors = queue.dequeue_many(self.batch_size)
        else:
            # Use dynamic batch size.
            new_shapes = [[None] + shape for shape in shapes]
            self.queue_input_tensors = [
                tf.placeholder(dtype, shape)
                for dtype, shape in zip(dtypes, new_shapes)
            ]
            queue = tf.PaddingFIFOQueue(capacity=2,
                                        dtypes=dtypes,
                                        shapes=new_shapes)
            self.enqueue_op = queue.enqueue(self.queue_input_tensors)
            self.input_tensors = queue.dequeue()
        num_features = len(self.input_names)
        self.input_dict = dict(
            zip(self.input_names, self.input_tensors[:num_features]))
        self.labels_dict = dict(
            zip(self.label_names, self.input_tensors[num_features:]))
Example #12
    def __init__(self, config):
        self.config = config
        self.context_embeddings = util.EmbeddingDictionary(
            config["context_embeddings"])
        self.head_embeddings = util.EmbeddingDictionary(
            config["head_embeddings"], maybe_cache=self.context_embeddings)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        if config["lm_path"]:
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        else:
            self.lm_file = None
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]
        self.eval_data = None  # Load eval data lazily.
        self.undersampling_probability = self.config[
            "undersampling_probability"]
        self.second_undersampling_probability = self.config[
            "second_undersampling_probability"]
        self.cross_validation_fold = self.config["cross_validation_fold"]
        self.skip_comparative_bridging = 'skip_comparative_bridging' in self.config and self.config[
            'skip_comparative_bridging']

        input_props = []
        input_props.append(
            (tf.float32, [None, None, self.context_embeddings.size
                          ]))  # Context embeddings.
        input_props.append(
            (tf.float32, [None, None,
                          self.head_embeddings.size]))  # Head embeddings.
        input_props.append(
            (tf.float32, [None, None, self.lm_size,
                          self.lm_layers]))  # LM embeddings.
        input_props.append((tf.int32, [None, None,
                                       None]))  # Character indices.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.int32, [None]))  # Speaker IDs.
        input_props.append((tf.int32, []))  # Genre.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.
        input_props.append((tf.int32, [None]))  # Cluster ids.
        input_props.append(
            (tf.int32, [None]))  # Bridging antecedent cluster ids
        input_props.append(
            (tf.int32, [None]))  # IS status 0-DN 1-DO 2-Bridging
        input_props.append((tf.bool, [None]))  # undersampling mask

        self.input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]

        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step = tf.assign(self.global_step, 0)

        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)
        learning_rate = tf.train.exponential_decay(
            self.config["learning_rate"],
            self.global_step,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)
        trainable_params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, trainable_params)
        gradients, _ = tf.clip_by_global_norm(gradients,
                                              self.config["max_gradient_norm"])
        optimizers = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }
        optimizer = optimizers[self.config["optimizer"]](learning_rate)
        self.train_op = optimizer.apply_gradients(zip(gradients,
                                                      trainable_params),
                                                  global_step=self.global_step)
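Unlike the queue-based variants, this model binds plain placeholders, so every training step supplies a feed_dict directly. A minimal sketch, again assuming a hypothetical tensorize_example that returns values in the same order as input_props:

def train_step(model, session, example):
    # Bind one tensorized example to the placeholders and run a gradient update.
    tensors = model.tensorize_example(example, is_training=True)  # hypothetical helper
    feed_dict = dict(zip(model.input_tensors, tensors))
    train_loss, _ = session.run([model.loss, model.train_op], feed_dict=feed_dict)
    return train_loss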
Example #13
    def __init__(self, config):
        self.config = config
        self.context_embeddings = util.EmbeddingDictionary(
            config["context_embeddings"])
        self.head_embeddings = util.EmbeddingDictionary(
            config["head_embeddings"], maybe_cache=self.context_embeddings)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.softmax_threshold = config['softmax_threshold']
        if config["lm_path"]:
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        else:
            self.lm_file = None
        self.lm_layers = self.config["lm_layers"]  # 3
        self.lm_size = self.config["lm_size"]  # 1024
        self.eval_data = None  # Load eval data lazily.
        print('Start to load the eval data')
        st = time.time()
        if not config["predict"]:
            self.load_eval_data()
        print("Finished in {:.2f}".format(time.time() - st))

        input_props = []
        input_props.append((tf.string, [None, None]))  # Tokens.
        input_props.append(
            (tf.float32, [None, None, self.context_embeddings.size
                          ]))  # Context embeddings.
        input_props.append(
            (tf.float32, [None, None,
                          self.head_embeddings.size]))  # Head embeddings.
        input_props.append(
            (tf.float32, [None, None, self.lm_size,
                          self.lm_layers]))  # LM embeddings.
        input_props.append((tf.int32, [None, None,
                                       None]))  # Character indices.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.int32, []))  # pronoun lengths.
        input_props.append((tf.int32, []))  # name lengths.
        input_props.append((tf.int32, [None]))  # Speaker IDs.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # gold_starts.
        input_props.append((tf.int32, [None]))  # gold_ends.
        input_props.append((tf.int32, [None, None]))  # number_features.
        input_props.append((tf.int32, [None, None]))  # candidate_positions.
        input_props.append((tf.int32, [None, None]))  # pronoun_positions.
        input_props.append((tf.int32, [None, None]))  # name_position.
        input_props.append((tf.int32, [None, None]))  # status_positions.
        input_props.append((tf.int32, [None, None]))  # order_features.
        input_props.append((tf.bool, [None, None]))  # labels
        input_props.append((tf.float32, [None, None]))  # candidate_masks

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        dtypes, shapes = zip(*input_props)
        queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()

        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step = tf.assign(self.global_step, 0)
        learning_rate = tf.train.exponential_decay(
            self.config["learning_rate"],
            self.global_step,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)
        trainable_params = tf.trainable_variables()
        gradients = tf.gradients(self.loss, trainable_params)
        gradients, _ = tf.clip_by_global_norm(gradients,
                                              self.config["max_gradient_norm"])
        optimizers = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }
        optimizer = optimizers[self.config["optimizer"]](learning_rate)
        self.train_op = optimizer.apply_gradients(zip(gradients,
                                                      trainable_params),
                                                  global_step=self.global_step)
Example #14
  def __init__(self, config):
    self.config = config

    self.pos_tag_dict = util.load_pos_tags(config["pos_tag_path"])
    self.ner_tag_dict = util.load_pos_tags(config["ner_tag_path"])
    self.categories_dict = util.load_pos_tags(config["categories_path"])

    self.embedding_info = [(emb["size"], emb["lowercase"]) for emb in config["embeddings"]]
    self.embedding_size = sum(size for size, _ in self.embedding_info) # 350
    self.char_embedding_size = config["char_embedding_size"]

    self.glove_embedding_size = 300

    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    self.l = float(config["l"])


    print "l value:", self.l
    print "l adapted:", self.config["l_adapted"]
    
    # glove and turian
    self.embedding_dicts = [util.load_embedding_dict(emb["path"], emb["size"], emb["format"]) for emb in config["embeddings"]]
    
    # glove only
    glove_emb = config["embeddings"][0]
    self.glove_embedding_dict = util.load_embedding_dict(glove_emb["path"], glove_emb["size"], glove_emb["format"])

    self.max_mention_width = config["max_mention_width"]
    self.genres = { g:i for i,g in enumerate(config["genres"]) }
    self.eval_data = None # Load eval data lazily.

    input_props = []
    input_props.append((tf.float32, [None, None, self.embedding_size])) # Text embeddings. --> sentences x words x embedding size
    input_props.append((tf.int32, [None, None, None])) # Character indices.
    input_props.append((tf.int32, [None])) # Text lengths.
    input_props.append((tf.int32, [None])) # Speaker IDs.
    input_props.append((tf.int32, [])) # Genre.
    input_props.append((tf.bool, [])) # Is training.
    input_props.append((tf.int32, [None])) # Gold starts.
    input_props.append((tf.int32, [None])) # Gold ends.
    input_props.append((tf.int32, [None])) # Cluster ids.

    input_props.append((tf.float32, [None, None, len(self.pos_tag_dict)])) # POS tags --> sentences x tags
    input_props.append((tf.float32, [None, None, len(self.ner_tag_dict)])) # NER indicator variable

    input_props.append((tf.float32, [None, None, len(self.categories_dict)])) # categories

    input_props.append((tf.int32, [None])) # NER IDs. # matching speakers
    input_props.append((tf.float32, [None, None, self.glove_embedding_size])) # categories with glove embeddings

    # DOMAIN ADAPTATION THING
    input_props.append((tf.float32, [len(self.genres)])) # domain labels
    input_props.append((tf.float32, [])) # l

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss, self.domain_loss, self.domain_predictions, self.values = self.get_predictions_and_loss(*self.input_tensors)
    # self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"], staircase=True)
    
    self.total_loss = self.loss + self.domain_loss
    trainable_params = tf.trainable_variables()
    # gradients = tf.gradients(self.loss, trainable_params)
    
    gradients = tf.gradients(self.total_loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
      "adam" : tf.train.AdamOptimizer,
      "sgd" : tf.train.GradientDescentOptimizer
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params), global_step=self.global_step)
Example #15
  def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])

    # TODO-Ahmed what is head embeddings
    self.head_embeddings = util.EmbeddingDictionary(config["head_embeddings"],
                                                    maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
      
    self.lm_file = None
    self.lm_hub = None
    self.lm_layers = 0  # TODO: Remove these.
    self.lm_size = 0

    # Not applied in the best experiment case
    if config["lm_path"]:
      if "tfhub" in config["lm_path"]:
        print "Using tensorflow hub:", config["lm_path"]
        #self.lm_hub = hub.Module(config["lm_path"].encode("utf-8"), trainable=False)
      else:

        # TODO-Ahmed investigate lm
        self.lm_file = h5py.File(self.config["lm_path"], "r")
      self.lm_layers = self.config["lm_layers"]
      self.lm_size = self.config["lm_size"]


    # self.adjunct_roles, self.core_roles = split_srl_labels(
    #     config["srl_labels"], config["include_c_v"])
    # self.srl_labels_inv  = [""] + self.adjunct_roles + self.core_roles
    # self.srl_labels = { l:i for i,l in enumerate(self.srl_labels_inv) }
    self.ner_labels = { l:i for i,l in enumerate([""] + config["ner_labels"]) }
    self.ner_labels_inv = [""] + config["ner_labels"]

    if "relation_labels" in config:
      self.rel_labels_inv = [""] + config["relation_labels"]
      if config["filter_reverse_relations"]:
        self.rel_labels_inv = [r for r in self.rel_labels_inv if "REVERSE" not in r]
      self.rel_labels = { l:i for i,l in enumerate(self.rel_labels_inv) }
      print "Filtered relations:", self.rel_labels
    else:
      self.rel_labels = None
      self.rel_labels_inv = None

    # IO Stuff.
    # Need to make sure they are in the same order as input_names + label_names
    self.input_props = [
        (tf.string, [None]), # String tokens.
        (tf.float32, [None, self.context_embeddings.size]), # Context embeddings.
        (tf.float32, [None, self.head_embeddings.size]), # Head embeddings.
        (tf.float32, [None, self.lm_size, self.lm_layers]), # LM embeddings.
        (tf.int32, [None, None]), # Character indices.
        (tf.int32, []),  # Text length.
        (tf.int32, []),  # Document ID.
        (tf.bool, []),  # Is training.
        (tf.int32, [None]),  # NER starts.
        (tf.int32, [None]),  # NER ends.
        (tf.int32, [None]),  # NER labels. 10
        (tf.int32, []),  # Number of NER spans.
        (tf.int32, [None]),  # Coref mention starts.
        (tf.int32, [None]),  # Coref mention ends.
        (tf.int32, [None]),  # Coref cluster ids.
        (tf.int32, []),  # Number of coref mentions.
        (tf.int32, [None]),  # Relation entity1 starts.
        (tf.int32, [None]),  # Relation entity1 ends.
        (tf.int32, [None]),  # Relation entity2 starts.
        (tf.int32, [None]),  # Relation entity2 ends.
        (tf.int32, [None]),  # Relation labels.
        (tf.int32, []),  # Number of relations.
    ]
    self.input_names = _input_names
    self.label_names = _label_names
    self.predict_names = _predict_names
    self.batch_size = self.config["batch_size"]
    dtypes, shapes = zip(*self.input_props)

    if self.batch_size > 0 and self.config["max_tokens_per_batch"] < 0:
      # Use fixed batch size if number of words per batch is not limited (-1).
      self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in self.input_props]
      queue = tf.PaddingFIFOQueue(capacity=self.batch_size * 2, dtypes=dtypes, shapes=shapes)
      self.enqueue_op = queue.enqueue(self.queue_input_tensors)
      self.input_tensors = queue.dequeue_many(self.batch_size)
    else:
      # Use dynamic batch size.
      new_shapes = [[None] + shape for shape in shapes]
      self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in zip(dtypes, new_shapes)]
      queue = tf.PaddingFIFOQueue(capacity=2, dtypes=dtypes, shapes=new_shapes)
      self.enqueue_op = queue.enqueue(self.queue_input_tensors)
      self.input_tensors = queue.dequeue()
    num_features = len(self.input_names)
    self.input_dict = dict(zip(self.input_names, self.input_tensors[:num_features]))
    self.labels_dict = dict(zip(self.label_names, self.input_tensors[num_features:]))
Example #16
    def __init__(self, config):
        self.config = config
        self.context_embeddings = util.EmbeddingDictionary(
            config["context_embeddings"])
        self.head_embeddings = util.EmbeddingDictionary(
            config["head_embeddings"], maybe_cache=self.context_embeddings)
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        if config["lm_path"]:
            self.lm_file = h5py.File(self.config["lm_path"], "r")
        else:
            self.lm_file = None
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]
        self.eval_data = None  # Load eval data lazily.
        self.seq_length = 500
        self.new_dim = 512
        self.sample_transformer = Transformer(
            num_layers=2,
            d_model=self.new_dim,
            num_heads=8,
            dff=2048,
            input_vocab_size=self.seq_length,
            target_vocab_size=self.seq_length)

        input_props = []
        input_props.append((tf.string, [None, None]))  # Tokens.
        input_props.append(
            (tf.float32, [None, None, self.context_embeddings.size
                          ]))  # Context embeddings.
        input_props.append(
            (tf.float32, [None, None,
                          self.head_embeddings.size]))  # Head embeddings.
        input_props.append(
            (tf.float32, [None, None, self.lm_size,
                          self.lm_layers]))  # LM embeddings.
        input_props.append((tf.int32, [None, None,
                                       None]))  # Character indices.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.int32, [None]))  # Speaker IDs.
        input_props.append((tf.int32, []))  # Genre.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.
        input_props.append((tf.int32, [None]))  # Cluster ids.
        # input_props.append((tf.float32, [None])) # learning rate.

        self.queue_input_tensors = [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]
        dtypes, shapes = zip(*input_props)
        queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
        self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        self.input_tensors = queue.dequeue()
        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        learning_rate = step_decay(self.global_step)

        self.reset_global_step = tf.assign(self.global_step, 0)

        vars = tf.trainable_variables(scope=None)
        var_list1 = []
        var_list2 = []
        for var in vars:
            if 'transformer' in var.name:
                var_list2 += [var]
            else:
                var_list1 += [var]

        learning_rate1 = tf.train.exponential_decay(
            self.config["learning_rate"],
            self.global_step,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)
        # learning_rate2 = CustomSchedule(self.new_dim)

        gradients1 = tf.gradients(self.loss, var_list1)
        gradients2 = tf.gradients(self.loss, var_list2)

        gradients1, _ = tf.clip_by_global_norm(
            gradients1, self.config["max_gradient_norm"])
        gradients2, _ = tf.clip_by_global_norm(
            gradients2, self.config["max_gradient_norm"])

        optimizers = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }

        optimizer1 = optimizers[self.config["optimizer"]](learning_rate1)
        optimizer2 = optimizers[self.config["optimizer"]](learning_rate)

        train_op1 = optimizer1.apply_gradients(zip(gradients1, var_list1),
                                               global_step=self.global_step)
        train_op2 = optimizer2.apply_gradients(zip(gradients2, var_list2),
                                               global_step=self.global_step)
        self.train_op = tf.group(train_op1, train_op2)
Example #17
    def __init__(self, config, tokenizer, check_point_load=True):
        bert_config = modeling.BertConfig.from_json_file(
            config["bert_folder"] + "/bert_config.json")
        seq_length = 502
        self.max_sentence_length = 263
        self.max_sentence_no = 90

        self.config = config
        self.char_embedding_size = config["char_embedding_size"]
        self.char_dict = util.load_char_dict(config["char_vocab_path"])
        self.max_span_width = config["max_span_width"]
        self.genres = {g: i for i, g in enumerate(config["genres"])}
        self.lm_file = None
        self.lm_layers = self.config["lm_layers"]
        self.lm_size = self.config["lm_size"]
        self.eval_data = None  # Load eval data lazily.

        input_ids = tf.placeholder(dtype=tf.int32, shape=(1, seq_length))
        input_mask = tf.placeholder(dtype=tf.int32, shape=(1, seq_length))
        input_type_ids = tf.placeholder(dtype=tf.int32, shape=(1, seq_length))

        model = modeling.BertModel(config=bert_config,
                                   is_training=True,
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=input_type_ids,
                                   use_one_hot_embeddings=False)
        self.tokenizer = tokenizer
        tvars = tf.trainable_variables()
        init_checkpoint = config["bert_folder"] + "/bert_model.ckpt"
        (assignment_map, initialized_variable_names
         ) = modeling.get_assignment_map_from_checkpoint(
             tvars, init_checkpoint)

        if check_point_load:
            tf.train.init_from_checkpoint(init_checkpoint, assignment_map)
        self.usesful = []
        for var in tvars:
            if len(var.name.split("/")) < 3:
                continue
            if var.name.split("/")[2][0] == "l":
                temp = var.name.split("/")[2][6:]
                if int(temp) > 18:
                    self.usesful += [var]

        self.usesful += [tvars[-1], tvars[-2]]
        all_layers = model.get_all_encoder_layers()
        embeddings = tf.concat([
            tf.expand_dims(all_layers[-1], 3),
            tf.expand_dims(all_layers[-2], 3),
            tf.expand_dims(all_layers[-3], 3),
            tf.expand_dims(all_layers[-4], 3)
        ], 3)
        embeddings = embeddings[:, 1:501, :, :]
        embeddings = tf.reshape(embeddings, [500, 1024, 4])
        self.yo = embeddings
        # <tf.Tensor 'strided_slice_4:0' shape=(1, 500, 1024, 4) dtype=float32>

        input_props = []
        input_props.append((tf.string, [None, None]))  # Tokens.
        input_props.append((tf.int32, [None, None,
                                       None]))  # Character indices.
        input_props.append((tf.int32, [None]))  # Text lengths.
        input_props.append((tf.int32, [None]))  # Speaker IDs.
        input_props.append((tf.int32, []))  # Genre.
        input_props.append((tf.bool, []))  # Is training.
        input_props.append((tf.int32, [None]))  # Gold starts.
        input_props.append((tf.int32, [None]))  # Gold ends.
        input_props.append((tf.int32, [None]))  # Cluster ids.
        input_props.append((tf.int32, [self.max_sentence_no]))  # splits
        input_props.append((tf.int32, [2]))  # param

        # self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
        self.queue_input_tensors = [input_ids, input_mask, input_type_ids] + [
            tf.placeholder(dtype, shape) for dtype, shape in input_props
        ]

        # dtypes, shapes = zip(*input_props)
        # dtypes = (tf.int32, tf.int32 , tf.int32) + dtypes
        # shapes = ([1, seq_length] , [1, seq_length],  [1, seq_length]) + shapes
        # queue = tf.PaddingFIFOQueue(capacity=3, dtypes=dtypes, shapes=shapes)
        # self.enqueue_op = queue.enqueue(self.queue_input_tensors)
        # self.input_tensors = queue.dequeue()
        self.input_tensors = self.queue_input_tensors
        self.predictions, self.loss = self.get_predictions_and_loss(
            *self.input_tensors[3:], embeddings)
        self.global_step1 = tf.Variable(0, name="global_step", trainable=False)
        self.global_step2 = tf.Variable(0, name="global_step", trainable=False)
        self.reset_global_step1 = tf.assign(self.global_step1, 0)
        self.reset_global_step2 = tf.assign(self.global_step2, 0)

        learning_rate1 = tf.train.exponential_decay(
            0.0001,
            self.global_step1,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)
        learning_rate2 = tf.train.exponential_decay(
            self.config["learning_rate"],
            self.global_step2,
            self.config["decay_frequency"],
            self.config["decay_rate"],
            staircase=True)

        trainable_params = tf.trainable_variables()
        Lee_param = []
        for param in trainable_params:
            temp = param.name.split("/")[0]
            if "bert" == temp or temp == "context_word_emb:0" or temp == "head_word_emb:0" or temp == "total_embedding:0":
                continue
            else:
                Lee_param += [param]

        gradients1 = tf.gradients(self.loss, self.usesful)
        gradients2 = tf.gradients(self.loss, Lee_param)

        gradients1, _ = tf.clip_by_global_norm(
            gradients1, self.config["max_gradient_norm"])
        gradients2, _ = tf.clip_by_global_norm(
            gradients2, self.config["max_gradient_norm"])

        optimizers = {
            "adam": tf.train.AdamOptimizer,
            "sgd": tf.train.GradientDescentOptimizer
        }

        optimizer1 = tf.train.AdamOptimizer(learning_rate1)
        optimizer2 = optimizers[self.config["optimizer"]](learning_rate2)

        self.train_op1 = optimizer1.apply_gradients(
            zip(gradients1, self.usesful), global_step=self.global_step1)
        self.train_op2 = optimizer2.apply_gradients(
            zip(gradients2, Lee_param), global_step=self.global_step2)

        self.train_op = tf.group(self.train_op1, self.train_op2)
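tf.group simply ties the two apply_gradients ops together, so one run of self.train_op updates the selected late BERT layers at the fixed 1e-4 Adam rate and the remaining coreference parameters at the configured rate, each advancing its own global step. A minimal usage sketch (feed_values is a hypothetical list matching queue_input_tensors):

feed_dict = dict(zip(model.queue_input_tensors, feed_values))
train_loss, _ = session.run([model.loss, model.train_op], feed_dict=feed_dict)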