예제 #1
0
 def __init__(self, returns_dict=False):
   """Creates the string-to-row lookup table and the embedding weights.

   Args:
     returns_dict: Stored as `self._returns_dict`; this constructor does not
       use it otherwise.
   """
   # Row 0 belongs to the empty string and 0 is also the table's default
   # value, so OOV items are mapped to that embedding.
   vocab = ["", "hello world", "pair-programming"]
   vectors = [[0, 0, 0, 0], [1, 2, 3, 4], [5, 5, 5, 5]]

   self.table = HashTable(
       KeyValueTensorInitializer(
           tf.constant(vocab, dtype=tf.string),
           tf.constant(list(range(len(vocab))), dtype=tf.int64)),
       0)
   self.weights = tf.Variable(vectors, dtype=tf.float32)
   self.variables = [self.weights]
   self.trainable_variables = self.variables
   self._returns_dict = returns_dict
예제 #2
0
class TextEmbedding(tf.train.Checkpoint):
  """Toy text embedding: a string lookup table plus a small weight matrix."""

  def __init__(self, returns_dict=False):
    """Builds the lookup table and the embedding variable.

    Args:
      returns_dict: When true, `__call__` wraps its result in a dict under
        the key "outputs"; otherwise it returns the tensor itself.
    """
    embeddings = [
        ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
        ("hello world", [1, 2, 3, 4]),
        ("pair-programming", [5, 5, 5, 5]),
    ]
    tokens = [token for token, _ in embeddings]
    rows = [row for _, row in embeddings]

    token_keys = tf.constant(tokens, dtype=tf.string)
    row_ids = tf.constant(
        [row_id for row_id, _ in enumerate(embeddings)], dtype=tf.int64)
    # Default value 0 sends unknown strings to row 0.
    self.table = HashTable(KeyValueTensorInitializer(token_keys, row_ids), 0)
    self.weights = tf.Variable(rows, dtype=tf.float32)
    self.variables = [self.weights]
    self.trainable_variables = self.variables
    self._returns_dict = returns_dict

  @tf.function(input_signature=[
      tf.TensorSpec(dtype=tf.string, name="text", shape=[None])
  ])
  def __call__(self, text_tensor):
    """Looks up each input string and gathers its embedding row.

    Args:
      text_tensor: String tensor matching the declared input signature.

    Returns:
      The gathered embeddings, or `{"outputs": embeddings}` when the instance
      was created with `returns_dict=True`.
    """
    embedding_tensor = tf.gather(self.weights, self.table.lookup(text_tensor))
    if self._returns_dict:
      return {"outputs": embedding_tensor}
    return embedding_tensor
예제 #3
0
    def __init__(self, mode, config):
        """Set up input tensors, lookup tables and embedding variables.

        In every mode except ``Mode.Predict`` the inputs are int32
        placeholders. In ``Mode.Predict`` the inputs are string placeholders
        converted to ids through tables built by ``self.lookup``, and
        ``self.dec2char`` maps decoder ids back to strings.

        :param mode: value compared against ``Mode.Predict`` and ``Mode.Train``.
        :param config: settings object; supplies ``max_morph_tags``,
            ``start_idx``, the vocabulary and embedding sizes and, in predict
            mode, the numberers.
        """
        self._mode = mode
        self.config = config

        # True for every mode in which ids (not raw strings) are fed.
        feeds_ids = self._mode != Mode.Predict

        if feeds_ids:
            self.input = tf.placeholder(
                tf.int32, shape=[None, None], name='inputs')
            self.pos_input = tf.placeholder(
                tf.int32, shape=[None], name='pos_input')
            self.morph_input = tf.placeholder(
                tf.int32, shape=[None, config.max_morph_tags],
                name='morph_input')
        else:
            # Reverse table: decoder id -> character string.
            chars, nums = zip(*config.dec_char_numberer.value2num.items())
            id_to_char = KeyValueTensorInitializer(
                nums, chars, key_dtype=tf.int64, value_dtype=tf.string)
            self.dec2char = HashTable(id_to_char, default_value="<UNK>")

            # Forward tables: string -> id.
            self.enc_chars = self.lookup(config.enc_char_numberer)
            self.morph_tags = self.lookup(config.morph_numberer)
            self.pos_tags = self.lookup(config.pos_numberer)

            self.input_plc = tf.placeholder(
                tf.string, shape=[None, None], name='inputs')
            self.pos_plc = tf.placeholder(
                tf.string, shape=[None], name='pos_input')
            self.morph_plc = tf.placeholder(
                tf.string, shape=[None, config.max_morph_tags],
                name='morph_input')

            self.input = self.enc_chars.lookup(self.input_plc)
            self.pos_input = self.pos_tags.lookup(self.pos_plc)
            self.morph_input = self.morph_tags.lookup(self.morph_plc)

        self.input_lengths = tf.placeholder(
            tf.int32, shape=[None], name='input_lengths')

        # Encoder and decoder character embeddings share one embedding size.
        char_dim = self.config.char_embedding_size
        self.enc_character_embeddings = tf.get_variable(
            'enc_character_embeddings',
            shape=[self.config.enc_vocab_size, char_dim])
        self.dec_character_embeddings = tf.get_variable(
            'dec_character_embeddings',
            shape=[self.config.dec_vocab_size, char_dim])

        self.pos_embeddings = tf.get_variable(
            'pos_embeddings',
            shape=[self.config.pos_vocab_size,
                   self.config.pos_embedding_size])
        self.morph_embeddings = tf.get_variable(
            'morph_embeddings',
            shape=[self.config.morph_vocab_size,
                   self.config.morph_embedding_size])

        if self._mode != Mode.Train:
            # One start index per row of the input batch.
            batch_size = tf.shape(self.input)[0]
            self.start_tokens = tf.tile([config.start_idx], [batch_size])

        if feeds_ids:
            # length +2 since lengths are <bow> + word + <eow>
            self.dec_input = tf.placeholder(
                tf.int32, shape=[None, None], name='dec_in')
            # The start token is fed to the decoder but must not be predicted,
            # so it is dropped from the targets used for the loss.
            self.decoder_targets = self.dec_input[:, 1:]
            self.dec_lengths = tf.placeholder(
                tf.int32, shape=[None], name='dec_lengths')
예제 #4
0
def text_module_fn():
  """Builds a toy text-embedding graph and registers it as a hub signature.

  A string placeholder named "text" is looked up in a three-entry table and
  the resulting ids are used to gather rows from the "weights" variable.
  """
  embeddings = [
      ("", [0, 0, 0, 0]),  # OOV items are mapped to this embedding.
      ("hello world", [1, 2, 3, 4]),
      ("pair-programming", [5, 5, 5, 5]),
  ]
  tokens = [token for token, _ in embeddings]
  rows = [row for _, row in embeddings]

  # Default value 0 sends unknown strings to row 0.
  table = HashTable(
      KeyValueTensorInitializer(
          tf.constant(tokens, dtype=tf.string),
          tf.constant(list(range(len(tokens))), dtype=tf.int64)),
      0)

  weights = tf_v1.get_variable(
      "weights",
      dtype=tf.float32,
      initializer=tf.cast(tf.constant(rows), tf.float32))

  text_tensor = tf_v1.placeholder(dtype=tf.string, name="text", shape=[None])
  embedding_tensor = tf.gather(weights, table.lookup(text_tensor))
  hub.add_signature(inputs=text_tensor, outputs=embedding_tensor)
예제 #5
0
파일: data.py 프로젝트: tongda/ImSAT
        def input_fn():
            """Build the image dataset and the caption datasets.

            Reads ``captions``, ``filenames`` and ``self.word_to_idx`` from
            the enclosing scope.

            Returns:
                ``(image_dataset, caption_structure)`` where
                ``caption_structure`` is a dict with the raw caption dataset
                under ``"raw"`` and the word-id dataset under ``"index"``.
            """
            with tf.variable_scope("input_fn"), tf.device("/cpu:0"):
                caption_dataset = Dataset.from_tensor_slices(list(captions))
                filename_dataset = Dataset.from_tensor_slices(list(filenames))

                # Word -> id table; words missing from the table map to 0.
                vocab_words = list(self.word_to_idx.keys())
                vocab_ids = list(self.word_to_idx.values())
                table = HashTable(
                    KeyValueTensorInitializer(vocab_words,
                                              vocab_ids,
                                              key_dtype=tf.string,
                                              value_dtype=tf.int32),
                    default_value=0)

                def split_sentence(sentence):
                    """Split a caption, wrap it in <START>/<END>, map to ids."""
                    tokens = tf.string_split(
                        tf.reshape(sentence, (1, ))).values
                    start = tf.constant(["<START>"])
                    end = tf.constant(["<END>"])
                    return table.lookup(
                        tf.concat([start, tokens, end], axis=0))

                index_dataset = caption_dataset.map(split_sentence,
                                                    num_threads=8)

                def decode_image(filename):
                    """Read a JPEG file and return it as a float tensor."""
                    raw_bytes = tf.read_file(filename)
                    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
                    # image = tf.image.resize_images(image, [224, 224])
                    return tf.to_float(decoded)

                image_dataset = filename_dataset.map(decode_image,
                                                     num_threads=8)
                caption_structure = dict(raw=caption_dataset,
                                         index=index_dataset)
            return image_dataset, caption_structure
    def __init__(self, args, session, updates=None):
        """Build the full training/inference graph for the model.

        Creates the step counter, the input placeholders, a learning-rate
        schedule, one network tower per device, the averaged and clipped
        gradient update, the output tensors, summaries and a saver.

        Args:
            args: configuration object. Fields read here: ``output_dir``,
                ``multi_gpu``, ``lr``, ``min_lr``, ``lr_warmup_steps``,
                ``lr_decay_rate``, ``lr_decay_steps``, ``beta1``, ``beta2``,
                ``grad_clipping`` and ``max_checkpoints``; it is also passed
                to ``Network``.
            session: stored as ``self.sess``; not used in this constructor.
            updates: initial value of ``global_step``; a falsy value
                (including ``None``) is replaced by 0.
        """
        self.args = args
        self.sess = session

        # updates
        if not updates:
            updates = 0
        self.updates = updates
        # Non-trainable float32 step counter, initialised to `updates`.
        self.global_step = tf.get_variable(
            'global_step',
            shape=(),
            dtype=tf.float32,
            initializer=tf.constant_initializer(updates),
            trainable=False)
        # Op that increments the step counter by one when run.
        self.step = tf.assign_add(self.global_step, 1)

        # placeholders
        # String-to-id table initialised from <output_dir>/vocab.txt; keys that
        # are not in the table map to Vocab.unk().
        table = HashTable(TextFileIdTableInitializer(
            filename=os.path.join(args.output_dir, 'vocab.txt')),
                          default_value=Vocab.unk())
        self.q1_string = tf.placeholder(tf.string, [None, None], name='q1_str')
        self.q2_string = tf.placeholder(tf.string, [None, None], name='q2_str')
        # q1/q2 can be fed directly; when they are not fed they default to the
        # table lookup of the corresponding string placeholder.
        self.q1 = tf.placeholder_with_default(table.lookup(self.q1_string),
                                              [None, None],
                                              name='q1')
        self.q2 = tf.placeholder_with_default(table.lookup(self.q2_string),
                                              [None, None],
                                              name='q2')
        self.q1_len = tf.placeholder(tf.int32, [None], name='q1_len')
        self.q2_len = tf.placeholder(tf.int32, [None], name='q2_len')
        self.y = tf.placeholder(tf.int32, [None], name='y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, (),
                                                name='dropout_keep_prob')

        # Float masks built from the sequence lengths, with a trailing
        # dimension of size 1 appended.
        q1_mask = tf.expand_dims(tf.sequence_mask(self.q1_len,
                                                  dtype=tf.float32),
                                 dim=-1)
        q2_mask = tf.expand_dims(tf.sequence_mask(self.q2_len,
                                                  dtype=tf.float32),
                                 dim=-1)

        # Fall back to the CPU when no GPU is reported; without multi_gpu only
        # the first device is used.
        devices = self.get_available_gpus() or ['/device:CPU:0']
        if not args.multi_gpu:
            devices = devices[:1]
        if len(devices) == 1:
            splits = 1
        else:
            splits = [tf.shape(self.q1)[0] // len(devices)
                      ] * (len(devices) - 1) + [-1]  # handle uneven split
        # Split every input along the first axis, one slice per device.
        q1 = tf.split(self.q1, splits)
        q2 = tf.split(self.q2, splits)
        q1_mask = tf.split(q1_mask, splits)
        q2_mask = tf.split(q2_mask, splits)
        y = tf.split(self.y, splits)

        # network
        self.network = Network(args)

        # optimizer
        lr = tf.get_variable('lr',
                             shape=(),
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(args.lr),
                             trainable=False)
        # Schedule: while global_step < lr_warmup_steps, interpolate linearly
        # from min_lr towards lr; afterwards multiply lr by lr_decay_rate once
        # per lr_decay_steps steps, never going below min_lr.
        lr_next = tf.cond(self.global_step < args.lr_warmup_steps,
                          true_fn=lambda: args.min_lr +
                          (args.lr - args.min_lr) / max(
                              1, args.lr_warmup_steps) * self.global_step,
                          false_fn=lambda: tf.maximum(
                              args.min_lr,
                              args.lr * args.lr_decay_rate**tf.floor(
                                  (self.global_step - args.lr_warmup_steps
                                   ) / args.lr_decay_steps)))
        # Registering the assignment in UPDATE_OPS makes it a control
        # dependency of train_op (see the control_dependencies block below).
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                             tf.assign(lr, lr_next, name='update_lr'))
        self.lr = lr
        self.opt = tf.train.AdamOptimizer(learning_rate=lr,
                                          beta1=args.beta1,
                                          beta2=args.beta2)

        # training graph
        # With a single device the name scope is '' (no tower prefix).
        tower_names = ['tower-{}'.format(i) for i in range(len(devices))
                       ] if len(devices) > 1 else ['']
        tower_logits = []
        tower_grads = []
        summaries = []
        loss = 0

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i, device in enumerate(devices):
                with tf.device(device):
                    with tf.name_scope(tower_names[i]) as scope:
                        logits = self.network(q1[i], q2[i], q1_mask[i],
                                              q2_mask[i],
                                              self.dropout_keep_prob)
                        tower_logits.append(logits)
                        # NOTE(review): `loss` and `summaries` are rebound on
                        # every iteration, so after the loop they hold only the
                        # values from the last tower.
                        loss = self.get_loss(logits, y[i])
                        tf.get_variable_scope().reuse_variables()
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)
                        grads = self.opt.compute_gradients(loss)
                        tower_grads.append(grads)
        gradients = []
        variables = []
        # zip(*tower_grads) groups the (grad, var) pairs of the same variable
        # across all towers.
        for grad_and_vars in zip(*tower_grads):
            # Only the first tower's gradient is checked for None.
            if grad_and_vars[0][0] is None:
                msg = 'WARNING: trainable variable {} receives no grad.\n'.format(
                    grad_and_vars[0][1].op.name)
                sys.stderr.write(msg)
                continue
            # Average the gradient over towers.
            grad = tf.stack([g for g, _ in grad_and_vars])
            grad = tf.reduce_mean(grad, 0)
            v = grad_and_vars[0][
                1]  # use the first tower's pointer to the (shared) variable
            gradients.append(grad)
            variables.append(v)

        # Clip the averaged gradients by their global norm; the pre-clipping
        # global norm is kept as self.gnorm.
        gradients, self.gnorm = tf.clip_by_global_norm(gradients,
                                                       self.args.grad_clipping)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_op = self.opt.apply_gradients(zip(gradients, variables))
        # Outputs are computed over the logits of all towers concatenated
        # along the first axis.
        logits = tf.concat(tower_logits, 0)
        self.prob = tf.nn.softmax(logits, dim=1, name='prob')
        self.pred = tf.argmax(input=logits, axis=1, name='pred')
        self.loss = tf.identity(loss, name='loss')
        summaries.append(tf.summary.scalar('training/lr', lr))
        summaries.append(tf.summary.scalar('training/gnorm', self.gnorm))
        summaries.append(tf.summary.scalar('training/loss', self.loss))

        # add summary
        self.summary = tf.summary.merge(summaries)

        # saver
        # Variables whose name contains 'Adam' are left out of checkpoints.
        self.saver = tf.train.Saver(
            [var for var in tf.global_variables() if 'Adam' not in var.name],
            max_to_keep=args.max_checkpoints)
예제 #7
0
    def __init__(self, hparam, export=False, with_char=False):
        """Create the model's input tensors and declare its attributes.

        Args:
            hparam: hyper-parameter object; ``seq_length`` is read here, and
                ``char_seq_length`` too when character inputs are built.
            export: when true, the inputs are string placeholders that are
                mapped to ids through a vocabulary table read from
                ``assets/vocab``; otherwise the inputs are int32 placeholders.
            with_char: only honoured when ``export`` is true; adds
                character-level string inputs mapped through
                ``assets/char_vocab``.
        """
        self.hparam = hparam

        self.Embedding = None
        self.embed_matrix = None
        self.init_embedding = None

        if export:
            # Tab-separated vocabulary file: column 0 is the key, column 1 the
            # id. Keys missing from the table map to 0.
            table = HashTable(TextFileIdTableInitializer(
                filename='assets/vocab',
                key_column_index=0,
                value_column_index=1,
                vocab_size=None,
                delimiter='\t',
                name='table'),
                              default_value=0)
            self.q1_string = tf.placeholder(tf.string,
                                            [None, self.hparam.seq_length],
                                            name='string_input1')
            self.q2_string = tf.placeholder(tf.string,
                                            [None, self.hparam.seq_length],
                                            name='string_input2')
            self.premise = table.lookup(self.q1_string)
            self.hypothesis = table.lookup(self.q2_string)

            if with_char:
                char_table = HashTable(TextFileIdTableInitializer(
                    filename='assets/char_vocab',
                    key_column_index=0,
                    value_column_index=1,
                    vocab_size=None,
                    delimiter='\t',
                    name='char_table'),
                                       default_value=0)
                self.ch1_string = tf.placeholder(
                    tf.string, [None, self.hparam.char_seq_length],
                    name='char_string_input1')
                # The second character input gets its own name, mirroring
                # string_input1 / string_input2 above. It previously reused
                # 'char_string_input1', so the graph had to uniquify the name
                # and the tensor could not be addressed as input 2.
                self.ch2_string = tf.placeholder(
                    tf.string, [None, self.hparam.char_seq_length],
                    name='char_string_input2')
                self.ch1 = char_table.lookup(self.ch1_string)
                self.ch2 = char_table.lookup(self.ch2_string)
            else:
                self.ch1, self.ch2 = None, None
        else:
            self.premise = tf.placeholder(tf.int32,
                                          [None, self.hparam.seq_length],
                                          'premise')
            self.hypothesis = tf.placeholder(tf.int32,
                                             [None, self.hparam.seq_length],
                                             'hypothesis')
            self.ch1, self.ch2 = None, None

        # The remaining attributes are only declared here; nothing in this
        # constructor assigns them a real value.
        self.premise_mask, self.hypothesis_mask = None, None
        self.y = None
        self.pred = None
        self.logits = None
        self.dropout_keep_prob = None

        self.loss = None
        self.train_op = None
        self.is_training = None

        self.output_w = None
        self.output_b = None

        self.lsf_q1, self.lsf_q2 = None, None

        self.random_size = None
예제 #8
0
 def lookup(self, numberer):
     """Build a hash table from the entries of ``numberer.value2num``.

     Keys that are not in the table resolve to ``numberer.unknown_idx``.
     """
     values, numbers = list(zip(*numberer.value2num.items()))
     return HashTable(KeyValueTensorInitializer(values, numbers),
                      default_value=numberer.unknown_idx)
예제 #9
0
    def __init__(self, args, session, updates=None):
        """Build the full training/inference graph for the model.

        Creates the step counter, the input placeholders, a learning-rate
        schedule, one network tower per device, the averaged and clipped
        gradient update, the output tensors, summaries and a saver.

        Args:
            args: configuration object. Fields read here: ``output_dir``,
                ``multi_gpu``, ``lr``, ``min_lr``, ``lr_warmup_steps``,
                ``lr_decay_rate``, ``lr_decay_steps``, ``beta1``, ``beta2``,
                ``grad_clipping`` and ``max_checkpoints``; it is also passed
                to ``Network``.
            session: stored as ``self.sess``; not used in this constructor.
            updates: initial value of ``global_step``; a falsy value
                (including ``None``) is replaced by 0.
        """
        self.args = args
        self.sess = session

        # updates
        if not updates:
            updates = 0
        # tf.get_variable() makes the variable shareable. With tf.Variable()
        # two variables created under the same name would be treated as two
        # separate variables.
        self.global_step = tf.get_variable(
            'global_step',
            shape=(),
            dtype=tf.float32,
            initializer=tf.constant_initializer(updates),
            trainable=False)
        self.step = tf.assign_add(self.global_step,
                                  1)  # increments self.global_step by 1

        # placeholders
        # String-to-id table that generates one key-value pair per line of
        # vocab.txt; keys missing from the table map to Vocab.unk().
        table = HashTable(
            TextFileIdTableInitializer(
                filename=os.path.join(args.output_dir, 'vocab.txt')),
            default_value=Vocab.unk())

        self.q1_string = tf.placeholder(tf.string, [None, None], name='q1_str')
        self.q2_string = tf.placeholder(tf.string, [None, None], name='q2_str')
        # q1/q2 can be fed directly; when they are not fed they default to the
        # table lookup of the corresponding string placeholder.
        self.q1 = tf.placeholder_with_default(table.lookup(self.q1_string),
                                              [None, None],
                                              name='q1')
        self.q2 = tf.placeholder_with_default(table.lookup(self.q2_string),
                                              [None, None],
                                              name='q2')
        self.q1_len = tf.placeholder(tf.int32, [None], name='q1_len')
        self.q2_len = tf.placeholder(tf.int32, [None], name='q2_len')
        self.y = tf.placeholder(tf.int32, [None], name='y')
        self.dropout_keep_prob = tf.placeholder(tf.float32, (),
                                                name='dropout_keep_prob')

        # sequence_mask returns a mask tensor marking the first N positions of
        # each row; a trailing dimension of size 1 is then appended.
        q1_mask = tf.expand_dims(tf.sequence_mask(self.q1_len,
                                                  dtype=tf.float32),
                                 dim=-1)
        q2_mask = tf.expand_dims(tf.sequence_mask(self.q2_len,
                                                  dtype=tf.float32),
                                 dim=-1)

        # Fall back to the CPU when no GPU is reported; without multi_gpu only
        # the first device is used.
        devices = self.get_available_gpus() or ['/device:CPU:0']
        if not args.multi_gpu:
            devices = devices[:1]
        if len(devices) == 1:
            splits = 1
        else:
            splits = [tf.shape(self.q1)[0] // len(devices)
                      ] * (len(devices) - 1) + [-1]  # handle uneven split

        # Split every input along the first axis, one slice per device.
        q1 = tf.split(self.q1, splits)
        q2 = tf.split(self.q2, splits)
        q1_mask = tf.split(q1_mask, splits)
        q2_mask = tf.split(q2_mask, splits)
        y = tf.split(self.y, splits)

        # network
        self.network = Network(args)

        # optimizer
        lr = tf.get_variable('lr',
                             shape=(),
                             dtype=tf.float32,
                             initializer=tf.constant_initializer(args.lr),
                             trainable=False)
        # Schedule: while global_step < lr_warmup_steps, interpolate linearly
        # from min_lr towards lr; afterwards multiply lr by lr_decay_rate once
        # per lr_decay_steps steps, never going below min_lr.
        lr_next = tf.cond(self.global_step < args.lr_warmup_steps,
                          true_fn=lambda: args.min_lr +
                          (args.lr - args.min_lr) / max(
                              1, args.lr_warmup_steps) * self.global_step,
                          false_fn=lambda: tf.maximum(
                              args.min_lr,
                              args.lr * args.lr_decay_rate**tf.floor(
                                  (self.global_step - args.lr_warmup_steps
                                   ) / args.lr_decay_steps)))

        # Within a graph, collections are used to manage different categories
        # of resources. tf.GraphKeys.UPDATE_OPS is a collection of ops
        # (operations executed when the graph runs, e.g. multiply, ReLU), not
        # of variables.
        tf.add_to_collection(tf.GraphKeys.UPDATE_OPS,
                             tf.assign(lr, lr_next, name='update_lr'))
        self.lr = lr
        self.opt = tf.train.AdamOptimizer(learning_rate=lr,
                                          beta1=args.beta1,
                                          beta2=args.beta2)

        # training graph
        # With a single device the name scope is '' (no tower prefix).
        tower_names = ['tower-{}'.format(i) for i in range(len(devices))
                       ] if len(devices) > 1 else ['']
        tower_logits = []
        tower_grads = []
        summaries = []
        loss = 0

        # AUTO_REUSE: when a previously defined variable is requested it is
        # shared; otherwise a new variable is created.
        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
            for i, device in enumerate(devices):
                with tf.device(device):
                    with tf.name_scope(tower_names[i]) as scope:
                        logits = self.network(q1[i], q2[i], q1_mask[i],
                                              q2_mask[i],
                                              self.dropout_keep_prob)
                        tower_logits.append(logits)
                        # NOTE(review): `loss` and `summaries` are rebound on
                        # every iteration, so after the loop they hold only the
                        # values from the last tower.
                        loss = self.get_loss(logits, y[i])
                        tf.get_variable_scope().reuse_variables()
                        summaries = tf.get_collection(tf.GraphKeys.SUMMARIES,
                                                      scope)
                        grads = self.opt.compute_gradients(loss)
                        tower_grads.append(grads)

        gradients = []
        variables = []
        # zip(*list) turns a group of rows into a group of columns: here it
        # groups the (grad, var) pairs of the same variable across towers.
        for grad_and_vars in zip(*tower_grads):
            # Only the first tower's gradient is checked for None.
            if grad_and_vars[0][0] is None:
                msg = 'WARNING: trainable variable {} receives no grad.\n'.format(
                    grad_and_vars[0][1].op.name)
                sys.stderr.write(msg)  # write the warning to stderr
                continue
            # Average the gradient over towers.
            grad = tf.stack([g for g, _ in grad_and_vars])
            grad = tf.reduce_mean(grad, 0)
            v = grad_and_vars[0][
                1]  # use the 1st tower's pointer to the (shared) variable
            gradients.append(grad)
            variables.append(v)

        # tf.clip_by_global_norm(t_list, clip_norm, use_norm=None, name=None):
        # t_list is the list of gradient tensors and clip_norm is the clipping
        # threshold. It returns the clipped gradient tensors together with the
        # global norm of all tensors, which keeps the sum of squared gradients
        # of all weights within a set range for each update.
        gradients, self.gnorm = tf.clip_by_global_norm(gradients,
                                                       self.args.grad_clipping)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # tf.control_dependencies takes a list of Operations or Tensors and
        # returns a context manager; ops defined inside that context run only
        # after the listed control inputs have run. Here: run update_ops first.
        with tf.control_dependencies(update_ops):
            self.train_op = self.opt.apply_gradients(zip(gradients, variables))

        # Outputs are computed over the logits of all towers concatenated
        # along the first axis.
        logits = tf.concat(tower_logits, 0)
        self.prob = tf.nn.softmax(logits, dim=1, name='prob')
        self.pred = tf.argmax(input=logits, axis=1, name='pred')
        self.loss = tf.identity(loss, name='loss')
        summaries.append(tf.summary.scalar('training/lr', lr))
        # Tag spelled 'gnorm' (was the typo 'gnomr') so the gradient-norm
        # summary appears under its intended name.
        summaries.append(tf.summary.scalar('training/gnorm', self.gnorm))
        summaries.append(tf.summary.scalar('training/loss', self.loss))

        # add summary
        self.summary = tf.summary.merge(summaries)

        # saver
        # Variables whose name contains 'Adam' are left out of checkpoints.
        self.saver = tf.train.Saver(
            [var for var in tf.global_variables() if 'Adam' not in var.name],
            max_to_keep=args.max_checkpoints)