import tensorflow as tf
from tensorflow.python.training import queue_runner


def _make_batch_queue(tensors, capacity, num_threads=1):
    """Builds a PaddingFIFOQueue over `tensors`, fed by `num_threads` runner threads."""
    queue = tf.PaddingFIFOQueue(capacity=capacity,
                                dtypes=[t.dtype for t in tensors],
                                shapes=[t.get_shape() for t in tensors])
    # Report queue occupancy so input-pipeline stalls show up in TensorBoard.
    tf.summary.scalar("fraction_of_%d_full" % capacity,
                      tf.cast(queue.size(), tf.float32) * (1. / capacity))
    enqueue_ops = [queue.enqueue(tensors)] * num_threads
    queue_runner.add_queue_runner(queue_runner.QueueRunner(queue, enqueue_ops))
    return queue
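# A minimal usage sketch for `_make_batch_queue` (the toy tensors below are
# stand-ins for an upstream reader op). Each enqueue pushes one variable-length
# example; `dequeue_many` zero-pads the examples in a batch to a common length.
example_len = tf.random_uniform([], minval=3, maxval=8, dtype=tf.int32)
tokens = tf.fill([example_len], 1)  # rank-1, length varies per enqueue
length = tf.size(tokens)

batch_queue = _make_batch_queue([tokens, length], capacity=50, num_threads=2)
batch_tokens, batch_lengths = batch_queue.dequeue_many(4)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(batch_tokens).shape)  # e.g. (4, 7): padded to the batch max
    coord.request_stop()
    coord.join(threads)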
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(config["context_embeddings"])
    self.context_embeddings_size = self.context_embeddings.size
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(config["char_vocab_path"])
    if self.config["lm_path"].lower() == "none":
        self.lm_file = None
    else:
        self.lm_file = h5py.File(self.config["lm_path"], "r")
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.
    self.ner_types = self.config["ner_types"]
    self.ner_maps = {ner: (i + 1) for i, ner in enumerate(self.ner_types)}
    self.num_types = len(self.ner_types)

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings_size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold NER labels.

    self.queue_input_tensors = [tf.placeholder(dtype, shape) for dtype, shape in input_props]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(self.config["learning_rate"], self.global_step,
                                               self.config["decay_frequency"], self.config["decay_rate"],
                                               staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer,
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
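# A minimal sketch of the feeder thread this constructor expects: the
# placeholders in `queue_input_tensors` are fed from Python, and `enqueue_op`
# pushes one tensorized example at a time while the main thread dequeues.
# `tensorize_example` and `train_examples` are hypothetical names for the
# per-example preprocessing and the training set.
import random
import threading

def start_enqueue_thread(model, session, train_examples):
    def enqueue_loop():
        while True:
            random.shuffle(train_examples)
            for example in train_examples:
                tensorized = model.tensorize_example(example, is_training=True)
                feed_dict = dict(zip(model.queue_input_tensors, tensorized))
                session.run(model.enqueue_op, feed_dict=feed_dict)
    thread = threading.Thread(target=enqueue_loop)
    thread.daemon = True  # Do not block process exit.
    thread.start()
    return thread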
def init_queue(self):
    """Builds the padded input queue and the placeholders that feed it."""
    queue_types = []
    queue_shapes = []
    self.placeholders = []
    for name, shape in self.data_names_and_shapes:
        # Default to float32 unless an explicit dtype was registered for this input.
        if self.data_types is not None and name in self.data_types:
            dtype = self.data_types[name]
        else:
            dtype = tf.float32
        queue_shapes.append(shape)
        queue_types.append(dtype)
        self.placeholders.append(
            tf.placeholder(dtype, shape, name="placeholder_" + name))

    self.queue = tf.PaddingFIFOQueue(self.queue_size, queue_types, queue_shapes)
    self.enqueue = self.queue.enqueue(self.placeholders)
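# A minimal sketch of feeding the queue built by `init_queue` (assumes `batch`
# is a list of numpy arrays in the same order as `data_names_and_shapes`):
# feed every placeholder and run the enqueue op once per batch.
def enqueue_batch(self, session, batch):
    feed_dict = dict(zip(self.placeholders, batch))
    session.run(self.enqueue, feed_dict=feed_dict)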
def prefetch(tensor_dict, capacity):
    """Creates a prefetch queue for tensors.

    Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
    dequeue op that evaluates to a tensor_dict. This function is useful in
    prefetching preprocessed tensors so that the data is readily available for
    consumers.

    Example input pipeline when you don't need batching:
    ----------------------------------------------------
    key, string_tensor = slim.parallel_reader.parallel_read(...)
    tensor_dict = decoder.decode(string_tensor)
    tensor_dict = preprocessor.preprocess(tensor_dict, ...)
    prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
    tensor_dict = prefetch_queue.dequeue()
    outputs = Model(tensor_dict)
    ...
    ----------------------------------------------------

    For input pipelines with batching, refer to core/batcher.py.

    Args:
      tensor_dict: a dictionary of tensors to prefetch.
      capacity: the size of the prefetch queue.

    Returns:
      a FIFO prefetcher queue.
    """
    names = list(tensor_dict.keys())
    dtypes = [t.dtype for t in tensor_dict.values()]
    shapes = [t.get_shape() for t in tensor_dict.values()]
    prefetch_queue = tf.PaddingFIFOQueue(capacity,
                                         dtypes=dtypes,
                                         shapes=shapes,
                                         names=names,
                                         name='prefetch_queue')
    enqueue_op = prefetch_queue.enqueue(tensor_dict)
    tf.train.queue_runner.add_queue_runner(
        tf.train.queue_runner.QueueRunner(prefetch_queue, [enqueue_op]))
    tf.summary.scalar(
        'queue/%s/fraction_of_%d_full' % (prefetch_queue.name, capacity),
        tf.cast(prefetch_queue.size(), dtype=tf.float32) * (1. / capacity))
    return prefetch_queue
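# A minimal sketch of driving `prefetch` with toy tensors (a real pipeline
# would decode and preprocess records first, as in the docstring). Because the
# queue is created with `names=...`, `dequeue()` returns a dict keyed by the
# original tensor_dict keys.
toy_dict = {
    "image": tf.random_uniform([32, 32, 3]),
    "label": tf.constant(1),
}
toy_queue = prefetch(toy_dict, capacity=20)
dequeued = toy_queue.dequeue()  # {'image': <tensor>, 'label': <tensor>}

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    print(sess.run(dequeued["image"]).shape)  # (32, 32, 3)
    coord.request_stop()
    coord.join(threads)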
def __init__(self, config):
    self.config = config
    self.context_embeddings = util.EmbeddingDictionary(
        config["context_embeddings"], config["datapath"])
    self.head_embeddings = util.EmbeddingDictionary(
        config["context_embeddings"], config["datapath"],
        maybe_cache=self.context_embeddings)
    self.char_embedding_size = config["char_embedding_size"]
    self.char_dict = util.load_char_dict(
        os.path.join(config["datapath"], config["char_vocab_path"]))
    self.max_span_width = config["max_span_width"]
    self.genres = {g: i for i, g in enumerate(config["genres"])}
    if config["lm_path"]:
        self.lm_file = h5py.File(
            os.path.join(config["datapath"], self.config["lm_path"]), "r")
    else:
        self.lm_file = None
    if config["lm_model_name"]:
        self.bert_tokenizer, self.bert_model = bert.load_bert(
            self.config["lm_model_name"])
    else:
        self.bert_tokenizer = None
        self.bert_model = None
    self.lm_layers = self.config["lm_layers"]
    self.lm_size = self.config["lm_size"]
    self.eval_data = None  # Load eval data lazily.

    input_props = []
    input_props.append((tf.string, [None, None]))  # Tokens.
    input_props.append((tf.float32, [None, None, self.context_embeddings.size]))  # Context embeddings.
    input_props.append((tf.float32, [None, None, self.head_embeddings.size]))  # Head embeddings.
    input_props.append((tf.float32, [None, None, self.lm_size, self.lm_layers]))  # LM embeddings.
    input_props.append((tf.int32, [None, None, None]))  # Character indices.
    input_props.append((tf.int32, [None]))  # Text lengths.
    input_props.append((tf.int32, []))  # Genre.
    input_props.append((tf.bool, []))  # Is training.
    input_props.append((tf.int32, [None]))  # Gold starts.
    input_props.append((tf.int32, [None]))  # Gold ends.
    input_props.append((tf.int32, [None]))  # Cluster ids.

    self.queue_input_tensors = [
        tf.placeholder(dtype, shape) for dtype, shape in input_props
    ]
    dtypes, shapes = zip(*input_props)
    queue = tf.PaddingFIFOQueue(capacity=10, dtypes=dtypes, shapes=shapes)
    self.enqueue_op = queue.enqueue(self.queue_input_tensors)
    self.input_tensors = queue.dequeue()

    self.predictions, self.loss = self.get_predictions_and_loss(*self.input_tensors)
    self.global_step = tf.Variable(0, name="global_step", trainable=False)
    self.reset_global_step = tf.assign(self.global_step, 0)
    learning_rate = tf.train.exponential_decay(
        self.config["learning_rate"], self.global_step,
        self.config["decay_frequency"], self.config["decay_rate"],
        staircase=True)
    trainable_params = tf.trainable_variables()
    gradients = tf.gradients(self.loss, trainable_params)
    gradients, _ = tf.clip_by_global_norm(gradients, self.config["max_gradient_norm"])
    optimizers = {
        "adam": tf.train.AdamOptimizer,
        "sgd": tf.train.GradientDescentOptimizer,
    }
    optimizer = optimizers[self.config["optimizer"]](learning_rate)
    self.train_op = optimizer.apply_gradients(zip(gradients, trainable_params),
                                              global_step=self.global_step)
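# A minimal sketch of the training loop both constructors support (assumes a
# feeder thread is already running `enqueue_op`, as in the sketch after the
# first constructor; `model` and `num_steps` are hypothetical names).
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for _ in range(num_steps):
        # Each step dequeues one padded example and applies clipped gradients.
        loss, step, _ = session.run([model.loss, model.global_step, model.train_op])
        if step % 100 == 0:
            print("step {}: loss = {:.4f}".format(step, loss))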