def load(cls, basename: str, **kwargs) -> 'ClassifierModelBase':
    """Reload the model from a graph file and a checkpoint

    The model that is loaded is independent of the pooling and stacking layers,
    making this class reusable by sub-classes.

    :param basename: The base directory to load from
    :param kwargs: See below

    :Keyword Arguments:
    * *sess* -- An optional tensorflow session.  If not passed, a new session is created

    :return: A restored model
    """
    _state = read_json("{}.state".format(basename))
    if __version__ != _state['version']:
        logger.warning("Loaded model is from baseline version %s, running version is %s",
                       _state['version'], __version__)

    if not tf.executing_eagerly():
        _state['sess'] = kwargs.pop('sess', create_session())

        with _state['sess'].graph.as_default():
            embeddings_info = _state.pop('embeddings')
            embeddings = reload_embeddings(embeddings_info, basename)
            # If there is a kwarg with the same name as an embedding object, it is
            # taken to be the input of that layer.  This allows for passing in
            # subgraphs like from a tf.split (for data parallel) or preprocessing
            # graphs that convert text to indices
            for k in embeddings_info:
                if k in kwargs:
                    _state[k] = kwargs[k]

            labels = read_json("{}.labels".format(basename))
            model = cls.create(embeddings, labels, **_state)
            model._state = _state
            if kwargs.get('init', True):
                model.sess.run(tf.compat.v1.global_variables_initializer())
            model.saver = tf.compat.v1.train.Saver()
            model.saver.restore(model.sess, basename)
    else:
        embeddings_info = _state.pop('embeddings')
        embeddings = reload_embeddings(embeddings_info, basename)
        # If there is a kwarg with the same name as an embedding object, it is
        # taken to be the input of that layer.  This allows for passing in
        # subgraphs like from a tf.split (for data parallel) or preprocessing
        # graphs that convert text to indices
        for k in embeddings_info:
            if k in kwargs:
                _state[k] = kwargs[k]
        # TODO: convert labels into just another vocab and pass number of labels to models.
        labels = read_json("{}.labels".format(basename))
        model = cls.create(embeddings, labels, **_state)
        model._state = _state
        model.load_weights(f"{basename}.wgt")
    return model
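# --- Illustrative usage (added; not part of the original source) ---
# A minimal sketch of restoring a classifier with the `load` classmethod above.
# The basename './models/sst2-classifier' is hypothetical; the method expects
# '<basename>.state', '<basename>.labels', and the saved embeddings to exist,
# plus either a TF1 checkpoint (graph mode) or '<basename>.wgt' weights (eager mode).
def _example_restore_classifier():
    # In eager mode no session is needed; in graph mode an existing session
    # could be passed with `sess=...`, as documented in the docstring above.
    model = ClassifierModelBase.load('./models/sst2-classifier')
    return model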
def recover_last_checkpoint(self):
    # Rebuild the graph from scratch and reload the most recent per-process
    # checkpoint in predict mode so the restored model can be used for decoding.
    latest = os.path.join(self.base_dir, 'seq2seq-model-tf-%d' % os.getpid())
    logger.info('Reloading %s', latest)
    g = tf.Graph()
    with g.as_default():
        SET_TRAIN_FLAG(None)
        sess = create_session()
        self.model = self.model.load(latest, predict=True, beam=self.beam, session=sess)
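# --- Illustrative usage (added; not part of the original source) ---
# Sketch of when a trainer might call `recover_last_checkpoint`: after training
# ends, the newest per-process checkpoint ('<base_dir>/seq2seq-model-tf-<pid>')
# is reloaded into a fresh graph before evaluation. The surrounding driver code
# here is hypothetical; only the method call itself comes from the source above.
def _example_recover(trainer):
    # Replaces `trainer.model` with the model restored from the latest checkpoint.
    trainer.recover_last_checkpoint()
    return trainer.model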
def create(cls, embeddings, **kwargs):
    sess = kwargs.get('sess', create_session())
    model = cls()
    model.embeddings = embeddings
    model._record_state(**kwargs)
    model.embedded = model.embed(**kwargs)
    model.sess = sess
    model.saver = kwargs.get('saver', tf.train.Saver())
    feed_dict = {k: v for e in embeddings.values() for k, v in e.get_feed_dict().items()}
    if kwargs.get('init', True):
        # If we have any luts that are large, be sure to fill the embeddings
        # with the weight values on initialization.
        model.sess.run(tf.global_variables_initializer(), feed_dict)
    return model
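# --- Illustrative usage (added; not part of the original source) ---
# Sketch of building a model with the `create` classmethod above from a
# dictionary of already-constructed embedding objects. `word_embedding` is a
# hypothetical embedding instance, and `model_cls` stands in for whichever
# class defines this method, since the snippet above does not name it.
def _example_create_from_embeddings(model_cls, word_embedding):
    # `init=True` (the default) runs the global variable initializer, feeding
    # any large lookup-table weights through each embedding's feed dict.
    model = model_cls.create({'word': word_embedding}, init=True)
    return model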
def load(cls, basename, **kwargs):
    _state = read_json("{}.state".format(basename))
    if __version__ != _state['version']:
        bl_logger.warning("Loaded model is from baseline version %s, running version is %s",
                          _state['version'], __version__)
    _state['sess'] = kwargs.pop('sess', create_session())

    with _state['sess'].graph.as_default():
        embeddings_info = _state.pop('embeddings')
        embeddings = reload_embeddings(embeddings_info, basename)
        for k in embeddings_info:
            if k in kwargs:
                _state[k] = kwargs[k]
        model = cls.create(embeddings, init=kwargs.get('init', True), **_state)
        model._state = _state
        model.saver = tf.train.Saver()
        model.saver.restore(model.sess, basename)
    return model
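# --- Illustrative usage (added; not part of the original source) ---
# Sketch of reloading a saved model with the graph-mode `load` above while
# reusing a caller-owned session, e.g. so several models share one session and
# graph. `model_cls`, `basename`, and `shared_sess` are hypothetical inputs.
def _example_load_with_shared_session(model_cls, basename, shared_sess):
    # Passing `sess` prevents `load` from creating a new session; the TF1
    # Saver then restores the checkpoint into that session's graph.
    model = model_cls.load(basename, sess=shared_sess)
    return model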
def create(cls, embeddings, labels, **kwargs):
    """The main method for creating all :class:`WordBasedModel` types.

    This method instantiates a model with pooling and optional stacking layers.
    Many of the arguments provided are reused by each implementation, but some
    sub-classes need more information in order to properly initialize.  For this
    reason, the full list of keyword args is passed to the :meth:`pool` and
    :meth:`stacked` methods.

    :param embeddings: This is a dictionary of embeddings, mapped to their numerical indices in the lookup table
    :param labels: This is a list of the `str` labels
    :param kwargs: There are sub-graph specific Keyword Args allowed for e.g. embeddings. See below for known args:

    :Keyword Arguments:
    * *gpus* -- (``int``) How many GPUs to split training across.  If set, this function
      delegates to another class, `ClassifyParallelModel`, which creates a parent graph
      and splits its inputs across each sub-model by calling back into this exact method
      (without this argument), once per GPU
    * *model_type* -- The string name for the model (defaults to `default`)
    * *sess* -- An optional tensorflow session.  If not passed, a new session is created
    * *lengths_key* -- (``str``) Specifies which `batch_dict` property should be used to determine
      the temporal length.  If this is not set, it defaults to `word`, or to `x` if `word` is also
      not a feature
    * *finetune* -- Are we doing fine-tuning of word embeddings (defaults to `True`)
    * *mxlen* -- The maximum signal (`x` tensor temporal) length (defaults to `100`)
    * *dropout* -- This indicates how much dropout should be applied to the model when training
    * *filtsz* -- This is actually a top-level param due to an unfortunate coupling between
      the pooling layer and the input, which, for convolution, requires input padding

    :return: A fully-initialized tensorflow classifier
    """
    TRAIN_FLAG()
    gpus = kwargs.get('gpus', 1)
    if gpus == -1:
        gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))
        kwargs['gpus'] = gpus
    if gpus > 1:
        return ClassifyParallelModel(cls.create, embeddings, labels, **kwargs)

    sess = kwargs.get('sess', create_session())
    model = cls()
    model.embeddings = embeddings
    model._record_state(**kwargs)
    model.lengths_key = kwargs.get('lengths_key')
    if model.lengths_key is not None:
        model.lengths = kwargs.get('lengths', tf.placeholder(tf.int32, [None], name="lengths"))
    else:
        model.lengths = None

    model.labels = labels
    nc = len(labels)
    model.y = kwargs.get('y', tf.placeholder(tf.int32, [None, nc], name="y"))
    # This only exists to make exporting easier
    model.pdrop_value = kwargs.get('dropout', 0.5)
    # This only exists to make exporting easier
    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        seed = np.random.randint(10e8)
        init = tf.random_uniform_initializer(-0.05, 0.05, dtype=tf.float32, seed=seed)
        word_embeddings = model.embed(**kwargs)
        input_sz = word_embeddings.shape[-1]
        pooled = model.pool(word_embeddings, input_sz, init, **kwargs)
        stacked = model.stacked(pooled, init, **kwargs)

        # For fully connected layers, use xavier (glorot) transform
        with tf.variable_scope("output"):
            model.logits = tf.identity(
                tf.layers.dense(stacked, nc, activation=None,
                                kernel_initializer=tf.glorot_uniform_initializer(seed)),
                name="logits")
            model.best = tf.argmax(model.logits, 1, name="best")
            model.probs = tf.nn.softmax(model.logits, name="probs")

    model.sess = sess
    # writer = tf.summary.FileWriter('blah', sess.graph)
    return model
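# --- Illustrative usage (added; not part of the original source) ---
# Sketch of the multi-GPU path in the TF1 `create` above: with `gpus=-1` the
# GPU count is inferred from CUDA_VISIBLE_DEVICES (or NV_GPU), and with more
# than one GPU the call is delegated to `ClassifyParallelModel`. The
# `embeddings` dict and `labels` list here are hypothetical placeholders.
def _example_create_multi_gpu(model_cls, embeddings, labels):
    # Equivalent to `gpus=2` when CUDA_VISIBLE_DEVICES="0,1" is set in the environment.
    model = model_cls.create(embeddings, labels, gpus=-1, dropout=0.5)
    return model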
def create(cls, embeddings: Dict[str, BaseLayer], labels: List[str], **kwargs) -> 'ClassifierModelBase':
    """The main method for creating all :class:`ClassifierBasedModel` types.

    This method typically instantiates a model with pooling and optional stacking layers.
    Many of the arguments provided are reused by each implementation, but some
    sub-classes need more information in order to properly initialize.  For this
    reason, the full list of keyword args is passed to the :meth:`pool` and
    :meth:`stacked` methods.

    :param embeddings: This is a dictionary of embeddings, mapped to their numerical indices in the lookup table
    :param labels: This is a list of the `str` labels
    :param kwargs: There are sub-graph specific Keyword Args allowed for e.g. embeddings. See below for known args:

    :Keyword Arguments:
    * *gpus* -- (``int``) How many GPUs to split training across.  If set, this function
      delegates to another class, `ClassifyParallelModel`, which creates a parent graph
      and splits its inputs across each sub-model by calling back into this exact method
      (without this argument), once per GPU
    * *model_type* -- The string name for the model (defaults to `default`)
    * *sess* -- An optional tensorflow session.  If not passed, a new session is created
    * *lengths_key* -- (``str``) Specifies which `batch_dict` property should be used to determine
      the temporal length.  If this is not set, it defaults to `word`, or to `x` if `word` is also
      not a feature
    * *finetune* -- Are we doing fine-tuning of word embeddings (defaults to `True`)
    * *mxlen* -- The maximum signal (`x` tensor temporal) length (defaults to `100`)
    * *dropout* -- This indicates how much dropout should be applied to the model when training
    * *filtsz* -- This is actually a top-level param due to an unfortunate coupling between
      the pooling layer and the input, which, for convolution, requires input padding

    :return: A fully-initialized tensorflow classifier
    """
    model = cls(name=kwargs.get('name'))
    #embeddings_ = {}
    #for k, embedding in embeddings.items():
    #    embeddings_[k] = embedding #.detached_ref()
    model.lengths_key = kwargs.get('lengths_key')

    if not tf.executing_eagerly():
        inputs = {}
        if model.lengths_key is not None:
            model._unserializable.append(model.lengths_key)
            model.lengths = kwargs.get('lengths', tf.compat.v1.placeholder(tf.int32, [None], name="lengths"))
            inputs['lengths'] = model.lengths
        else:
            model.lengths = None

    model._record_state(embeddings, **kwargs)
    nc = len(labels)

    if not tf.executing_eagerly():
        model.y = kwargs.get('y', tf.compat.v1.placeholder(tf.int32, [None, nc], name="y"))
        for k, embedding in embeddings.items():
            x = kwargs.get(k, embedding.create_placeholder(name=k))
            inputs[k] = x
        model.sess = kwargs.get('sess', create_session())

    model.pdrop_value = kwargs.get('dropout', 0.5)
    model.labels = labels
    model.create_layers(embeddings, **kwargs)

    if not tf.executing_eagerly():
        model.logits = tf.identity(model(inputs), name="logits")
        model.best = tf.argmax(model.logits, 1, name="best")
        model.probs = tf.nn.softmax(model.logits, name="probs")

    return model
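# --- Illustrative usage (added; not part of the original source) ---
# Sketch of the graph-mode escape hatch in the `create` method above: a kwarg
# whose name matches an embedding key replaces that feature's default
# placeholder, e.g. to wire in a preprocessing subgraph that converts text to
# word indices. The tensor `word_indices_op` and the 'word' key are hypothetical.
def _example_create_with_custom_input(model_cls, embeddings, labels, word_indices_op):
    # 'word' must be a key of `embeddings`; its placeholder is replaced by the
    # supplied tensor when the graph-mode inputs dict is built.
    model = model_cls.create(embeddings, labels, word=word_indices_op, lengths_key='word')
    return model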