Example #1
    @classmethod
    def load(cls, basename: str, **kwargs) -> 'ClassifierModelBase':
        """Reload the model from a graph file and a checkpoint

        The model that is loaded is independent of the pooling and stacking layers, making this class reusable
        by sub-classes.

        :param basename: The base directory to load from
        :param kwargs: See below

        :Keyword Arguments:
        * *sess* -- An optional tensorflow session.  If not passed, a new session is
            created

        :return: A restored model
        """
        _state = read_json("{}.state".format(basename))
        if __version__ != _state['version']:
            logger.warning(
                "Loaded model is from baseline version %s, running version is %s",
                _state['version'], __version__)

        if not tf.executing_eagerly():
            _state['sess'] = kwargs.pop('sess', create_session())
            with _state['sess'].graph.as_default():
                embeddings_info = _state.pop('embeddings')
                embeddings = reload_embeddings(embeddings_info, basename)
                # If a kwarg has the same name as an embedding object, it is taken
                # to be the input of that layer. This allows passing in subgraphs,
                # like those from a tf.split (for data parallelism) or preprocessing
                # graphs that convert text to indices.
                for k in embeddings_info:
                    if k in kwargs:
                        _state[k] = kwargs[k]
                labels = read_json("{}.labels".format(basename))
                model = cls.create(embeddings, labels, **_state)
                model._state = _state
                if kwargs.get('init', True):
                    model.sess.run(tf.compat.v1.global_variables_initializer())
                model.saver = tf.compat.v1.train.Saver()
                model.saver.restore(model.sess, basename)
        else:
            embeddings_info = _state.pop('embeddings')
            embeddings = reload_embeddings(embeddings_info, basename)
            # If a kwarg has the same name as an embedding object, it is taken
            # to be the input of that layer. This allows passing in subgraphs,
            # like those from a tf.split (for data parallelism) or preprocessing
            # graphs that convert text to indices.
            for k in embeddings_info:
                if k in kwargs:
                    _state[k] = kwargs[k]
            # TODO: convert labels into just another vocab and pass number of labels to models.
            labels = read_json("{}.labels".format(basename))
            model = cls.create(embeddings, labels, **_state)
            model._state = _state
            model.load_weights(f"{basename}.wgt")
        return model
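
For context, a hypothetical call site for this classmethod is sketched below; the checkpoint base name is an illustrative assumption, and the optional `sess` keyword only applies in graph (non-eager) mode.

# Hypothetical usage sketch; the path './models/classify-model' is an assumption.
# In graph mode an existing session may be passed via `sess`; in eager mode the
# weights are instead read from '<basename>.wgt'.
model = ClassifierModelBase.load('./models/classify-model')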
Example #2
 def recover_last_checkpoint(self):
     latest = os.path.join(self.base_dir,
                           'seq2seq-model-tf-%d' % os.getpid())
     logger.info('Reloading %s', latest)
     g = tf.Graph()
     with g.as_default():
         SET_TRAIN_FLAG(None)
         sess = create_session()
         self.model = self.model.load(latest,
                                      predict=True,
                                      beam=self.beam,
                                      session=sess)
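
Each of these examples leans on a `create_session` helper whose definition is not shown. The following is only a minimal sketch of what such a factory commonly does, assuming a TF2 runtime with v1 compatibility; the `allow_growth` option is an assumption, not taken from this codebase.

import tensorflow as tf

def create_session():
    # Minimal sketch of a session factory; the real helper may differ.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # don't grab all GPU memory up front
    return tf.compat.v1.Session(config=config)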
Example #3
 @classmethod
 def create(cls, embeddings, **kwargs):
     sess = kwargs.get('sess', create_session())
     model = cls()
     model.embeddings = embeddings
     model._record_state(**kwargs)
     model.embedded = model.embed(**kwargs)
     model.sess = sess
     model.saver = kwargs.get('saver', tf.train.Saver())
     feed_dict = {k: v for e in embeddings.values() for k, v in e.get_feed_dict().items()}
     if kwargs.get('init', True):
         # If we have any LUTs that are large, be sure to fill the embeddings
         # with the weight values on initialization.
         model.sess.run(tf.global_variables_initializer(), feed_dict)
     return model
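
The one-line dict comprehension above flattens every embedding's feed dict into a single `feed_dict`. Here is a standalone, runnable illustration of that pattern, using toy stand-ins in place of real embedding objects.

# Toy stand-ins for embedding objects; only get_feed_dict() matters here.
class ToyEmbedding:
    def __init__(self, feed):
        self._feed = feed

    def get_feed_dict(self):
        return self._feed

embeddings = {'word': ToyEmbedding({'word/W:0': [1, 2]}),
              'char': ToyEmbedding({'char/W:0': [3]})}
feed_dict = {k: v for e in embeddings.values() for k, v in e.get_feed_dict().items()}
print(feed_dict)  # {'word/W:0': [1, 2], 'char/W:0': [3]}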
Example #4
 @classmethod
 def load(cls, basename, **kwargs):
     _state = read_json("{}.state".format(basename))
     if __version__ != _state['version']:
         bl_logger.warning("Loaded model is from baseline version %s, running version is %s", _state['version'], __version__)
     _state['sess'] = kwargs.pop('sess', create_session())
     with _state['sess'].graph.as_default():
         embeddings_info = _state.pop('embeddings')
         embeddings = reload_embeddings(embeddings_info, basename)
         for k in embeddings_info:
             if k in kwargs:
                 _state[k] = kwargs[k]
         model = cls.create(embeddings, init=kwargs.get('init', True), **_state)
         model._state = _state
         model.saver = tf.train.Saver()
         model.saver.restore(model.sess, basename)
     return model
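
As with example #1, here is a hypothetical call site for this graph-mode `load`; the class name `SomeModel` and the checkpoint path are assumptions for illustration only.

# Hypothetical usage sketch; SomeModel and the path are assumptions.
sess = create_session()
model = SomeModel.load('./checkpoints/model-tf', sess=sess, init=True)
# model.sess now holds the session the checkpoint was restored into.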
Example #5
    @classmethod
    def create(cls, embeddings, labels, **kwargs):
        """The main method for creating all :class:`WordBasedModel` types.

        This method instantiates a model with pooling and optional stacking layers.
        Many of the arguments provided are reused by each implementation, but some sub-classes need more
        information in order to properly initialize.  For this reason, the full list of keyword args is passed
        to the :meth:`pool` and :meth:`stacked` methods.

        :param embeddings: This is a dictionary of embeddings, mapped to their numerical indices in the lookup table
        :param labels: This is a list of the `str` labels
        :param kwargs: Sub-graph specific keyword args are allowed, e.g. for embeddings.  See below for known args:

        :Keyword Arguments:
        * *gpus* -- (``int``) How many GPUs to split training across.  If greater than one, this method delegates
            to another class, `ClassifyParallelModel`, which creates a parent graph and splits its inputs across
            each sub-model by calling back into this same method (without this argument), once per GPU
        * *model_type* -- The string name for the model (defaults to `default`)
        * *sess* -- An optional tensorflow session.  If not passed, a new session is
            created
        * *lengths_key* -- (``str``) Specifies which `batch_dict` property should be used to determine the temporal
            length.  If not set, it defaults to `word`, or to `x` if `word` is also not a feature
        * *finetune* -- Are we doing fine-tuning of word embeddings (defaults to `True`)
        * *mxlen* -- The maximum signal (`x` tensor temporal) length (defaults to `100`)
        * *dropout* -- This indicates how much dropout should be applied to the model when training.
        * *filtsz* -- This is actually a top-level param due to an unfortunate coupling between the pooling layer
            and the input, which, for convolution, requires input padding.

        :return: A fully-initialized tensorflow classifier
        """
        TRAIN_FLAG()
        gpus = kwargs.get('gpus', 1)
        if gpus == -1:
            gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))
            kwargs['gpus'] = gpus
        if gpus > 1:
            return ClassifyParallelModel(cls.create, embeddings, labels, **kwargs)
        sess = kwargs.get('sess', create_session())

        model = cls()
        model.embeddings = embeddings
        model._record_state(**kwargs)
        model.lengths_key = kwargs.get('lengths_key')
        if model.lengths_key is not None:
            model.lengths = kwargs.get('lengths', tf.placeholder(tf.int32, [None], name="lengths"))
        else:
            model.lengths = None

        model.labels = labels
        nc = len(labels)
        model.y = kwargs.get('y', tf.placeholder(tf.int32, [None, nc], name="y"))
        # This only exists to make exporting easier
        model.pdrop_value = kwargs.get('dropout', 0.5)

        with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):

            seed = np.random.randint(10**9)
            init = tf.random_uniform_initializer(-0.05, 0.05, dtype=tf.float32, seed=seed)
            word_embeddings = model.embed(**kwargs)
            input_sz = word_embeddings.shape[-1]
            pooled = model.pool(word_embeddings, input_sz, init, **kwargs)
            stacked = model.stacked(pooled, init, **kwargs)

            # For fully connected layers, use xavier (glorot) transform
            with tf.variable_scope("output"):

                model.logits = tf.identity(
                    tf.layers.dense(stacked, nc, activation=None,
                                    kernel_initializer=tf.glorot_uniform_initializer(seed)),
                    name="logits")
                model.best = tf.argmax(model.logits, 1, name="best")
                model.probs = tf.nn.softmax(model.logits, name="probs")
        model.sess = sess
        # writer = tf.summary.FileWriter('blah', sess.graph)

        return model
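
The `gpus == -1` branch above infers a device count from the environment rather than asking TensorFlow; here is a standalone illustration of that probe:

import os

# Standalone illustration of the gpus=-1 environment probe used above.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2'
gpus = len(os.getenv('CUDA_VISIBLE_DEVICES', os.getenv('NV_GPU', '0')).split(','))
print(gpus)  # 3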
Example #6
    @classmethod
    def create(cls, embeddings: Dict[str, BaseLayer], labels: List[str],
               **kwargs) -> 'ClassifierModelBase':
        """The main method for creating all :class:`ClassifierBasedModel` types.

        This method typically instantiates a model with pooling and optional stacking layers.
        Many of the arguments provided are reused by each implementation, but some sub-classes need more
        information in order to properly initialize.  For this reason, the full list of keyword args is passed
        to the :meth:`pool` and :meth:`stacked` methods.

        :param embeddings: This is a dictionary of embeddings, mapped to their numerical indices in the lookup table
        :param labels: This is a list of the `str` labels
        :param kwargs: Sub-graph specific keyword args are allowed, e.g. for embeddings.  See below for known args:

        :Keyword Arguments:
        * *gpus* -- (``int``) How many GPUs to split training across.  If greater than one, this method delegates
            to another class, `ClassifyParallelModel`, which creates a parent graph and splits its inputs across
            each sub-model by calling back into this same method (without this argument), once per GPU
        * *model_type* -- The string name for the model (defaults to `default`)
        * *sess* -- An optional tensorflow session.  If not passed, a new session is
            created
        * *lengths_key* -- (``str``) Specifies which `batch_dict` property should be used to determine the temporal
            length.  If not set, it defaults to `word`, or to `x` if `word` is also not a feature
        * *finetune* -- Are we doing fine-tuning of word embeddings (defaults to `True`)
        * *mxlen* -- The maximum signal (`x` tensor temporal) length (defaults to `100`)
        * *dropout* -- This indicates how much dropout should be applied to the model when training.
        * *filtsz* -- This is actually a top-level param due to an unfortunate coupling between the pooling layer
            and the input, which, for convolution, requires input padding.

        :return: A fully-initialized tensorflow classifier
        """
        model = cls(name=kwargs.get('name'))
        #embeddings_ = {}
        #for k, embedding in embeddings.items():
        #    embeddings_[k] = embedding #.detached_ref()

        model.lengths_key = kwargs.get('lengths_key')

        if not tf.executing_eagerly():

            inputs = {}
            if model.lengths_key is not None:
                model._unserializable.append(model.lengths_key)
                model.lengths = kwargs.get(
                    'lengths',
                    tf.compat.v1.placeholder(tf.int32, [None], name="lengths"))
                inputs['lengths'] = model.lengths
            else:
                model.lengths = None

        model._record_state(embeddings, **kwargs)

        nc = len(labels)
        if not tf.executing_eagerly():
            model.y = kwargs.get(
                'y', tf.compat.v1.placeholder(tf.int32, [None, nc], name="y"))
            for k, embedding in embeddings.items():
                x = kwargs.get(k, embedding.create_placeholder(name=k))
                inputs[k] = x

            model.sess = kwargs.get('sess', create_session())

        model.pdrop_value = kwargs.get('dropout', 0.5)
        model.labels = labels
        model.create_layers(embeddings, **kwargs)

        if not tf.executing_eagerly():
            model.logits = tf.identity(model(inputs), name="logits")
            model.best = tf.argmax(model.logits, 1, name="best")
            model.probs = tf.nn.softmax(model.logits, name="probs")
        return model
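
A hedged usage sketch for this dual-mode `create`: in eager mode the returned model is directly callable on a batch of inputs, while in graph mode the prebuilt `logits`/`best`/`probs` tensors are fetched through `model.sess`. The concrete subclass name `ConvClassifier` and the `batch` dictionary are assumptions.

# Hypothetical usage; ConvClassifier, embeddings, labels and batch are assumptions.
model = ConvClassifier.create(embeddings, labels, lengths_key='word', dropout=0.4)
if tf.executing_eagerly():
    probs = tf.nn.softmax(model(batch))                    # model is callable in eager mode
else:
    probs = model.sess.run(model.probs, feed_dict=batch)   # fetch the prebuilt tensor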