Example #1
    def load(cls, basename, **kwargs):
        K.clear_session()
        model = cls()
        model.impl = keras.models.load_model(basename)
        state = read_json(basename + '.state')
        for prop in ls_props(model):
            if prop in state:
                setattr(model, prop, state[prop])
        inputs = {v.name[:v.name.find(':')]: v for v in model.impl.inputs}

        model.embeddings = dict()
        for key, class_name in state['embeddings'].items():
            md = read_json('{}-{}-md.json'.format(basename, key))
            embed_args = dict({'vsz': md['vsz'], 'dsz': md['dsz']})
            embed_args[key] = inputs[key]
            Constructor = eval(class_name)
            model.embeddings[key] = Constructor(key, **embed_args)

        ##model.lengths_key = state.get('lengths_key')

        with open(basename + '.labels', 'r') as f:
            model.labels = json.load(f)

        return model
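For reference, a sketch of the sidecar JSON this loop consumes; the embedding class name and the sizes are hypothetical, but the shape follows from the code above (`state['embeddings']` maps input key to class name, and each `-md.json` carries `vsz` and `dsz`):

    # Hypothetical sidecar contents consumed by the loop above
    state = {'version': '1.0', 'embeddings': {'word': 'LookupTableEmbeddings'}}
    md = {'vsz': 30000, 'dsz': 300}  # from <basename>-word-md.json for key 'word'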
Example #2
    def save_md(self, basename):
        """This method saves out a `.state` file containing meta-data from these classes and any info
        registered by a user-defined derived class as a `property`. Also write the `graph` and `saver` and `labels`

        :param basename:
        :return:
        """
        path = basename.split('/')
        base = path[-1]
        outdir = '/'.join(path[:-1])

        # For each embedding, save a record of the keys

        embeddings_info = {}
        for k, v in self.embeddings.items():
            embeddings_info[k] = v.__class__.__name__
        state = {"version": __version__, "embeddings": embeddings_info}
        for prop in ls_props(self):
            state[prop] = getattr(self, prop)

        write_json(state, basename + '.state')
        write_json(self.labels, basename + ".labels")
        for key, embedding in self.embeddings.items():
            embedding.save_md(basename + '-{}-md.json'.format(key))
        tf.train.write_graph(self.sess.graph_def,
                             outdir,
                             base + '.graph',
                             as_text=False)
        with open(basename + '.saver', 'w') as f:
            f.write(str(self.saver.as_saver_def()))
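For orientation, a sketch of the sidecar files one call produces; the model object `m` and the basename `./tagger/model` are hypothetical:

    # Hypothetical usage; `m` has embeddings, labels, sess and saver already set up
    m.save_md('./tagger/model')
    # Files written, per the body above:
    #   ./tagger/model.state           version, embedding class names, registered props
    #   ./tagger/model.labels          label vocabulary as JSON
    #   ./tagger/model-<key>-md.json   per-embedding metadata ('vsz', 'dsz')
    #   ./tagger/model.graph           binary GraphDef
    #   ./tagger/model.saver           text-format SaverDef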
Example #3
    def save_md(self, basename):

        path = basename.split('/')
        base = path[-1]
        outdir = '/'.join(path[:-1])

        # For each embedding, save a record of the keys

        embeddings_info = {}
        for k, v in self.embeddings.items():
            embeddings_info[k] = v.__class__.__name__
        state = {
            'version': __version__,
            'embeddings': embeddings_info,
            'crf': self.crf,
            'proj': self.proj,
            'constrain_decode': self.constraint is not None
        }
        for prop in ls_props(self):
            state[prop] = getattr(self, prop)

        write_json(state, basename + '.state')
        write_json(self.labels, basename + ".labels")
        for key, embedding in self.embeddings.items():
            embedding.save_md(basename + '-{}-md.json'.format(key))
        tf.train.write_graph(self.sess.graph_def,
                             outdir,
                             base + '.graph',
                             as_text=False)
        with open(basename + '.saver', 'w') as f:
            f.write(str(self.saver.as_saver_def()))
Example #4
    def _create_model(self, sess, basename):
        labels = read_json(basename + '.labels')
        model_params = self.task.config_params["model"]
        model_params["sess"] = sess

        state = read_json(basename + '.state')
        if state.get('constrain_decode', False):
            constraint = transition_mask(
                labels, self.task.config_params['train']['span_type'],
                Offsets.GO, Offsets.EOS, Offsets.PAD)
            model_params['constraint'] = constraint

        # Re-create the embeddings sub-graph
        embeddings = dict()
        for key, class_name in state['embeddings'].items():
            md = read_json('{}-{}-md.json'.format(basename, key))
            embed_args = dict({'vsz': md['vsz'], 'dsz': md['dsz']})
            Constructor = eval(class_name)
            embeddings[key] = Constructor(key, **embed_args)

        model = baseline.model.create_model_for(self.task.task_name(),
                                                embeddings, labels,
                                                **model_params)

        for prop in ls_props(model):
            if prop in state:
                setattr(model, prop, state[prop])

        model.create_loss()

        softmax_output = tf.nn.softmax(model.probs)
        values, indices = tf.nn.top_k(softmax_output, 1)

        # Prepend a start frame in which only Offsets.GO has a non-penalized
        # score, so the CRF decoder begins every sequence from an explicit GO state
        start_np = np.full((1, 1, len(labels)), -1e4, dtype=np.float32)
        start_np[:, 0, Offsets.GO] = 0
        start = tf.constant(start_np)
        start = tf.tile(start, [tf.shape(model.probs)[0], 1, 1])
        model.probs = tf.concat([start, model.probs], 1)

        # Each sequence is now one step longer because of the start frame
        ones = tf.fill(tf.shape(model.lengths), 1)
        lengths = tf.add(model.lengths, ones)

        if model.crf is True:
            indices, _ = tf.contrib.crf.crf_decode(model.probs, model.A,
                                                   lengths)
            # Drop the decoded start frame
            indices = indices[:, 1:]
        list_of_labels = [''] * len(labels)
        for label, idval in labels.items():
            list_of_labels[idval] = label

        class_tensor = tf.constant(list_of_labels)
        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))
        self._restore_checkpoint(sess, basename)

        return model, indices, values
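A note on the start-frame trick above: `crf_decode` has no explicit start state, so the code prepends a timestep whose only non-penalized score belongs to `Offsets.GO`, bumps every length by one, and slices the decoded GO back off. A minimal numpy sketch of that frame, assuming `Offsets.GO == 0` and three labels:

    import numpy as np

    n_labels, GO = 3, 0  # assume Offsets.GO == 0 for this sketch
    start = np.full((1, 1, n_labels), -1e4, dtype=np.float32)
    start[:, 0, GO] = 0
    # start is now [[[0., -10000., -10000.]]]: only GO is a plausible first state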
Example #5
    def load(cls, basename, **kwargs):
        state = read_json(basename + '.state')
        if 'predict' in kwargs:
            state['predict'] = kwargs['predict']

        if 'beam' in kwargs:
            state['beam'] = kwargs['beam']

        state['sess'] = kwargs.get('sess', tf.Session())
        state['model_type'] = kwargs.get('model_type', 'default')

        with open(basename + '.saver') as fsv:
            saver_def = tf.train.SaverDef()
            text_format.Merge(fsv.read(), saver_def)

        src_embeddings = dict()
        src_embeddings_dict = state.pop('src_embeddings')
        for key, class_name in src_embeddings_dict.items():
            md = read_json('{}-{}-md.json'.format(basename, key))
            embed_args = dict({'vsz': md['vsz'], 'dsz': md['dsz']})
            Constructor = eval(class_name)
            src_embeddings[key] = Constructor(key, **embed_args)

        tgt_class_name = state.pop('tgt_embedding')
        md = read_json('{}-tgt-md.json'.format(basename))
        embed_args = dict({'vsz': md['vsz'], 'dsz': md['dsz']})
        Constructor = eval(tgt_class_name)
        tgt_embedding = Constructor('tgt', **embed_args)
        model = cls.create(src_embeddings, tgt_embedding, **state)
        for prop in ls_props(model):
            if prop in state:
                setattr(model, prop, state[prop])
        do_init = kwargs.get('init', True)
        if do_init:
            init = tf.global_variables_initializer()
            model.sess.run(init)

        model.saver = tf.train.Saver()
        model.saver.restore(model.sess, basename)
        return model
Example #6
    def _create_model(self, sess, basename):
        # Read the labels
        labels = read_json(basename + '.labels')

        # Get the parameters from MEAD
        model_params = self.task.config_params["model"]
        model_params["sess"] = sess

        # Read the state file
        state = read_json(basename + '.state')

        # Re-create the embeddings sub-graph
        embeddings = dict()
        for key, class_name in state['embeddings'].items():
            md = read_json('{}-{}-md.json'.format(basename, key))
            embed_args = dict({'vsz': md['vsz'], 'dsz': md['dsz']})
            Constructor = eval(class_name)
            embeddings[key] = Constructor(key, **embed_args)

        # Instantiate a graph
        model = baseline.model.create_model_for(self.task.task_name(),
                                                embeddings, labels,
                                                **model_params)

        # Set the properties of the model from the state file
        for prop in ls_props(model):
            if prop in state:
                setattr(model, prop, state[prop])

        # Append to the graph for class output
        values, indices = tf.nn.top_k(model.probs, len(labels))
        class_tensor = tf.constant(model.labels)
        table = tf.contrib.lookup.index_to_string_table_from_tensor(
            class_tensor)
        classes = table.lookup(tf.to_int64(indices))

        # Restore the checkpoint
        self._restore_checkpoint(sess, basename)
        return model, classes, values
Example #7
    def save(self, basename):
        self.impl.save(basename, overwrite=True)
        path = basename.split('/')
        base = path[-1]
        outdir = '/'.join(path[:-1])

        # For each embedding, save a record of the keys

        embeddings_info = {}
        for k, v in self.embeddings.items():
            embeddings_info[k] = v.__class__.__name__
        state = {
            "version": __version__,
            "embeddings": embeddings_info
            ## "lengths_key": self.lengths_key
        }
        for prop in ls_props(self):
            state[prop] = getattr(self, prop)

        write_json(state, basename + '.state')
        write_json(self.labels, basename + ".labels")
        for key, embedding in self.embeddings.items():
            embedding.save_md(basename + '-{}-md.json'.format(key))
Example #8
    def _create_model(self, sess, basename):
        model_params = self.task.config_params["model"]
        model_params["sess"] = sess
        model_params['predict'] = True
        model_params['beam'] = self.task.config_params.get('beam', 30)

        state = read_json(basename + '.state')
        if not state:
            raise RuntimeError("state file not found or is empty")

        model_params["src_lengths_key"] = state["src_lengths_key"]
        self.length_key = state["src_lengths_key"]

        # Re-create the embeddings sub-graph
        embeddings = self.init_embeddings(
            state[self.SOURCE_STATE_EMBED_KEY].items(), basename)

        # Create the target embeddings; there's only one.
        target_embeddings = self.init_embeddings(
            [(self.TARGET_EMBED_KEY, state[self.TARGET_STATE_EMBED_KEY])],
            basename)
        target_embeddings = target_embeddings[self.TARGET_EMBED_KEY]

        model = baseline.model.create_model_for(self.task.task_name(),
                                                embeddings, target_embeddings,
                                                **model_params)

        for prop in ls_props(model):
            if prop in state:
                setattr(model, prop, state[prop])

        # classes = model.tgt_embedding.lookup(tf.cast(model.best, dtype=tf.int64))
        classes = model.decoder.best
        self._restore_checkpoint(sess, basename)

        return model, classes, None
Example #9
    def load(cls, basename, **kwargs):
        """Reload the model from a graph file and a checkpoint
        
        The model that is loaded is independent of the pooling and stacking layers, making this class reusable
        by sub-classes.
        
        :param basename: The base directory to load from
        :param kwargs: See below
        
        :Keyword Arguments:
        * *sess* (or *session*) -- An optional TensorFlow session. If not passed,
            a new session is created
        
        :return: A restored model
        """
        sess = kwargs.get('session', kwargs.get('sess', tf.Session()))
        model = cls()
        with open(basename + '.saver') as fsv:
            saver_def = tf.train.SaverDef()
            text_format.Merge(fsv.read(), saver_def)

        checkpoint_name = kwargs.get('checkpoint_name', basename)
        checkpoint_name = checkpoint_name or basename

        state = read_json(basename + '.state')

        for prop in ls_props(model):
            if prop in state:
                setattr(model, prop, state[prop])

        with gfile.FastGFile(basename + '.graph', 'rb') as f:
            gd = tf.GraphDef()
            gd.ParseFromString(f.read())
            # `as_default()` returns a context manager; calling it bare is a
            # no-op, so enter it to import into the session's graph
            with sess.graph.as_default():
                tf.import_graph_def(gd, name='')
            try:
                sess.run(saver_def.restore_op_name,
                         {saver_def.filename_tensor_name: checkpoint_name})
            except Exception:
                # Backwards compat: older checkpoints used a '.model' suffix
                sess.run(saver_def.restore_op_name, {
                    saver_def.filename_tensor_name:
                    checkpoint_name + ".model"
                })

        model.embeddings = dict()
        for key, class_name in state['embeddings'].items():
            md = read_json('{}-{}-md.json'.format(basename, key))
            embed_args = dict({'vsz': md['vsz'], 'dsz': md['dsz']})
            embed_args[key] = tf.get_default_graph().get_tensor_by_name(
                '{}:0'.format(key))
            Constructor = eval(class_name)
            model.embeddings[key] = Constructor(key, **embed_args)

        if model.lengths_key is not None:
            model.lengths = tf.get_default_graph().get_tensor_by_name(
                'lengths:0')

        else:
            model.lengths = None
        model.probs = tf.get_default_graph().get_tensor_by_name(
            'output/probs:0')

        model.best = tf.get_default_graph().get_tensor_by_name('output/best:0')
        model.logits = tf.get_default_graph().get_tensor_by_name(
            'output/logits:0')

        model.labels = read_json(basename + '.labels')
        model.sess = sess
        return model
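A minimal usage sketch; `ConvModel` (a hypothetical subclass implementing this `load`) and the checkpoint basename are stand-ins, and `sess` is optional as the docstring notes:

    import tensorflow as tf

    sess = tf.Session()
    # ConvModel and './classify/model' are hypothetical stand-ins
    model = ConvModel.load('./classify/model', sess=sess)
    # load reattaches the graph tensors:
    print(model.probs, model.best, model.logits)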
Example #10
def _write_props_to_state(obj, state):
    for prop in ls_props(obj):
        state[prop] = getattr(obj, prop)
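A minimal sketch of how this helper pairs with `ls_props`, assuming `ls_props(obj)` yields the names of attributes declared as `property` on the object's class, as its use throughout these examples suggests; the `Dummy` class is hypothetical:

    class Dummy(object):
        def __init__(self):
            self._lengths_key = 'word_lengths'

        @property
        def lengths_key(self):
            return self._lengths_key

    state = {}
    _write_props_to_state(Dummy(), state)
    print(state)  # {'lengths_key': 'word_lengths'}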