Example #1
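The snippets below are __init__ constructors excerpted from TensorFlow 1.x chatbot classes (inference, hybrid retrieval/generative, and training). None of them is self-contained; a minimal sketch of the imports and module-level names they rely on follows. The module paths and the two AIML file-name constants are assumptions, except chatbot/tokenizedata.py, which is named in a comment in Example #4:

    import os

    import aiml                    # python-aiml package, used by the hybrid example
    import tensorflow as tf       # TF 1.x API (tf.placeholder, tf.Session, ...)

    # The module paths below are assumptions; only tokenizedata.py is named
    # anywhere in the snippets (in the comment of Example #4).
    from chatbot.tokenizedata import TokenizedData
    from chatbot.modelcreator import ModelCreator
    from chatbot.knowledgebase import KnowledgeBase
    from chatbot.sessiondata import SessionData
    from chatbot.hparams import HParams

    # Assumed names for the AIML rule set and the compiled brain file.
    BRAIN_FILE = "aiml_brain.brn"
    AIMLS_FILE = "aiml_set.xml"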
    def __init__(self, session, corpus_dir, knbase_dir, result_dir,
                 result_file):
        self.session = session

        # Prepare data and hyperparameters
        print("# Prepare dataset placeholder and hyperparameters ...")
        self.tokenized_data = TokenizedData(corpus_dir=corpus_dir,
                                            knbase_dir=knbase_dir,
                                            training=False)

        self.hparams = self.tokenized_data.hparams
        self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src_dataset = tf.data.Dataset.from_tensor_slices(self.src_placeholder)
        self.infer_batch = self.tokenized_data.get_inference_batch(src_dataset)

        # Create model
        print("# Creating inference model ...")
        self.model = ModelCreator(training=False,
                                  tokenized_data=self.tokenized_data,
                                  batch_input=self.infer_batch)
        # Restore model weights
        print("# Restoring model weights ...")
        self.model.saver.restore(session, os.path.join(result_dir,
                                                       result_file))

        self.session.run(tf.tables_initializer())
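A hypothetical driver for this constructor. The class name BotPredictor is an assumption (the snippet does not show it), and the initializer attribute on the inference batch is assumed from the BatchInput namedtuple mentioned in Example #4, following the NMT-style data pipeline:

    with tf.Session() as sess:
        predictor = BotPredictor(sess,                      # class name assumed
                                 corpus_dir="Data/Corpus",  # paths are illustrative
                                 knbase_dir="Data/KnowledgeBase",
                                 result_dir="Data/Result",
                                 result_file="basic")
        # Feed raw question strings through the placeholder; the iterator
        # must be (re)initialized for every new batch of inputs.
        sess.run(predictor.infer_batch.initializer,
                 feed_dict={predictor.src_placeholder: ["How are you?"]})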
Example #2
    def __init__(self, session, corpus_dir, knbase_dir, result_dir, result_file):
        """
        Args:
            session: The TensorFlow session.
            corpus_dir: Name of the folder storing corpus files and vocab information.
            knbase_dir: Name of the folder storing data files for the knowledge base.
            result_dir: The folder containing the trained result files.
            result_file: The file name of the trained model.
        """
        self.session = session

        # Prepare data and hyperparameters
        print("# Prepare dataset placeholder and hyperparameters ...")
        tokenized_data = TokenizedData(corpus_dir=corpus_dir, training=False)

        self.knowledge_base = KnowledgeBase()
        self.knowledge_base.load_knbase(knbase_dir)

        self.session_data = SessionData()

        self.hparams = tokenized_data.hparams
        self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src_dataset = tf.data.Dataset.from_tensor_slices(self.src_placeholder)
        self.infer_batch = tokenized_data.get_inference_batch(src_dataset)

        # Create model
        print("# Creating inference model ...")
        self.model = ModelCreator(training=False, tokenized_data=tokenized_data,
                                  batch_input=self.infer_batch)
        # Restore model weights
        print("# Restoring model weights ...")
        self.model.saver.restore(session, os.path.join(result_dir, result_file))

        self.session.run(tf.tables_initializer())
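Compared with Example #1, this variant keeps the knowledge base out of the tokenizer: TokenizedData handles only the corpus, while KnowledgeBase.load_knbase(knbase_dir) loads the knowledge-base files and a separate SessionData object tracks per-session state.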
Example #3
    def __init__(self,
                 session,
                 corpus_dir,
                 knbase_dir,
                 result_dir,
                 hparams_dir=None):
        self.session = session

        hparams = HParams(hparams_dir).hparams if hparams_dir else None

        # Prepare data and hyperparameters
        print("# Prepare dataset placeholder and hyperparameters ...")
        self.tokenized_data = TokenizedData(corpus_dir=corpus_dir,
                                            hparams=hparams,
                                            knbase_dir=knbase_dir,
                                            training=False)

        self.hparams = self.tokenized_data.hparams
        self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src_dataset = tf.data.Dataset.from_tensor_slices(self.src_placeholder)
        self.infer_batch = self.tokenized_data.get_inference_batch(src_dataset)

        # Create model
        print("# Creating inference model ...")
        self.model = ModelCreator(training=False,
                                  tokenized_data=self.tokenized_data,
                                  batch_input=self.infer_batch)
        latest_ckpt = tf.train.latest_checkpoint(result_dir)
        print("# Restoring model weights ...")
        self.model.saver.restore(session, latest_ckpt)
        self.session.run(tf.tables_initializer())
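This variant differs from the first two in how it restores weights: rather than requiring a result_file name, it restores whatever tf.train.latest_checkpoint(result_dir) finds, which is convenient when the result folder accumulates several checkpoints. It also accepts an optional hparams_dir for overriding the default hyperparameters.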
Example #4
    def __init__(self, corpus_dir):
        self.graph = tf.Graph()
        with self.graph.as_default():
            tokenized_data = TokenizedData(corpus_dir=corpus_dir)

            self.hparams = tokenized_data.hparams
            self.train_batch = tokenized_data.get_training_batch()  # Returns a BatchInput namedtuple (defined in chatbot/tokenizedata.py)
            self.model = ModelCreator(training=True, tokenized_data=tokenized_data,
                                      batch_input=self.train_batch)
Example #5
    def __init__(self, session, corpus_dir, knbase_dir, result_dir, aiml_dir,
                 result_file):
        """
        Args:
            session: The TensorFlow session.
            corpus_dir: Name of the folder storing corpus files and vocab information.
            knbase_dir: Name of the folder storing data files for the knowledge base.
            result_dir: The folder containing the trained result files.
            aiml_dir: The folder storing the AIML rule files and the saved brain file.
            result_file: The file name of the trained model.
        """
        self.session = session

        # Prepare data and hyperparameters
        print("# Prepare dataset placeholder and hyperparameters ...")
        tokenized_data = TokenizedData(corpus_dir=corpus_dir, training=False)

        self.knowledge_base = KnowledgeBase()
        self.knowledge_base.load_knbase(knbase_dir)

        self.session_data = SessionData()

        self.hparams = tokenized_data.hparams
        self.src_placeholder = tf.placeholder(shape=[None], dtype=tf.string)
        src_dataset = tf.data.Dataset.from_tensor_slices(self.src_placeholder)
        self.infer_batch = tokenized_data.get_inference_batch(src_dataset)

        # Create retrieval model
        self.kmodel = aiml.Kernel()
        brain_file_name = os.path.join(aiml_dir, BRAIN_FILE)
        print(aiml_dir)

        # Restore model rules
        if os.path.exists(brain_file_name):
            print("# Loading from brain file ... ")
            self.kmodel.loadBrain(brain_file_name)
        else:
            print("# Parsing aiml files ...")
            aimls_file_name = os.path.join(aiml_dir, AIMLS_FILE)
            self.kmodel.bootstrap(learnFiles=os.path.abspath(aimls_file_name),
                                  commands="load aiml b")
            print("# Saving brain file: " + BRAIN_FILE)
            self.kmodel.saveBrain(brain_file_name)

        # Create Generative model
        print("# Creating inference model ...")
        self.model = ModelCreator(training=False,
                                  tokenized_data=tokenized_data,
                                  batch_input=self.infer_batch)
        # Restore model weights
        print("# Restoring model weights ...")
        self.model.saver.restore(session, os.path.join(result_dir,
                                                       result_file))

        self.session.run(tf.tables_initializer())
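With both models loaded, a plausible dispatch order is to query the AIML kernel first and fall back to the generative model when no rule matches. Kernel.respond is a real python-aiml call; the predict helper is purely hypothetical:

    # Rule-based model first; respond() returns an empty string when no
    # AIML pattern matches the input.
    answer = predictor.kmodel.respond("What is your name?")
    if not answer:
        # Hypothetical helper wrapping the placeholder feed shown above.
        answer = predictor.predict("What is your name?")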
Example #6
    def __init__(self, corpus_dir):
        """
        Constructor of the BotTrainer.
        Args:
            corpus_dir: The folder where all the training-related data is saved.
        """
        self.graph = tf.Graph()
        with self.graph.as_default():
            tokenized_data = TokenizedData(corpus_dir=corpus_dir)

            self.hparams = tokenized_data.hparams
            self.train_batch = tokenized_data.get_training_batch()
            self.model = ModelCreator(training=True, tokenized_data=tokenized_data,
                                      batch_input=self.train_batch)
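The docstring names this class BotTrainer. A minimal sketch of driving it, assuming the actual training loop lives elsewhere in the project:

    trainer = BotTrainer(corpus_dir="Data/Corpus")  # path is illustrative
    with trainer.graph.as_default():
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.tables_initializer())
            # trainer.model and trainer.train_batch are the entry points
            # for the training loop, which this snippet does not include.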