Example #1
            self.context_seq: context_seq,
            self.pinlei_idx: pinlei_idx
        }

        score_list = session.run(self.score, feed_dict)

        return score_list

    def save(self, session, dir_path):
        import os
        # Create the checkpoint directory (including parents) if needed.
        if not os.path.isdir(dir_path):
            os.makedirs(dir_path)
        fp = dir_path + "/best_model"
        self.saver.save(session, fp)
        LogInfo.logs("Model saved into %s.", fp)

    def load(self, session, fp):
        LogInfo.logs("Loading Model from %s", fp)
        self.saver.restore(session, fp)
        LogInfo.logs("Model loaded from %s", fp)


if __name__ == "__main__":
    import sys
    import numpy as np
    import tensorflow as tf
    from xusheng.util.config import ConfigDict
    config = ConfigDict("runnings/%s/%s/param_config" %
                        (sys.argv[1], sys.argv[2]))
    model = IntentionIdentifier(config=config,
                                mode=tf.contrib.learn.ModeKeys.TRAIN,
                                embedding_vocab=np.array([[1, 2]]))
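
The save and load methods above are thin wrappers around TensorFlow's tf.train.Saver. A minimal self-contained sketch of the same checkpoint pattern (the variable and path names here are illustrative, not taken from the original model):

import os
import tensorflow as tf

# A stand-in variable so the Saver has something to checkpoint.
w = tf.Variable(tf.zeros([2, 2]), name="w")
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt_dir = "runnings/demo/try_0"  # hypothetical directory
    if not os.path.isdir(ckpt_dir):
        os.makedirs(ckpt_dir)
    fp = ckpt_dir + "/best_model"
    saver.save(sess, fp)      # same call as save() above
    saver.restore(sess, fp)   # same call as load() above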
Example #2
import sys

import numpy as np
import tensorflow as tf

from xusheng.task.intention.data.data import DataLoader, BatchGenerator
from xusheng.task.intention.model.identifier import IntentionIdentifier
from xusheng.util.config import ConfigDict
from xusheng.util.data_util import VocabularyLoader
from xusheng.util.eval_util import eval_acc_pn
from xusheng.util.log_util import LogInfo

if __name__ == '__main__':
    setting_dir = sys.argv[1]
    try_dir = sys.argv[2]
    root_path = 'runnings/%s/%s' % (setting_dir, try_dir)
    config_path = '%s/param_config' % root_path
    config = ConfigDict(config_path)

    vocab_loader = VocabularyLoader()
    vocab_loader.load_vocab(config.get("vocab_fp"),
                            config.get("embedding_dim"), 'utf-8')
    config.add("vocab_size", vocab_loader.vocab_size)
    LogInfo.logs("Embedding shape: %s.", vocab_loader.vocab_embedding.shape)

    data_loader = DataLoader(config.get("max_seq_len"),
                             vocab_loader.vocab_index_dict)
    data_loader.load(config.get("data_fp"), 'utf-8')

    LogInfo.logs("Create train, valid, test split...")
    train_size = int(0.997 * data_loader.data_size)
    valid_size = int(0.001 * data_loader.data_size)
    test_size = data_loader.data_size - train_size - valid_size
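
The excerpt stops after computing the split sizes; Example #3 below continues the same pattern by slicing data_loader.data, so the next lines presumably look like this (a sketch, assuming the same slicing order):

    # Slice the loaded data into the three partitions (see Example #3).
    train_data = data_loader.data[:train_size]
    valid_data = data_loader.data[train_size:train_size + valid_size]
    test_data = data_loader.data[train_size + valid_size:]
    LogInfo.logs("Split sizes: train=%d, valid=%d, test=%d.",
                 train_size, valid_size, test_size)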
Example #3
import sys

import numpy as np
import tensorflow as tf
from xusheng.task.nlu.data.data import DataLoader, BatchGenerator

from xusheng.task.nlu.abort.model.single_task import NER
from xusheng.util.config import ConfigDict
from xusheng.util.data_util import VocabularyLoader
from xusheng.util.eval_util import eval_seq_crf
from xusheng.util.log_util import LogInfo

if __name__ == '__main__':
    setting_dir = sys.argv[1]
    try_dir = sys.argv[2]
    root_path = 'runnings/%s/%s' % (setting_dir, try_dir)
    config_path = '%s/param_config' % root_path
    config = ConfigDict(config_path)

    vocab_loader = VocabularyLoader()
    vocab_loader.load_vocab(config.get("vocab_fp"), config.get("embedding_dim"), 'utf-8')
    config.add("vocab_size", vocab_loader.vocab_size)
    LogInfo.logs("Embedding shape: %s.", vocab_loader.vocab_embedding.shape)

    data_loader = DataLoader(config.get("max_seq_len"), vocab_loader.vocab_index_dict)
    data_loader.load(config.get("data_fp"), 'utf-8')

    LogInfo.logs("Create train, valid, test split...")
    train_size = int(config.get("train_split") * data_loader.data_size)
    valid_size = int(config.get("valid_split") * data_loader.data_size)
    test_size = data_loader.data_size - train_size - valid_size

    train_data = data_loader.data[:train_size]
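
Both Example #2 and Example #3 import BatchGenerator alongside DataLoader, but its interface is not visible in these excerpts. A minimal numpy-based batcher of the kind such training loops need might look like the stand-in below (illustrative only, not the xusheng API):

import numpy as np

class SimpleBatchGenerator(object):
    """Yield shuffled mini-batches over a list of examples."""

    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size

    def __iter__(self):
        # Re-shuffle the example order on every pass over the data.
        idx = np.random.permutation(len(self.data))
        for start in range(0, len(self.data), self.batch_size):
            yield [self.data[i] for i in idx[start:start + self.batch_size]]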
Example #4
import sys

import numpy as np
import tensorflow as tf
import codecs

from xusheng.task.intention.data.data import DataLoader
from xusheng.task.intention.model.identifier import IntentionIdentifier
from xusheng.util.config import ConfigDict
from xusheng.util.data_util import VocabularyLoader
from xusheng.util.log_util import LogInfo

if __name__ == '__main__':
    setting_dir = sys.argv[1]
    try_dir = sys.argv[2]
    root_path = 'runnings/%s/%s' % (setting_dir, try_dir)
    config_path = '%s/param_config' % root_path
    config = ConfigDict(config_path)

    vocab_loader = VocabularyLoader()
    vocab_loader.load_vocab(config.get("vocab_fp"),
                            config.get("embedding_dim"), 'utf-8')
    config.add("vocab_size", vocab_loader.vocab_size)
    LogInfo.logs("Embedding shape: %s.", vocab_loader.vocab_embedding.shape)

    data_loader = DataLoader(config.get("max_seq_len"),
                             vocab_loader.vocab_index_dict)
    data_loader.load(config.get("test_data_fp"), 'utf-8')

    LogInfo.begin_track("Create models...")
    graph = tf.Graph()
    with graph.as_default():
        test_model = IntentionIdentifier(
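
The excerpt is cut off inside the constructor call, but Example #1 shows the arguments IntentionIdentifier takes (config, mode, embedding_vocab). Test-time construction under a dedicated graph typically finishes with the TF 1.x pattern below; the mode value and checkpoint path are assumptions:

    graph = tf.Graph()
    with graph.as_default():
        test_model = IntentionIdentifier(
            config=config,
            mode=tf.contrib.learn.ModeKeys.EVAL,  # assumed evaluation mode
            embedding_vocab=vocab_loader.vocab_embedding)
        with tf.Session(graph=graph) as session:
            # Restore the checkpoint written by save() in Example #1.
            test_model.load(session, root_path + "/best_model")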
Example #5
"""
Train & test entity linking for WebQuestions (WebQ).
"""

import sys
import numpy as np
import tensorflow as tf

from xusheng.task.qa.linking.model import EntityLinker
from xusheng.util.config import ConfigDict
from xusheng.util.log_util import LogInfo

if __name__ == '__main__':
    # config
    setting_dir = sys.argv[1]
    try_dir = sys.argv[2]
    root_path = 'runnings/%s/%s' % (setting_dir, try_dir)
    config_path = '%s/param_config' % root_path
    config = ConfigDict(config_path)

    # model
    model = EntityLinker(config=config)
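
All of these scripts share the same command-line convention: the first argument picks a settings directory and the second a trial directory under runnings/. A hypothetical invocation (directory names are made up):

# python link.py webq try_0
# => config_path == "runnings/webq/try_0/param_config"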

Example #6
import sys
import codecs

from xusheng.task.intention.data.misc import MultiPinleiEvalDataAdapter
from xusheng.util.config import ConfigDict
from xusheng.util.data_util import VocabularyLoader
from xusheng.util.log_util import LogInfo

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

if __name__ == '__main__':
    setting_dir = sys.argv[1]
    try_dir = sys.argv[2]

    # load configuration
    root_path = 'runnings/%s/%s' % (setting_dir, try_dir)
    config_path = '%s/param_config' % root_path
    config = ConfigDict(config_path)

    # load vocabulary
    vocab_loader = VocabularyLoader()
    vocab_loader.load_vocab(config.get("vocab_fp"),
                            config.get("embedding_dim"), 'utf-8')
    config.add("vocab_size", vocab_loader.vocab_size)
    LogInfo.logs("Embedding shape: %s.", vocab_loader.vocab_embedding.shape)

    # load pinlei
    data_adapter = MultiPinleiEvalDataAdapter()
    data_adapter.load_pinlei()

    # load auxiliary words
    auxiliary_words = set()
    with codecs.open(config.get("auxiliary_words_fp"), 'r',
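
The excerpt ends inside the codecs.open call. Loading a word set this way conventionally finishes as below; the continuation is an assumption (the 'utf-8' encoding matches the rest of the script), not the original code:

    with codecs.open(config.get("auxiliary_words_fp"), 'r',
                     'utf-8') as f:
        # One auxiliary word per line, stripped of surrounding whitespace.
        for line in f:
            auxiliary_words.add(line.strip())
    LogInfo.logs("%d auxiliary words loaded.", len(auxiliary_words))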