Code Example #1
    def __init__(self):
        tf.reset_default_graph()

        self.encoder_vec_file = "./tfdata/enc.vec"
        self.decoder_vec_file = "./dec.vec"
        self.encoder_vocabulary = "./tfdata/enc.vocab"
        self.decoder_vocabulary = "./tfdata/dec.vocab"
        self.batch_size = 1
        self.max_batches = 100000
        self.show_epoch = 100
        self.model_path = './model/'

        self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                    decoder_cell=LSTMCell(40),
                                    encoder_vocab_size=600,
                                    decoder_vocab_size=1600,
                                    embedding_size=20,
                                    attention=False,
                                    bidirectional=False,
                                    debug=False,
                                    time_major=True)
        self.location = ["杭州", "重庆", "上海", "北京"]
        self.dec_vocab = {}
        self.enc_vocab = {}
        self.dec_vecToSeg = {}
        tag_location = ''
        with open(self.encoder_vocabulary, "r") as enc_vocab_file:
            for index, word in enumerate(enc_vocab_file.readlines()):
                self.enc_vocab[word.strip()] = index
        with open(self.decoder_vocabulary, "r") as dec_vocab_file:
            for index, word in enumerate(dec_vocab_file.readlines()):
                self.dec_vecToSeg[index] = word.strip()
                self.dec_vocab[word.strip()] = index
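
Every excerpt in this listing starts with tf.reset_default_graph(), which is TensorFlow 1.x graph-mode API. As a general TensorFlow note (not code from the project), the same call is reached under TensorFlow 2.x through the compatibility module:

    import tensorflow.compat.v1 as tf

    # Run the TF1-style graph code from these excerpts on a TF2 install.
    tf.disable_eager_execution()
    tf.reset_default_graph()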
Code Example #2
    def __init__(self):
        print("tensorflow version: ", tf.__version__)
        tf.reset_default_graph()

        self.encoder_vec_file = "./preprocessing/encode.vector"
        self.decoder_vec_file = "./preprocessing/decode.vector"
        self.encoder_vocabulary = "./preprocessing/encode.vocabulary"
        self.decoder_vocabulary = "./preprocessing/decode.vocabulary"
        self.batch_size = 1
        self.max_batches = 10000
        self.show_epoch = 1000
        self.model_path = './model/'
        self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                    decoder_cell=LSTMCell(40),
                                    encoder_vocab_size=600,
                                    decoder_vocab_size=1600,
                                    embedding_size=20,
                                    attention=False,
                                    bidirectional=False,
                                    debug=False,
                                    time_major=True)
        self.dec_vocab = {}
        self.enc_vocab = {}
        self.dec_vecToSeg = {}
        tag_location = ''
        with io.open(self.encoder_vocabulary, "r",
                     encoding="utf-8") as enc_vocab_file:
            for index, word in enumerate(enc_vocab_file.readlines()):
                self.enc_vocab[word.strip()] = index
        with io.open(self.decoder_vocabulary, "r",
                     encoding="utf-8") as dec_vocab_file:
            for index, word in enumerate(dec_vocab_file.readlines()):
                self.dec_vecToSeg[index] = word.strip()
                self.dec_vocab[word.strip()] = index
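
The two dictionaries built at the end of this constructor are the usual lookup tables of a seq2seq pipeline: enc_vocab maps an input token to its integer id, and dec_vecToSeg maps a predicted id back to a token. A minimal sketch of how they would typically be used; the helper names, sample tokens, and special-token ids are illustrative assumptions, not taken from the project:

    # Toy stand-ins for the tables loaded from enc.vocabulary / dec.vocabulary.
    enc_vocab = {"_PAD": 0, "_GO": 1, "_EOS": 2, "_UNK": 3, "你好": 4, "天气": 5}
    dec_vecToSeg = {0: "_PAD", 1: "_GO", 2: "_EOS", 3: "_UNK", 4: "你好", 5: "不错"}

    def sentence_to_ids(tokens, vocab, unk_id=3):
        # Unknown tokens fall back to the assumed _UNK id.
        return [vocab.get(token, unk_id) for token in tokens]

    def ids_to_sentence(ids, id_to_token):
        # Stop at the end-of-sequence marker and skip padding.
        words = []
        for i in ids:
            token = id_to_token.get(i, "_UNK")
            if token == "_EOS":
                break
            if token != "_PAD":
                words.append(token)
        return "".join(words)

    print(sentence_to_ids(["你好", "天气"], enc_vocab))  # [4, 5]
    print(ids_to_sentence([4, 5, 2, 0], dec_vecToSeg))   # 你好不错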
Code Example #3
File: main.py  Project: Decalogue/learning-nlp
    def __init__(self):
        tf.reset_default_graph()

        self.encoder_vec_file = "./tfdata/enc.vec"
        self.decoder_vec_file = "./tfdata/dec.vec"
        self.encoder_vocabulary = "./tfdata/enc.vocab"
        self.decoder_vocabulary = "./tfdata/dec.vocab"
        self.batch_size = 1
        self.max_batches = 100000
        self.show_epoch = 100
        self.model_path = './model/'

        self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                    decoder_cell=LSTMCell(40),
                                    encoder_vocab_size=600,
                                    decoder_vocab_size=1600,
                                    embedding_size=20,
                                    attention=False,
                                    bidirectional=False,
                                    debug=False,
                                    time_major=True)
        self.location = ["杭州", "重庆", "上海", "北京"]
        self.dec_vocab = {}
        self.enc_vocab = {}
        self.dec_vecToSeg = {}
        tag_location = ''
        with open(self.encoder_vocabulary, "r") as enc_vocab_file:
            for index, word in enumerate(enc_vocab_file.readlines()):
                self.enc_vocab[word.strip()] = index
        with open(self.decoder_vocabulary, "r") as dec_vocab_file:
            for index, word in enumerate(dec_vocab_file.readlines()):
                self.dec_vecToSeg[index] = word.strip()
                self.dec_vocab[word.strip()] = index
Code Example #4
    def __init__(self):
        tf.reset_default_graph()
        self.encoder_sege_file = "./tf_data_new/enc.segement"
        self.decoder_sege_file = "./tf_data_new/dec.segement"
        self.encoder_vocabulary = "./tf_data_new/enc.vocab"
        self.decoder_vocabulary = "./tf_data_new/dec.vocab"
        self.eval_enc = "./tf_data_new/eval_enc"
        self.eval_dec = "./tf_data_new/eval_dec"
        self.vocab_file = "./tf_data_new/en_de_vocabs"
        self.batch_size = 20
        self.max_batches = 15000
        self.show_epoch = 10
        self.model_path = './model_2/'
        self.transform_model = Transformer(
            embedding_size=128,
            num_layers=6,
            keep_prob_rate=0.2,
            learning_rate=0.0001,
            learning_decay_rate=0.99,
            clip_gradient=True,
            is_embedding_scale=True,
            multihead_num=8,
            max_gradient_norm=5,
            vocab_size=40020,
            max_encoder_len=200,
            max_decoder_len=200,
            share_embedding=True,
            pad_index=0,
            learning_decay_steps=500,
            dimension_feedforword=2048,
            dimension_model=512,
        )
        self.LSTMmodel = dynamicSeq2seq(encoder_cell=LSTMCell(500),
                                        decoder_cell=LSTMCell(500),
                                        encoder_vocab_size=70824,
                                        decoder_vocab_size=70833,
                                        embedding_size=128,
                                        attention=False,
                                        bidirectional=False,
                                        debug=False,
                                        time_major=True)
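
The Transformer hyperparameters used here (num_layers=6, multihead_num=8, dimension_model=512, dimension_feedforword=2048) match the "base" configuration from "Attention Is All You Need". With share_embedding=True the encoder and decoder reuse one embedding table, whose rough size follows directly from the arguments; a back-of-the-envelope check, not project code:

    # Size of the shared embedding table implied by vocab_size and dimension_model.
    vocab_size, dimension_model = 40020, 512
    shared_embedding_weights = vocab_size * dimension_model
    print(shared_embedding_weights)  # 20490240, about 20.5M weights shared by encoder and decoder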
Code Example #5
    def __init__(self):
        print("tensorflow version: ", tf.__version__)
        tf.reset_default_graph()

        self.encoder_vec_file = './preprocessing/enc.vec'
        self.decoder_vec_file = './preprocessing/dec.vec'
        self.encoder_vocabulary = './preprocessing/enc.vocab'
        self.decoder_vocabulary = './preprocessing/dec.vocab'
        self.dictFile = './word_dict.txt'
        self.batch_size = 1
        self.max_batches = 100000
        self.show_epoch = 100
        self.model_path = './model/'

        # Load the custom word dictionary into jieba
        jieba.load_userdict(self.dictFile)

        self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                    decoder_cell=LSTMCell(40),
                                    encoder_vocab_size=600,
                                    decoder_vocab_size=1600,
                                    embedding_size=20,
                                    attention=False,
                                    bidirectional=False,
                                    debug=False,
                                    time_major=True)
        self.location = ["杭州", "重庆", "上海", "北京"]
        self.user_info = {"__username__": "yw", "__location__": "重庆"}
        self.robot_info = {"__robotname__": "Rr"}
        self.dec_vocab = {}
        self.enc_vocab = {}
        self.dec_vecToSeg = {}
        tag_location = ''
        with open(self.encoder_vocabulary, "r") as enc_vocab_file:
            for index, word in enumerate(enc_vocab_file.readlines()):
                self.enc_vocab[word.strip()] = index
        with open(self.decoder_vocabulary, "r") as dec_vocab_file:
            for index, word in enumerate(dec_vocab_file.readlines()):
                self.dec_vecToSeg[index] = word.strip()
                self.dec_vocab[word.strip()] = index
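
This variant additionally loads a custom dictionary into jieba before segmentation, presumably so domain terms and the placeholder tokens used by user_info/robot_info stay intact as single tokens. A minimal, self-contained sketch of the jieba user-dictionary mechanism (the demo file name and entries are made up; the real entries live in ./word_dict.txt):

    import jieba

    # Each dictionary line is "word [frequency] [POS tag]"; frequency and tag are optional.
    with open("word_dict_demo.txt", "w", encoding="utf-8") as f:
        f.write("__username__ 5 n\n重庆 3 ns\n")

    jieba.load_userdict("word_dict_demo.txt")
    print(jieba.lcut("__username__在重庆"))  # e.g. ['__username__', '在', '重庆']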
Code Example #6
    def __init__(self):
        tf.reset_default_graph()  # Clears the default graph stack and resets the global default graph.

        self.encoder_vec_file = "./tfdata/enc.vec"
        self.decoder_vec_file = "./tfdata/dec.vec"
        self.encoder_vocabulary = "./tfdata/enc.vocab"
        self.decoder_vocabulary = "./tfdata/dec.vocab"
        self.batch_size = 1
        self.max_batches = 100000
        self.show_epoch = 100
        self.model_path = './model/'

        self.model = dynamicSeq2seq(encoder_cell=LSTMCell(40),
                                    decoder_cell=LSTMCell(40),
                                    encoder_vocab_size=600,
                                    decoder_vocab_size=1600,
                                    embedding_size=20,
                                    attention=False,
                                    bidirectional=False,
                                    debug=False,
                                    time_major=True)
        self.location = ["杭州", "重庆", "上海", "北京"]
        self.dec_vocab = {}
        self.enc_vocab = {}
        self.dec_vecToSeg = {}
        tag_location = ''
        with open(self.encoder_vocabulary, "r") as enc_vocab_file:
            # enumerate wraps an iterable (such as a list, tuple, or string) in an indexed sequence, yielding each item together with its index
            # readlines reads all lines (up to EOF) and returns them as a list
            for index, word in enumerate(enc_vocab_file.readlines()):
                # strip removes the specified characters (whitespace and newlines by default) from both ends of the string
                self.enc_vocab[word.strip()] = index
        with open(self.decoder_vocabulary, "r") as dec_vocab_file:
            for index, word in enumerate(dec_vocab_file.readlines()):
                self.dec_vecToSeg[index] = word.strip()
                self.dec_vocab[word.strip()] = index
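
The commented loops above spell out readlines, enumerate, and strip step by step. The same loading can be written more compactly; a sketch assuming each line of the vocab file holds exactly one token (iterating the file object directly avoids materializing every line at once with readlines):

    # Compact equivalent of the vocabulary-loading loops above.
    with open("./tfdata/enc.vocab", "r", encoding="utf-8") as enc_vocab_file:
        enc_vocab = {word.strip(): index for index, word in enumerate(enc_vocab_file)}

    with open("./tfdata/dec.vocab", "r", encoding="utf-8") as dec_vocab_file:
        dec_vocab, dec_vecToSeg = {}, {}
        for index, word in enumerate(dec_vocab_file):
            dec_vecToSeg[index] = word.strip()
            dec_vocab[word.strip()] = index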