def __init__(self):
    self.char_dict = wordDict()  # character dictionary
    self.char2vec = word2Vec()  # character embeddings (gensim word2vec)
    self.embedding = self.char2vec.get_embedding()
    self._build_layers()
    self.model_path = [
        None, './models/new', './models/thirdSentence/', './models/new'
    ]
    self.loss_path = [f'./result/loss/{i}_sentence' for i in range(4)]
    self.attention_img_path = ['./result/attention/']
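# _build_layers() is not shown in this snippet. Below is a minimal sketch of how
# the pre-trained embedding is typically wired into a tf.compat.v1 graph; the
# placeholder and variable names are illustrative, not the project's actual code.
def _embedding_lookup_sketch(embedding):
    import tensorflow.compat.v1 as tf
    char_ids = tf.placeholder(tf.int32, shape=[None, None], name='char_ids')
    # Initialize a trainable matrix from the gensim-derived weights.
    emb_matrix = tf.get_variable(
        'embedding', initializer=tf.constant(embedding, dtype=tf.float32))
    return tf.nn.embedding_lookup(emb_matrix, char_ids)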
def __init__(self):
    self.char_dict = wordDict()
    self._pron_dict = dict()
    with open(_pinyin_path, 'r', encoding='utf-8') as fin:
        for line in fin:
            toks = line.strip().split()
            ch = chr(int(toks[0], 16))  # first token: Unicode code point in hex
            if ch not in self.char_dict:
                continue
            self._pron_dict[ch] = []
            for tok in toks[1:]:
                # each remaining token: pinyin followed by a tone digit
                self._pron_dict[ch].append((tok[:-1], int(tok[-1])))
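# The loop above implies each line of the pinyin table has the form
# "<hex code point> <pinyin+tone> [<pinyin+tone> ...]". A self-contained check on
# one hypothetical line (the file format is inferred from the parsing code):
def _parse_pron_line(line):
    toks = line.strip().split()
    ch = chr(int(toks[0], 16))
    return ch, [(tok[:-1], int(tok[-1])) for tok in toks[1:]]

assert _parse_pron_line('4E2D zhong1 zhong4') == ('中', [('zhong', 1), ('zhong', 4)])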
def _gen_word2vec():
    print("Generating word2vec model ...")
    word_dict = wordDict()
    poems = Poems()
    # Flatten each poem's four sentences into one token sequence.
    poems = [poem[0] + poem[1] + poem[2] + poem[3] for poem in poems]
    print(poems[1])  # peek at one flattened poem
    # min_count=1 because the corpus contains many low-frequency characters.
    model = models.Word2Vec(poems, size=WORD_VEC_DIM, min_count=1)  # gensim < 4.0 API
    # Initialize all rows randomly, then copy trained vectors in where available.
    embedding = uniform(-1.0, 1.0, [len(word_dict), WORD_VEC_DIM])
    for i, ch in enumerate(word_dict):
        if ch in model:
            embedding[i, :] = model[ch]
    np.save(word2vec_path, embedding)
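# Note: the call above uses the gensim 3.x API. Under gensim >= 4.0, `size` was
# renamed to `vector_size` and vocabulary lookups moved to `model.wv`; the
# equivalent calls under the newer API would be:
#     model = models.Word2Vec(poems, vector_size=WORD_VEC_DIM, min_count=1)
#     if ch in model.wv:
#         embedding[i, :] = model.wv[ch]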
def _gen_poems():
    print("Parsing poems ...")
    word_dict = wordDict()
    with open(poems_path, 'w', encoding='utf-8') as fout:
        for corpus in _corpus_list:
            with open(os.path.join(raw_dir, corpus), 'r',
                      encoding='utf-8') as fin:
                for line in fin:
                    sentences = split_sentences(line)
                    # Keep only poems whose first sentence has exactly three words.
                    if not sentences or len(sentences[0].split()) != 3:
                        continue
                    # Skip poems containing any character outside the dictionary.
                    all_words_in_dict = True
                    for sentence in sentences:
                        for ch in sentence.strip().split():
                            if word_dict.word2int(ch) < 0:
                                all_words_in_dict = False
                                break
                        if not all_words_in_dict:
                            break
                    if all_words_in_dict:
                        fout.write('|'.join(sentences) + '\n')
            print(f"Finished parsing {corpus}.")
from paths import save_dir
from pron_dict import PronDict
from random import random
from singleton import Singleton
from utils import WORD_VEC_DIM, NUM_OF_SENTENCES
from word_dict import wordDict
import numpy as np
import os
import sys
import tensorflow.compat.v1 as tf
import time
import matplotlib.pyplot as plt
BATCH_SIZE = 128
NUM_UNITS = 128
LEN_PER_SENTENCE = 5
_model_path = os.path.join(save_dir, 'model')
WORD_DICT_SIZE = len(wordDict())
# Every stage loads from the './models/pair' checkpoint; each saves to its own directory.
model_load_path = {
    0: './models/pair',
    1: './models/pair',
    2: './models/pair',
    3: './models/pair',
    4: './models/pair',
}
model_save_path = {
    0: './models/pair',
    1: './models/sentence1',
    2: './models/sentence2',
    3: './models/sentence3',
    4: './models/sentence4',
}
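# A minimal sketch of how such per-stage paths are typically consumed with
# tf.compat.v1 checkpoints (illustrative only; the actual training loop is not
# shown in this snippet):
def _restore_and_save_sketch(sess, saver, stage):
    ckpt = tf.train.latest_checkpoint(model_load_path[stage])
    if ckpt:
        saver.restore(sess, ckpt)  # warm-start from the pair-trained model
    # ... train this stage, then checkpoint it under its own directory ...
    saver.save(sess, os.path.join(model_save_path[stage], 'model'))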
result_save_path = {
def __init__(self):
    # Regenerate the embedding matrix if the cached file is stale:
    # if not check_uptodate(word2vec_path):
    #     _gen_word2vec()
    self.embedding = np.load(word2vec_path)
    self.word_dict = wordDict()
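# Usage sketch, assuming this initializer belongs to the word2Vec class: rows of
# the loaded matrix follow the dictionary order used in _gen_word2vec(), so a
# character's vector can be fetched via its dictionary index (word2int is
# assumed to return a negative value for unknown characters):
#     char2vec = word2Vec()
#     idx = char2vec.word_dict.word2int('月')
#     if idx >= 0:
#         vec = char2vec.embedding[idx]  # shape: (WORD_VEC_DIM,)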
from plan import Planner
from generator_2021 import Generator
from typing import List
import os
from word_vec2 import word2Vec
from word_dict import wordDict
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'  # silence TensorFlow info/warning logs

# For testing purpose.
if __name__ == '__main__':
    word_dict = wordDict()
    word2vec = word2Vec()
    generator = Generator()
    i = 0
    while True:
        i += 1
        hint: str = input("Type first sentence: ")
        print(hint)
        # Keep only characters that exist in the dictionary.
        keywords = [ch for ch in hint if word_dict.word2int(ch) != -1]
        if not keywords:
            print("No known characters in input; try again.")
            continue
        keyword = keywords[0]
        # Pad to five keywords with characters similar to the first one.
        keywords = keywords[1:] + word2vec.similar_word_(
            keywords[0], 5 - len(keywords))
        keywords = [keyword] + [w for w in keywords if w != keyword]
        print("Keywords: ", keywords)
        keywords = [' '.join(kw) for kw in keywords]
        poem: str = generator.generate_by_multiple_models(
            keywords, 0, './result/demo', '', f'{i}')
        # Remove spaces, strip the '^' start marker, and break lines at '$'.
        output = ''.join(poem.split()).strip('^').replace('$', '\n')
        print("Poem: \n", output)
        with open("./result/demo/result.txt", 'a', encoding='utf-8') as f:
            f.write("Input: " + hint + '\n')