def convert_to_signed(s: Sentence):
    """Convert a sentence into its signed (tableau) form.

    A sentence whose main connective is a negation is emitted as
    false-signed with the negation stripped; every other sentence
    (including atomic ones) is emitted as true-signed.
    """
    conn, arg = s.getComponents()
    # Atomic sentence: no argument to decompose, sign it true as-is.
    if arg is None:
        return [s.generate('signtrue')] + s.reduceBrackets()
    _, form = arg
    # Negation: drop the connective and flip the sign instead.
    if conn.startswith('not_'):
        return [s.generate('signfalse')] + form.reduceBrackets()
    return [s.generate('signtrue')] + s.reduceBrackets()
def into_sentence(prefix: list[int], conn_dict: dict[int, tp.Iterable[str]], var_amount: int, var_type: str, sess) -> Sentence:
    """Build a random Sentence from an arity skeleton given in prefix notation.

    First generates `var_amount` variable tokens — each is appended to a
    scratch sentence as it is produced (presumably so `generate` can see
    prior tokens and avoid duplicates — TODO confirm) — then fills a fresh
    sentence according to `prefix` via `_into_sentence`.
    """
    scratch = Sentence([], sess)
    variables = []
    for _ in range(var_amount):
        token = scratch.generate(var_type)
        scratch.append(token)
        variables.append(token)
    # The scratch sentence is discarded; only the variable pool is kept.
    result = Sentence([], sess)
    _into_sentence(result, prefix, conn_dict, variables)
    return result
def _into_sentence(s: Sentence, prefix: list[int], conn_dict: dict[int, tp.Iterable[str]], variables: list[str]):
    """Recursively fill `s` following the arity list `prefix`.

    Arity 0 appends a random variable; arity 2 is written infix with
    explicit parentheses; any other arity is written prefix with that
    many operands. All recursive calls consume one element of `prefix`.
    """
    arity = prefix[0]
    if arity == 0:
        s.append(rchoice(variables))
        return
    main = s.generate(rchoice(conn_dict[arity]))
    rest = prefix[1:]
    if arity == 2:
        # Binary connectives are rendered infix and parenthesised.
        s.append('(')
        _into_sentence(s, rest, conn_dict, variables)
        s.append(main)
        _into_sentence(s, rest, conn_dict, variables)
        s.append(')')
    else:
        # Prefix notation: connective first, then `arity` operands.
        s.append(main)
        for _ in range(arity):
            _into_sentence(s, rest, conn_dict, variables)
def add_prefix(sentence: Sentence, prefix: str, lexem: str = None) -> Sentence:
    """Add a prefix to a sentence.

    :param sentence: Sentence to modify
    :type sentence: Sentence
    :param prefix: Prefix type (`x` in `x_y`)
    :type prefix: str
    :param lexem: Prefix lexeme (`y` in `x_y`); generated when omitted
    :type lexem: str
    :return: Modified sentence
    :rtype: Sentence
    """
    token = sentence.generate(prefix) if not lexem else f"{prefix}_{lexem}"
    # A single-token sentence needs no extra bracketing.
    if len(sentence) == 1:
        return Sentence([token, *sentence], sentence.S)
    new_record = {0: sentence.calcPrecedenceVal(prefix)}
    # BUG FIX: iterate .items(), not .values() — the comprehension needs
    # (index, precedence) pairs; indices shift by 2 for the inserted
    # prefix token and '(' and precedences deepen by one nesting level.
    return Sentence(
        [token, '(', *sentence, ')'],
        sentence.S,
        {i + 2: j + 1 for i, j in sentence.precedenceBaked.items()} | new_record)
object* = null | object ''' corpus = ''' article = 这 这个 这部 object = 电影 影片 片子 片儿 他 她 他们 她们 它 它们 男主 男主角 男主角儿 女主 女主角 女主角儿 男配角 女配角 观众 导演 adv = 太 十分 相当 相当的 真是 真的是 很 非常 非常非常 特别地 相当地 令人 让人 竟然 居然 adj = 精彩 吸引人 绝了 值得一看 好看 引人入胜 感人 动容 可怜 愤怒 气人 可恶 垃圾 难看 不真实 不现实 帅 崇拜 美丽 漂亮 可恨 惋惜 牛 厉害 有水平 水平不行 辛苦 水了 差 不咋地 刮目相看 赞 赞了 verb = 出场 上场 出现 消失 牺牲 杀了 打败了 击败 击败了 爱上了 喜欢上 看上了 相中了 憎恨 不喜欢 仇恨 复仇 ''' # adv = 神秘地 # adj = 不正经 snt_grt = Sentence() snt_grt.set_corpus(corpus) snt_grt.set_grammar(gram) sentece_eg = snt_grt.generate('sentence') print(sentece_eg) # 2元语法模型 t1 = time() data_df = pd.read_csv('input/movie_comments.csv') model = TwoGrams() model.train(data_df.loc[:20000, 'comment']) stn = '导演太让人喜欢了' print('(p,ppl){}={}'.format(stn, model.prob_sentence(stn))) stn1 = '高晓松是土肥圆?' print('(p,ppl){}={}'.format(stn1, model.prob_sentence(stn1))) # best sentence print(generate_best(snt_grt, model, 100000, 20))
def test_sentence_generator(self):
    """A fresh Sentence must generate Chomsky's canonical example."""
    generated = Sentence().generate()
    self.assertEqual(generated, "Colorless green ideas sleep furiously")
from sentence import Sentence
import config as config

logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)

if __name__ == '__main__':
    # Refresh the mimicry model's status before reading from it.
    twitter = Twitter(consumer_key=config.CONSUMER_KEY,
                      consumer_secret=config.CONSUMER_SECRET,
                      access_token_key=config.ACCESS_TOKEN,
                      access_token_secret=config.ACCESS_TOKEN_SECRET)
    twitter.update_status(config.MIMICRY_MODEL)

    # Pull the target's newest post; abort when there is nothing to mimic.
    message = twitter.get_latest_status_text(config.MIMICRY_MODEL)
    if message is None:
        logging.error("Mimicry target's tweet is None.")
        exit()

    # Gather semantically similar words for the target's message, train the
    # sentence model on the account's own posting history, then publish a
    # generated status built from those words.
    semantics = Semantics()
    similar_words = semantics.get_similar_words(message)
    train_posts = twitter.get_all_status_text()
    sentence = Sentence()
    sentence.learn(train_posts)
    generated_message = sentence.generate(similar_words)
    twitter.post(generated_message)