class MarkovOnTopic(object):
    '''
    Text generator backed by a MarkovChain database.

    TODO: ngrams bumped to four or five-grams
    TODO: first ngram is the technical noun topic of the tweet
    TODO: stem the topics to narrow them. or even better find a way to
          standardize synonyms to the same word and then stem
    TODO: db key has no hashmarks but the values do
    TODO: db key in lowercase, but values are original
    '''

    def __init__(self, db_path='markov.db'):
        '''
        Open the chain stored at db_path, seeding a new one on failure.

        db_path -- path handed to MarkovChain and, if construction
                   raises, to seed_db.
        '''
        try:
            self.mc = MarkovChain(db_path, verbose=False)
        except Exception:
            # Any ordinary error from the constructor triggers the fallback,
            # not only a missing file. KeyboardInterrupt and SystemExit are
            # deliberately left to propagate.
            print('No database found at path. Creating new database.')
            self.mc = seed_db(db_path)

    def generate_db(self, docs, filename=None):
        '''
        Remember docs on the instance. Not implemented beyond that.

        docs     -- stored unchanged as self.docs.
        filename -- currently unused.
        '''
        self.docs = docs

    def generate_topics(self):
        '''Placeholder; not implemented yet.'''
        pass

    def generate_string(self, seed=None):
        '''
        Print one generated string that is not rejected by drop().

        seed -- when truthy it is passed to generateStringWithSeed;
                otherwise generateString is used.

        Candidates are regenerated until drop() returns a falsy value for
        one of them. The accepted text is printed; nothing is returned.
        '''
        while True:
            if seed:
                gen_text = self.mc.generateStringWithSeed(seed)
            else:
                gen_text = self.mc.generateString()
            if not drop(gen_text):
                # Parenthesised form is valid on both Python 2 and Python 3.
                print(gen_text)
                break
def generate_seedless_markov_sentence():
    '''
    Generate one sentence from a freshly built chain, without a seed.

    A MarkovChain is created with verbose=False, fed the items returned
    by get_text() joined with single spaces, and asked for one string.

    Returns '' when check_blacklist() flags the generated string,
    otherwise the result of sentence_case() applied to it.
    '''
    chain = MarkovChain(verbose=False)
    corpus = ' '.join(get_text())
    chain.generateDatabase(corpus)
    candidate = chain.generateString()
    return '' if check_blacklist(candidate) else sentence_case(candidate)