def twitter(file: str, n: int, words: int, method: str):
    """Generate a list of tweets from a raw-text corpus.

    file -> path of the raw text file to read and POS-tag
    n -> n-gram order for the language model
    words -> how many tweets to generate
    method -> generation method key: 'b' (baseline), 'n' (normal), 't' (tuned)

    Returns the generated tweets, excluding any that the generator marked
    by prefixing with "forced". Returns [] for an unknown method (the
    original risked an infinite loop in that case).
    """
    # Read and tag the corpus; the context manager guarantees the file is
    # closed even if tagging raises (the original held it open across tagging).
    with open(file, 'r') as source:
        text = source.read()
    tagged = StanfordTagger(verbose=True).tag(text)

    detok = StanfordDetokenizer()
    model = LVGNgramGenerator(tagged, n)
    methods = {'b': model.generate_without_pos,
               'n': model.generate,
               't': model.generate_alternative}

    # Guard clause: an unrecognized method can never produce output, so bail
    # out instead of spinning forever.
    if method not in methods:
        return []

    generate = methods[method]
    result = []
    for _ in range(words):
        # Random tweet length in words; no str()/isdigit() round-trip needed —
        # randint always yields an int in [20, 65].
        num_words = random.randint(20, 65)
        result.append(detok.detokenize(' '.join(generate(num_words))))

    # Drop generations the model flagged as "forced".
    return [tweet for tweet in result if not tweet.startswith("forced")]
def generate(self):
    """Interactively generate text from the configured data source.

    Uses ``self.data`` (raw text, tagged on the fly) or ``self.tagged_data``
    (pre-tagged CSV rows) to build an n-gram model of order ``self.n_gram``,
    then prompts in a loop for a word count and a generation method,
    printing each generated sequence. Enter "q" at the length prompt to exit.

    Raises:
        ValueError: if neither ``self.data`` nor ``self.tagged_data`` is set
            (the original code would fail later with an opaque NameError).
    """
    if self.data is not None:
        with open(self.data, 'r') as data_file:
            text = data_file.read()
        tagged = StanfordTagger(verbose=True).tag(text)
        if self.save_tagged_data:
            # Persist the tagged corpus next to the source as "<data>.tags".
            # (The original pre-computed this path into a variable that was
            # immediately shadowed by the `with` target — dead code, removed.)
            with open(self.data + ".tags", 'w') as save_file:
                csv.writer(save_file).writerows(tagged)
    elif self.tagged_data is not None:
        # Pre-tagged data: each CSV row is a (token, tag) tuple.
        with open(self.tagged_data, 'r') as data_file:
            tagged = [tuple(row) for row in csv.reader(data_file)]
    else:
        raise ValueError("no data source: set either `data` or `tagged_data`")

    detok = StanfordDetokenizer()
    model = LVGNgramGenerator(tagged, self.n_gram)
    methods = {'b': model.generate_without_pos,
               'n': model.generate,
               't': model.generate_alternative}

    while True:
        num_words = input('\nEnter the length in words to generate (or "q" to exit): ')
        if num_words.isdigit():
            method = input('Enter a generation method {b: baseline, n: normal, t: tuned}: ')
            if method in methods:
                print("\n\t" + detok.detokenize(' '.join(methods[method](int(num_words)))))
        elif num_words == 'q':
            break
def yelp_or_gutenberg(file: str, n: int, words: int, method: str):
    """Generate a list of sentences from a pre-tagged CSV corpus.

    file -> path of the CSV file of (token, tag) rows
    n -> n-gram order for the language model
    words -> how many sentences to generate
    method -> generation method key: 'b' (baseline), 'n' (normal), 't' (tuned)

    Returns the generated sentences, excluding any that the generator marked
    by prefixing with "forced". Returns [] for an unknown method (the
    original risked an infinite loop in that case).
    """
    tagged = []
    with open(file, 'r') as source:
        try:
            for row in csv.reader(source):
                tagged.append(tuple(row))
        except csv.Error:
            # Best-effort parse, as in the original: keep the rows read so
            # far if a malformed line appears. Narrowed from a bare `except`
            # so genuine bugs (KeyboardInterrupt, etc.) are no longer hidden.
            pass

    detok = StanfordDetokenizer()
    model = LVGNgramGenerator(tagged, n)
    methods = {'b': model.generate_without_pos,
               'n': model.generate,
               't': model.generate_alternative}

    # Guard clause: an unrecognized method can never produce output, so bail
    # out instead of spinning forever.
    if method not in methods:
        return []

    generate = methods[method]
    result = []
    for _ in range(words):
        # Random sentence length in words; randint already returns an int,
        # so the original str()/isdigit()/int() round-trip is unnecessary.
        num_words = random.randint(20, 65)
        result.append(detok.detokenize(' '.join(generate(num_words))))

    # Drop generations the model flagged as "forced".
    return [sentence for sentence in result if not sentence.startswith("forced")]