import argparse
import random
import string
from typing import Dict, List

import nltk
from gensim.models import FastText
from nltk.corpus import abc, brown

# `read_json_file`, `DEBUG`, and `DATA_FOLDER` are assumed to be defined
# elsewhere in the project.


def find_lines(emotion: str, rhyming_partials: List[Dict]):
    """Create combinations of ending lines (3rd and 4th) from the knowledge base."""
    data = read_json_file("data/bible_kjv_wrangled.json")
    keys, last_words_of_sentences = extract_keys_sentences_last_words(data)
    if DEBUG:
        print(
            f'\nchoose_lines,\n\tkeys length {len(keys)}'
            f'\n\n\tlength last words of sentences {len(last_words_of_sentences)}'
        )
    ret = []
    for partial in rhyming_partials:
        for word in partial['rhymes']:
            # Collect the keys of every verse whose last word matches the rhyme.
            rhyming_sentences = [
                keys[i] for i, x in enumerate(last_words_of_sentences) if x == word
            ]
            if DEBUG:
                print(f'rhyming sentences in choose lines {rhyming_sentences}')
            ret.extend(generate_partials(data, partial, rhyming_sentences))
    return ret
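# The two helpers called above are not shown in this file. What follows is a
# minimal sketch of what they are assumed to do, reconstructed from the inlined
# variant of find_lines further below; the real implementations may differ.
def extract_keys_sentences_last_words(data: Dict[str, str]):
    """Return parallel lists: verse keys, and the lowercased, punctuation-free
    last word of each verse."""
    keys, last_words = [], []
    for key, value in data.items():
        keys.append(key)
        stripped = value.translate(str.maketrans('', '', string.punctuation))
        last_words.append(stripped.strip().split(' ')[-1].lower())
    return keys, last_words


def generate_partials(data, partial, rhyming_sentences):
    """Yield a copy of `partial` whose 'rest' holds a random third line and a
    rhyming fourth line; yield nothing when no rhyming verse was found."""
    if not rhyming_sentences:
        return
    third = data[random.choice(list(data))]
    fourth = data[random.choice(rhyming_sentences)]
    new_partial = partial.copy()
    new_partial['rest'] = (third, fourth)
    yield new_partial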
def train_model():
    data = read_json_file("data/bible_kjv_wrangled.json")
    sentences = list(data.values())
    # Do we want everything in lowercase?
    sentences = [s.lower() for s in sentences]

    print("-----------Tokenize corpus-------------")
    tokenized_sentences = [nltk.word_tokenize(s) for s in sentences]
    # Augment the Bible corpus with the NLTK 'abc' and 'brown' corpora,
    # keeping only alphabetic tokens longer than one character.
    for corpus in (abc, brown):
        for s in corpus.sents():
            s = [x.lower() for x in s if x.isalpha() and len(x) > 1]
            tokenized_sentences.append(s)

    print("------------TRAINING FASTTEXT-----------")
    # Note: gensim >= 4.0 renamed `size` to `vector_size`.
    model = FastText(tokenized_sentences, size=100, window=5,
                     min_count=5, workers=4, sg=1)
    print("----------------DONE-------------")
    return model
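# Usage sketch for train_model (assumes the NLTK 'punkt', 'abc', and 'brown'
# resources have been downloaded, e.g. via nltk.download('brown')):
#
#     model = train_model()
#     print(model.wv.most_similar('shepherd', topn=5))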
# Earlier, inlined variant of find_lines; note that if both versions live in
# the same module, this second definition shadows the helper-based one above.
def find_lines(emotion: str, rhyming_partials: List[Dict]):
    """Create combinations of ending lines (3rd and 4th) from the knowledge base."""
    data = read_json_file("data/bible_kjv_wrangled.json")
    # There is probably a better/faster way to do this using dictionaries.
    keys = []
    sentences = []
    last_word_of_sentences = []
    for key, value in data.items():
        keys.append(key)
        sentences.append(value)
        # Strip punctuation and take the lowercased last word of the verse.
        last_word = value.translate(str.maketrans('', '', string.punctuation))
        last_word = last_word.strip().split(' ')[-1]
        last_word_of_sentences.append(last_word.lower())

    ret = []
    for partial in rhyming_partials:
        for word in partial['rhymes']:
            rhyming_sentences = [
                keys[i] for i, x in enumerate(last_word_of_sentences) if x == word
            ]
            # The third line is a random verse; the fourth must rhyme, so skip
            # this partial entirely when no rhyming verse exists.
            third = data[random.choice(list(data))]
            if rhyming_sentences:
                fourth = data[random.choice(rhyming_sentences)]
            else:
                continue
            new_partial = partial.copy()
            new_partial['rest'] = (third, fourth)
            ret.append(new_partial)
    return ret
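# Shape sketch for find_lines: 'rhymes' and 'rest' are the only keys the
# function reads/writes; any other keys in a partial (e.g. its own first two
# lines) simply pass through via partial.copy().
#
#     partials = [{'rhymes': ['abide', 'side']}]
#     completed = find_lines('joy', partials)
#     # completed[0]['rest'] == (random_third_line, rhyming_fourth_line)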
        should be a dictionary holding at least an 'evaluation' key with a float value.
        """
        print("Group Roses create with input args: {} {}".format(
            emotion, word_pairs))
        poems = self.evaluate(emotion, word_pairs,
                              self.generate(emotion, word_pairs))
        poems.sort(key=lambda x: x[1])
        return [('\n'.join(poem), {'evaluation': score})
                for poem, score in poems[:number_of_artifacts]]


if __name__ == '__main__':
    poem_creator = PoemCreator()
    parser = argparse.ArgumentParser()
    parser.add_argument('emotion', help='Emotion for the poem.')
    parser.add_argument('word_pairs',
                        help='File for word pairs: a JSON list of lists.')
    parser.add_argument('num_poems', help='Number of poems to output.', type=int)
    args = parser.parse_args()
    word_pairs = read_json_file(DATA_FOLDER + args.word_pairs)
    word_pairs = [tuple(word_pair) for word_pair in word_pairs]
    # Pass the pairs parsed from the file (the original hardcoded
    # [('human', 'boss'), ('animal', 'legged')] here, leaving word_pairs unused).
    for poem in poem_creator.create(args.emotion, word_pairs, args.num_poems):
        print(f'----Poem evaluated {poem[1]}\n{poem[0]}\n----')
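# Example invocation (the module name and file contents are assumptions):
#
#     python poem_creator.py joy word_pairs.json 3
#
# where DATA_FOLDER/word_pairs.json contains a JSON list of lists, e.g.
# [["human", "boss"], ["animal", "legged"]].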