def synonymsDatamuse(term, max):
    """Return up to ``max`` synonyms of ``term`` from the Datamuse API.

    Falls back to at most five loosely related, non-antonym words when
    no result is tagged as a synonym but the query matched something.
    """
    api = datamuse.Datamuse()
    response = api.words(ml=term.lower())

    # First pass: keep only results explicitly tagged as synonyms.
    resultslist = []
    for entry in response:
        if len(resultslist) >= max:
            break
        tags = entry.get('tags')
        if tags and 'syn' in tags:
            resultslist.append(entry['word'].lower().replace("_", " "))

    # Fallback: no synonyms found but the query did match something --
    # take up to five related words that are not antonyms.
    if not resultslist and response:
        limit = min(5, len(response))
        for entry in response:
            if len(resultslist) >= limit:
                break
            tags = entry.get('tags')
            if tags and 'ant' not in tags:
                resultslist.append(entry['word'].lower().replace("_", " "))
    return resultslist
def give_pickup_lines(key_word, num_lines_weight=2, num_lines_random=1):
    """
    Inputs:
    key_word - the category for which you want related pickuplines
    num_lines_weight - how many relevant pickup lines you want based on weights
    num_lines_random - how many relevant pickup lines you want that are random

    Returns (pickup-line list, matched category dict or False).
    """
    related_words = get_related_words(key_word)
    category_dict = find_category(related_words)
    if category_dict == False:
        # No existing category: build one from trigger words.
        # BUG FIX: ``all_dicts`` was previously referenced before assignment
        # (NameError); load the pickled dictionaries first, mirroring
        # initialize_all_dicts().
        all_dicts = get_dicts(find_pickled_files())
        # BUG FIX: get_trigger_words also requires the key word itself.
        trigger_words = get_trigger_words(related_words, key_word)
        new_category, all_dicts = create_category(key_word, trigger_words,
                                                  all_dicts)
        num_lines_random, num_lines_weight = check_num_lines(
            new_category, num_lines_weight, num_lines_random)
        pul_list, temp_dict = find_top_weights(new_category, num_lines_weight)
    else:
        num_lines_random, num_lines_weight = check_num_lines(
            category_dict, num_lines_weight, num_lines_random)
        pul_list, temp_dict = find_top_weights(category_dict, num_lines_weight)
    return pul_list, category_dict
def getDatamuse():
    """Collect gendered noun candidates from the Datamuse API into allWords."""
    api = datamuse.Datamuse()
    source = 'datamuse'

    def callApi(terms, gender):
        # Words accumulated from this source; kept local so multiple
        # definitions of the same word can be merged before publishing.
        collected = []
        for term in terms:
            for hit in api.words(ml=term, max=1000, md='dp'):
                candidate = hit['word'].lower()
                if candidate in discardSet:
                    continue
                # Nouns only.
                if 'tags' not in hit or 'n' not in hit['tags']:
                    continue
                # Prefer the definition the API returned; otherwise look
                # one up ourselves.
                if 'defs' in hit:
                    definition = hit['defs']
                else:
                    definition = getWordDefinition(candidate)
                if definition == ' ' or definition is None or len(definition) == 0:
                    continue
                if candidate in wordSet:
                    # Already-known word: merge in the new definition.
                    entry = findWordInArray(candidate, collected)
                    if entry is not None:
                        addDefinition(entry, definition)
                    continue
                elif candidate not in wordSet and isValidWord(candidate):
                    processWord(candidate, definition, source, collected, gender)
        allWords.extend(collected)

    callApi(femaleTermsArr, 'female')
    callApi(maleTermsArr, 'male')
    print('datamuse done')
def get_rhymes(input_lyric):
    """Return words rhyming with ``input_lyric``, ordered by syllable count."""
    api = datamuse.Datamuse()
    matches = api.words(rel_rhy=input_lyric, max=75)
    by_syllables = sorted(matches, key=lambda entry: entry['numSyllables'])
    # Strip any stray double quotes from the returned words.
    return [entry['word'].replace('"', '') for entry in by_syllables]
def suggest_word_datamuse(w, topics, max_res=100):
    """Query several Datamuse relations for ``w`` and return one DataFrame.

    Each row carries a 'tag' column naming the relation that produced it
    ('means', 'synonym', 'type', 'exampleOf', 'hasComponent', 'partOf').

    w -- the seed word
    topics -- topic hint passed through to the Datamuse API
    max_res -- per-relation result cap (default 100)
    """
    datamuseapi = datamuse.Datamuse()
    # (query keyword, tag) pairs -- replaces six copy-pasted stanzas.
    relations = [
        ('ml', 'means'),
        ('rel_syn', 'synonym'),
        ('rel_spc', 'type'),
        ('rel_gen', 'exampleOf'),
        ('rel_com', 'hasComponent'),
        ('rel_par', 'partOf'),
    ]
    results = []
    for param, tag in relations:
        frame = pd.DataFrame.from_dict(
            datamuseapi.words(topics=topics, max=max_res, **{param: w}))
        frame['tag'] = tag
        results.append(frame)
    return pd.concat(results, ignore_index=True, sort=True)
def get_related_words(key_word, n=10):
    """Input: keyword, n
    Output: list containing keyword and up to n related words (10 by default)"""
    api = datamuse.Datamuse()
    related = [key_word]
    related.extend(get_list(api.words(ml=key_word, max=n), key_word))
    return related
def get_trigger_words(related_words, key_word, n=10):
    """Return ``related_words`` plus up to ``n`` Datamuse trigger words.

    NOTE(review): triggers are fetched for ``key_word`` only, not once
    per related word as the original docstring claimed.
    """
    api = datamuse.Datamuse()
    triggers = list(related_words)
    triggers.extend(get_list(api.words(rel_trg=key_word, max=n), key_word))
    return triggers
def create_list_of_near_names(name):
    """Return names that sound exactly like ``name`` (Datamuse score 100),
    always including ``name`` itself."""
    api = datamuse.Datamuse()
    candidates = api.words(sl=name, max=10)
    names = [c['word'] for c in candidates if c['score'] == 100]
    if name not in names:
        names.append(name)
    return names
def get_datamuse_sysnonymous(word):
    """Return the Datamuse result dicts for ``word`` that are tagged as nouns."""
    api = datamuse.Datamuse()
    terms = api.words(ml=word)
    return [t for t in terms if 'tags' in t and 'n' in t['tags']]
def findNearNames(name):
    """Return a small list of names that sound exactly like ``name``."""
    api = datamuse.Datamuse()
    hits = api.words(sl=name, max=10)
    nameSet = {hit['word'] for hit in hits if hit['score'] == 100}
    nameSet.add(name)
    nameList = list(nameSet)
    # NOTE(review): mirrors the original -- keeps only 4 entries when the
    # set holds more than 5; set ordering makes the selection arbitrary.
    if len(nameSet) > 5:
        nameList = nameList[:4]
    return nameList
def __init__(self, lyrics: str):
    """Initialize a song object.

    Arguments:
        lyrics {str} -- raw lyrics text to analyze
    """
    self.api = datamuse.Datamuse()
    self.lyrics = lyrics
    # Strip punctuation, then split on spaces or newlines.
    cleaned = re.sub("[.,:?!;\-()']", "", self.lyrics)
    self.lyrics_array = re.split("[ |\n]", cleaned)
    self.rhymes = self.generate_rhymes(self.lyrics_array)
    # Common words excluded from rhyme consideration.
    self.blacklist = ['a', 'the', 'can', 'an']
def get_rhyme_dict(top_words, api=None):
    """Map each word in ``top_words`` to its exact + near rhymes from Datamuse."""
    print('Getting rhyming dictionaries...')
    if not api:
        api = datamuse.Datamuse()
    rhyme_dict = {}
    for word in tqdm(top_words):
        exact = [hit['word'] for hit in api.words(rel_rhy=word, max=20)]
        near = [hit['word'] for hit in api.words(rel_nry=word, max=20)]
        rhyme_dict[word] = exact + near
    return rhyme_dict
def get_datamuse_sysnonymous(word, size):
    """Return noun words related to ``word``; at most ``size`` of them
    (``size == -1`` means no limit)."""
    api = datamuse.Datamuse()
    nouns = [t['word'] for t in api.words(ml=word)
             if 'tags' in t and 'n' in t['tags']]
    if size != -1 and len(nouns) > size:
        return nouns[:size]
    return nouns
def __init__(self, context=None):
    """context -- context to be parsed and used for context (can be raw or nltk.Text)"""
    self.datamuse = datamuse.Datamuse()
    self.syllable_dict = {}
    self.last_seen_word = None
    self.contextual = False
    if context:
        self.context = context
        self.contextual = True
        self._prepare_for_poet()
    # Announce which mode we ended up in.
    if self.contextual:
        print("contextual mode")
    else:
        print("free mode")
def downloadData():
    """Batch-download rhymes for the words in words_alpha.txt.

    Resumes from the line number stored in progress.txt, processes up to
    75000 words per run, and appends 'word;rhyme' pairs to rhymes.txt.
    The current position is written back to progress.txt even on failure.
    """
    api = datamuse.Datamuse()
    try:
        words = open("words_alpha.txt", encoding='utf-8')
        rhymes = open("rhymes.txt", 'a', encoding='utf-8')
        progress = open("progress.txt", 'r', encoding='utf-8')
        line_no = int(progress.readline())
        progress.close()
        line_begin = line_no
        print(line_no)
        for i, line in enumerate(words):
            if (i == line_no):
                if (line_no < line_begin + 75000):
                    line = line.rstrip()
                    # BUG FIX: api_rhymes was referenced below even when the
                    # request failed before assigning it (NameError); default
                    # to an empty list so the near-rhyme fallback still runs.
                    api_rhymes = []
                    try:
                        api_rhymes = api.words(rel_rhy=line, max=5)
                        for rh in api_rhymes:
                            rhymes.write(line + ';' + rh["word"] + '\n')
                        print(line)
                    except Exception:  # narrowed from a bare except
                        print("json decode on " + line)
                    # Too few exact rhymes: top up with near rhymes.
                    if len(api_rhymes) < 5:
                        try:
                            near_rhymes = api.words(rel_nry=line, max=5)
                            for rh in near_rhymes:
                                rhymes.write(line + ';' + rh["word"] + '\n')
                            print(line)
                        except Exception:  # narrowed from a bare except
                            print("json decode on " + line)
                    rhymes.flush()
                    line_no = line_no + 1
                else:
                    break
    finally:
        # Persist progress so the next run resumes where this one stopped.
        progress = open("progress.txt", 'w', encoding='utf-8')
        progress.write(str(line_no))
        words.close()
        rhymes.close()
        progress.close()
def get_rhyming_words(to_rhyme):
    """
    params: str
    Uses Datamuse to find rhymes for the last syllable of the user's
    name: first the syllable as given, then an alternate syllable via
    get_alternate_syllable if the first check found nothing.  Matches
    are narrowed to common English nouns when any overlap exists.
    returns: list
    """
    api = datamuse.Datamuse()
    # 'ps' asks Datamuse for part-of-speech and syllable-count metadata.
    match_list = get_list(api.words(rel_rhy=to_rhyme, md='ps'))
    # Second chance: rhyme against an alternate final syllable.
    if not match_list:
        alt_syll = get_alternate_syllable(to_rhyme)
        match_list = get_list(api.words(rel_rhy=alt_syll, md='ps'))
    # Fetch a list of common English nouns to filter against.
    url = "http://www.desiquintans.com/downloads/nounlist/nounlist.txt"
    file = urllib.request.urlopen(url)
    text_list = [line.decode("utf-8").strip() for line in file]
    matched_list = [
        rhyme for rhyme in match_list for word in text_list if rhyme == word
    ]
    # Fall back to the unfiltered rhymes when nothing is a common noun.
    return matched_list if matched_list else match_list
def bfs(u, v) -> list:
    """Breadth-first search for a chain of Wikipedia links from page u to v.

    Only links whose titles appear in a Datamuse-derived relevance list
    are expanded.  Returns the path as a list of pages, or [] if none.
    """
    api = datamuse.Datamuse()
    SFMap = {}  # title -> NodeData(distance, predecessor)
    q = deque()
    q.append(u)
    SFMap[u.title] = NodeData(0, None)
    # Words related to the target; used to prune which links we expand.
    lst = createList(v, api)
    while q:
        u = q.popleft()
        print("Popping " + u.title + "...")
        if u == v:
            return pathify(SFMap, v)
        for pageTitle in u.links:
            # BUG FIX: was ``len(list)`` -- the builtin type, a TypeError.
            if (len(lst) == 1 or pageTitle.lower() in lst):
                try:
                    wikiPage = wikipedia.page(pageTitle, auto_suggest=False)
                except wikipedia.DisambiguationError:
                    continue
                except wikipedia.PageError:
                    continue
                pageDist = 1 + SFMap.get(u.title).dist
                if (wikiPage.title not in SFMap):
                    print("Adding " + pageTitle + "...")
                    SFMap[wikiPage.title] = NodeData(pageDist, u)
                    q.append(wikiPage)
                if (len(q) % 100 == 0):
                    print("Queue Size: " + str(len(q)) + " articles!")
                    print("\n")
                #if (pageTitle == v.title):
                # BUG FIX: ``wikipage`` was an undefined name (NameError);
                # compare the page we just fetched.  Found the target: jump
                # it to the front of the queue and stop expanding here.
                if (wikiPage == v):
                    q.appendleft(wikiPage)
                    break
    return []
def api(word):
    """
    Requires: word is a string
    Modifies: nothing
    Effects: uses datamuse api to return list of words that rhyme
    with word (exact rhymes first, then near rhymes)
    """
    # makes rhymer an instance of Datamuse class
    rhymer = datamuse.Datamuse()
    # Exact rhymes followed by near rhymes, as dicts with metadata.
    rhymes_dict = rhymer.words(rel_rhy=word)
    rhymes_dict += rhymer.words(rel_nry=word)
    # Each result carries 'score'/'numSyllables' metadata we don't need.
    # BUG FIX: replaces a fragile delete-while-ranging loop with pop().
    u_rhymes = []
    for entry in rhymes_dict:
        entry.pop('score', None)
        entry.pop('numSyllables', None)
        u_rhymes += entry.values()
    # NOTE(review): .encode() yields bytes on Python 3 -- preserved from
    # the original (Python 2 era) behavior so callers are unaffected.
    return [s.encode('UTF8') for s in u_rhymes]
def give_pickup_lines(key_word, num_lines_weight=2, num_lines_random=3):
    """
    Inputs:
    key_word - the category for which you want related pickuplines
    num_lines_weight - how many relevant pickup lines you want based on weights
    num_lines_random - how many relevant pickup lines you want that are random
    """
    # NOTE(review): this is an interactive REPL -- the parameters above are
    # overwritten at the top of every loop iteration and the function never
    # returns; confirm whether the signature is still meant to be honored.
    #setting up datamuse api
    api = datamuse.Datamuse()
    #pickling in the database of pickup lines
    pickle_files = find_pickled_files()
    all_dicts = get_dicts(pickle_files)
    # print(all_dicts)
    while 1:
        # Reset the per-iteration quotas (shadows the parameters).
        num_lines_weight = 2
        num_lines_random = 1
        #taking a keyword and searching for relevant categories
        key_word = standardize_format(
            input(
                "Beep boop! Give me one key word and I'll give you pickup lines! Beep boop! \n"
            ))
        related_words = get_related_words(key_word)
        category_dict = find_category(related_words)
        #if no categories exist, the script makes its own and returns it
        if category_dict == False:
            print(
                "Hmm... I don't think anyone's asked for that key word before! Let me search around in my bigger database."
            )
            trigger_words = get_trigger_words(related_words)
            new_category, all_dicts = create_category(key_word, trigger_words,
                                                      all_dicts)
            num_lines_random, num_lines_weight = check_num_lines(
                new_category, num_lines_weight, num_lines_random)
            if num_lines_random + num_lines_weight == 0:
                print(
                    "Whoops, didn't find any relevant pickup lines for that. Feel free to try other words though!"
                )
            # Highest-weighted lines first; collect feedback on each one.
            pul_list, temp_dict = find_top_weights(new_category,
                                                   num_lines_weight)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelavent \n"
                )
                new_category, all_dicts = adjust_weight(
                    user_input, key_word, new_category, pul, all_dicts)
                serialize_dicts(all_dicts)
            # Then a batch of randomly chosen lines, same feedback loop.
            pul_list, temp_dict = find_random(temp_dict, num_lines_random)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelavent \n"
                )
                new_category, all_dicts = adjust_weight(
                    user_input, key_word, new_category, pul, all_dicts)
                serialize_dicts(all_dicts)
        #if a category does exist, the script returns the relevant category
        else:
            num_lines_random, num_lines_weight = check_num_lines(
                category_dict, num_lines_weight, num_lines_random)
            if num_lines_random + num_lines_weight == 0:
                print(
                    "Whoops, didn't find any relevant pickup lines for that. Feel free to try other words though!"
                )
            pul_list, temp_dict = find_top_weights(category_dict,
                                                   num_lines_weight)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelavent \n"
                )
                category_dict, all_dicts = adjust_weight(
                    user_input, key_word, category_dict, pul, all_dicts)
                serialize_dicts(all_dicts)
            pul_list, temp_dict = find_random(temp_dict, num_lines_random)
            for pul in pul_list:
                print("\n" + pul + "\n")
                user_input = input(
                    "Give me feedback on the pickup line! \n Type good if it was good \n Type okay if it was okay \n Type bad if it was bad \n Type wrong if it was irrelavent \n"
                )
                category_dict, all_dicts = adjust_weight(
                    user_input, key_word, category_dict, pul, all_dicts)
                serialize_dicts(all_dicts)
def __init__(self, bot, db):
    """Hold the bot and database handles and create a Datamuse client."""
    self.api = datamuse.Datamuse()
    self.bot = bot
    self.db = db
def __init__(self):
    """Set up the Datamuse client plus the Clarifai app and workflow."""
    self.api = datamuse.Datamuse()
    # The workflow is built on the app's underlying API client.
    self.app = ClarifaiApp(api_key=config.get_api_key())
    self.workflow = Workflow(self.app.api, workflow_id="rebus-workflow")
# NOTE(review): orphaned fragment -- the leading text closes a triple-quoted
# string whose opening is not visible in this file, followed by commented-out
# experiments; only the trailing datamuse query is live code.  Kept verbatim.
print(define_word('food')) print('food' in words.words())""" #from datamuse import datamuse #dm = datamuse.Datamuse() #orange_rhymes = dm.words(rel_rhy='orange', max=5) #print(orange_rhymes) #import requests #import re #import xml #r = requests.get('http://en.wiktionary.org/w/index.php?title=test&printable=yes') #c = requests.get('http://en.wiktionary.org/w/index.php', params={'title': 'test', 'printable': 'yes'}) #print(r.text) #import requests # we will be using requests to query the unofficial Google Dictionary API # from https://googledictionaryapi.eu-gb.mybluemix.net/ #r = requests.get('https://mydictionaryapi.appspot.com', params={'define': 'test'}) #print(r.text["meaning"]) from datamuse import datamuse dm = datamuse.Datamuse() w = dm.words(rel_jjb='industrialization', md='d,f', max=10) for word in w: print(word['word']) print(word['tags'])
import tweepy
from datamuse import datamuse
from random import randint

# SECURITY(review): real-looking Twitter credentials are hard-coded below;
# they should be revoked and loaded from environment variables instead.
CONSUMER_KEY = "CVFmG4DSOvGc1gVHtBP5qbYzJ"
CONSUMER_SECRET = "t4ouGgBMNGbabfrSRe7k0rZUVdTIDGNYa7tqL4bEeoEYKitE2u"
ACCESS_TOKEN = "1091439390541271040-otarsi1XOiPZU5vw5L8yqkkNo0NX12"
ACCESS_TOKEN_SECRET = "nq41iqJPyrAHhqywycDn4HpYqLnSta2i412LsHBNjFSVk"

auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
api2 = datamuse.Datamuse()

#public_tweets = api.home_timeline()
#for tweet in public_tweets:
#    print (tweet.text)


def word(str):
    # NOTE(review): the parameter shadows the builtin ``str``.
    b = api2.words(rel_rhy=str, max=10)
    return (b)


print("Enter a word fam")
userWord = input("")
anyword = word(userWord)
#dict1 = word[2]
#print(dict1)
#del dict1['score']
def uploaded_file(filename, s, e):
    """OCR pages s..e of a PDF, build a Word doc of notes + vocab, convert
    it to PDF, record it, and render the result page.

    filename -- PDF file name under static/
    s, e -- 1-based first/last page numbers (strings or ints)
    """
    import fitz
    import pytesseract
    # NOTE(review): hard-coded Windows Tesseract path -- not portable.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files (x86)\Tesseract-OCR\tesseract.exe"
    pdffile = filename
    doc = fitz.open('static' + '/' + filename)
    # Render each requested page to a PNG for OCR.
    for i in range(int(s) - 1, int(e)):
        page = doc.loadPage(i)  # number of page
        pix = page.getPixmap()
        output = "outfile" + str(i) + ".png"
        pix.writePNG(output)
    # OCR every rendered page into one text blob.
    x = ''
    for i in range(int(s) - 1, int(e)):
        x += pytesseract.image_to_string(f'outfile{str(i)}.png')
    from PyDictionary import PyDictionary
    from summa import keywords
    from summa.summarizer import summarize
    import nltk
    from nltk.tokenize import sent_tokenize
    from docx import Document
    f = x
    b = str(filename.replace('.pdf', ''))
    # Extract keywords from the OCR text, flatten to individual tokens.
    a = x
    a = keywords.keywords(a)
    dictionary = PyDictionary()
    a = a.split('\n')
    a1 = []
    for i in a:
        # NOTE(review): reuses and clobbers ``x`` (the OCR text) here.
        x = i.split(' ')
        for j in x:
            a1.append(j)
    # Longest tokens first, then keep at most 20.
    a1.sort(key=lambda s: len(s))
    a1.reverse()
    try:
        a1 = a1[:20]
    except:
        pass
    a = set(a1)
    a = tuple(a1)
    # Keep the first ten (IndexError-guarded) candidates.
    a1 = []
    for i in range(10):
        try:
            a1.append(a[i])
        except:
            pass
    from nltk.stem import WordNetLemmatizer
    lemmatizer = WordNetLemmatizer()
    # Lemmatize and deduplicate the vocab words.
    a = a1
    a1 = []
    for i in a:
        a1.append(lemmatizer.lemmatize(i))
    a = list(set(a1))
    a1 = a
    # Look up dictionary meanings; summarize the OCR text into notes.
    a = [dictionary.meaning(i) for i in a1]
    z = sent_tokenize(summarize(f, ratio=0.25))
    doc = Document()
    doc.add_heading('Notes for ' + b, 0)
    for i in z:
        doc.add_paragraph(i)
    doc.add_heading('Vocab Words from ' + b, 0)
    # One numbered paragraph per vocab word: meaning then synonyms.
    for i in range(len(a)):
        c = doc.add_paragraph(str(i + 1) + ') ')
        c.add_run(a1[i]).bold = True
        c.add_run(': ')
        d = str(list(a[i].values()))
        d = d.replace('[', '')
        d = d.replace(']', '')
        c.add_run(d)
        g = doc.add_paragraph('')
        g.add_run('Synonyms for ')
        g.add_run(a1[i].upper() + ': ').bold = True
        from datamuse import datamuse
        api = datamuse.Datamuse()
        s = api.words(ml=a1[i], max=10)
        s1 = []
        # NOTE(review): inner loop variable shadows the outer ``i``.
        for i in s:
            for j in i:
                if j == 'word':
                    s1.append(i[j])
        g.add_run(str(s1).replace('[', '').replace(']', '').replace("'", '')).italic = True
    # Sanitize the output file name down to ASCII letters.
    whitelist = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
    fileName = b.replace(' ', '')
    fileName = ''.join(filter(whitelist.__contains__, fileName))
    fileName += '.docx'
    doc.save(fileName)
    import cloudmersive_convert_api_client
    from cloudmersive_convert_api_client.rest import ApiException
    configuration = cloudmersive_convert_api_client.Configuration()
    # SECURITY(review): hard-coded Cloudmersive API key -- move to config.
    configuration.api_key['Apikey'] = 'f0c513bc-8c00-4491-830e-3e83b015feb6'
    api_instance = cloudmersive_convert_api_client.ConvertDocumentApi(
        cloudmersive_convert_api_client.ApiClient(configuration))
    try:
        # Convert Word DOCX Document to PDF
        api_response = api_instance.convert_document_docx_to_pdf(fileName)
        file = open('static/' + fileName.replace('.docx', '.pdf'), 'wb')
        file.write(api_response)
        file.close()
    except ApiException as e:
        print(
            "Exception when calling ConvertDocumentApi->convert_document_docx_to_pdf: %s\n"
            % e)
    # Record the generated PDF and render the notes page.
    myFile = fileName.replace('.docx', '.pdf')
    myFile2 = myFile
    note = Note(noteFile=str(myFile2), creator=current_user)
    db.session.add(note)
    db.session.commit()
    myFile = url_for('.static', filename=myFile)
    return render_template('notes.html', myFile=myFile)
def predict(word):
    """Return up to six Datamuse autocomplete suggestions for ``word``."""
    client = datamuse.Datamuse()
    return client.suggest(s=word, max=6)
def initialize_all_dicts():
    """Load every pickled pickup-line dictionary from disk.

    Returns the dict collection produced by get_dicts().
    """
    # Removed an unused datamuse.Datamuse() instantiation.
    pickle_files = find_pickled_files()
    return get_dicts(pickle_files)
# NOTE(review): truncated snippet -- get_translation is cut off mid-expression
# (the trailing list literal and return are missing), so it is kept verbatim
# rather than reformatted; restore the full body before editing further.
from googletrans import Translator as GoogleTrans from datamuse import datamuse translator = GoogleTrans() similar_finder = datamuse.Datamuse() def get_translation(word, src, dest='en'): if src: res = translator.translate(word, src=src, dest=dest) else: res = translator.translate(word, dest=dest) translation = {} translation['target'] = word translation['result'] = res.text translation['from_google'] = [] translation['extra_similarity'] = [] if res.extra_data['all-translations']: for item in res.extra_data['all-translations']: translation['from_google'].append( (item[0], [i[:2] + i[3:] for i in item[2]])) if len(translation['from_google']) <= 1: text = res.text # datamuse only support english similar words right now if dest == 'en': similars = similar_finder.words(ml=text, max=4) for item in similars: translation['extra_similarity'].append([ item['word'], [i for i in item.get("tags", []) if i != 'syn'], item['score']
# NOTE(review): truncated snippet -- the trailing ``def generatingWordListTrg``
# header has no body in this view, so the line is kept verbatim rather than
# reformatted; restore the missing body before editing further.
from googletrans import Translator from datamuse import datamuse translator = Translator() api = datamuse.Datamuse() wlFinalMl = set() wlFinalTrg = set() wlTranslatedMl = set() wlTranslatedTrg = set() def generatingWordListMl(wordList): for word in wordList: #print(len(wordList)) if len(wordList) >= 15: break mlResults = api.words(ml=word, max=10) for result in mlResults: res = result.get("word") if res not in wordList: wlFinalMl.add(res) wordList.append(res) return wordList def generatingWordListTrg(wordList):
def search_products(request):
    """Search products by name, description, vendor, and category.

    Runs three passes of decreasing strictness -- an AND of all query
    tokens, an OR of all tokens, then an OR over Datamuse-related words --
    and returns the deduplicated union, also recording it as the user's
    latest search history when authenticated.
    """
    products = []
    serializer = ProductSearchSerializer(data=request.data)
    serializer.is_valid(raise_exception=True)
    query = serializer.validated_data['query']
    if not query:
        return Response(data={'error': 'Query is not valid'},
                        status=status.HTTP_400_BAD_REQUEST)
    porter_stemmer = PorterStemmer()
    # Raw tokens plus their stems are both matched against the catalog.
    query_list = [q.strip() for q in query.split()]
    stem_list = [porter_stemmer.stem(q) for q in query_list]
    # Strict pass: every token must match the product or its vendor.
    Q_strict = Q()
    for index in range(len(query_list)):
        Q_product = Q(name__icontains=query_list[index]) | Q(
            description__icontains=query_list[index]) | Q(
                name__icontains=stem_list[index]) | Q(
                    description__icontains=stem_list[index])
        Q_vendor = Q(vendor__user__username__icontains=query_list[index])
        Q_strict &= Q_product | Q_vendor
    if bool(Q_strict):
        products_strict = Product.objects.filter(Q_strict)
        for product in products_strict:
            if product not in products:
                products.append(product)
    # Soft pass: any token may match product, category, or vendor.
    Q_soft = Q()
    for index in range(len(query_list)):
        Q_product = Q(name__icontains=query_list[index]) | Q(
            description__icontains=query_list[index]) | Q(
                name__icontains=stem_list[index]) | Q(
                    description__icontains=stem_list[index])
        Q_category = Q(subcategory__name__icontains=query_list[index]) | Q(
            subcategory__category__name__icontains=query_list[index])
        Q_vendor = Q(vendor__user__username__icontains=query_list[index])
        Q_soft |= Q_product | Q_category | Q_vendor
    if bool(Q_soft):
        products_soft = Product.objects.filter(Q_soft)
        for product in products_soft:
            if product not in products:
                products.append(product)
    # Datamuse pass: expand the whole query with up to five related words.
    datamuse_api = datamuse.Datamuse()
    keyword_list = datamuse_api.words(ml=query, max=5)
    Q_datamuse = Q()
    for keyword in keyword_list:
        word = keyword['word']
        Q_product = Q(name__icontains=word) | Q(
            description__icontains=word) | Q(
                subcategory__name__icontains=word) | Q(
                    subcategory__category__name__icontains=word)
        Q_vendor = Q(vendor__user__username__icontains=word)
        Q_datamuse |= Q_product | Q_vendor
    if bool(Q_datamuse):
        products_datamuse = Product.objects.filter(Q_datamuse)
        for product in products_datamuse:
            if product not in products:
                products.append(product)
    # Add prodcuts to search history if user is authenticated
    # (any previous history row is replaced wholesale).
    if (not request.user.is_anonymous) and products:
        user = request.user
        search_history = SearchHistory.objects.filter(user=user).first()
        if search_history:
            search_history.delete()
        search_history = SearchHistory(user=user)
        search_history.save()
        for product in products:
            search_history.products.add(product)
    content = ProductSerializer(products, many=True)
    return Response(data=content.data, status=status.HTTP_200_OK)
def rap(sentences):
    """Pair up sentences whose endings rhyme, returning a list of couplets.

    Spawns one thread per ending word (perfect rhymes first, near rhymes
    as a fallback when too few were found), then orders and length-filters
    the resulting couplets.
    """
    used = set()
    rhymes = []
    api = datamuse.Datamuse()
    # NOTE(review): removing items from a list while iterating it skips the
    # element after each removal -- consecutive empty sentences may survive.
    for s in sentences:
        if not s:
            sentences.remove(s)
    ending_words = get_end_words(sentences)
    seen = set()
    # One worker thread per ending word; each writes into its own slot.
    threads = [None] * len(ending_words)
    results = [[] for _ in range(len(ending_words))]
    for x, word1 in enumerate(ending_words):
        threads[x] = Thread(
            target=thread_perf,
            args=(word1, x, ending_words, results[x], seen, api, used),
        )
        threads[x].start()
        # Stagger thread starts to avoid hammering the API.
        sleep(THREAD_WAIT)
    for i in range(len(threads)):
        threads[i].join()
    for r in results:
        if r:
            for e in r:
                rhymes.append(e)
    print(rhymes)
    seen2 = set()
    # Too few perfect rhymes: run a second pass using near rhymes.
    if len(rhymes) < USE_MORE_MAX:
        print("USING MORE")
        threads2 = [None] * len(ending_words)
        results2 = [[] for _ in range(len(ending_words))]
        for x, word1 in enumerate(ending_words):
            threads2[x] = Thread(
                target=thread_almost,
                args=(word1, x, ending_words, results2[x], seen2, api, used),
            )
            threads2[x].start()
            sleep(THREAD_WAIT)
        for i in range(len(threads2)):
            threads2[i].join()
        for r in results2:
            if r:
                for e in r:
                    rhymes.append(e)
        print(rhymes)
    # Stable sort: closest index pairs first, then by first-line index.
    rhymes.sort(key=lambda x: abs(x[2] - x[3]))
    rhymes.sort(key=lambda x: x[2])
    couplets = []
    for r in rhymes:
        couplets.append((sentences[r[2]].replace(u'\xa0', u' '),
                         sentences[r[3]].replace(u'\xa0', u' ')))
    # couplets.sort(key=lambda x: abs(len(x[0]) - len(x[1])))
    # couplets.sort(key=lambda x: len(x[0]))
    couplets = [
        c for c in couplets
        if len(c[0]) < MAX_LINE_LENGTH and len(c[1]) < MAX_LINE_LENGTH
    ]
    return couplets