def dictonarySynonums(self, word):
    """Return the set of ASCII-encoded synonym strings for *word*.

    Uses the Vocabulary API via ``vb.synonym``; that call returns ``False``
    when no synonyms exist, in which case an empty set is returned.
    """
    syn_json = vb.synonym(word)
    if syn_json is False:
        return set()
    # Reuse the first response instead of calling vb.synonym() a second
    # time (the original issued the network request twice).
    return {
        entry["text"].encode('ascii', 'ignore')
        for entry in json.loads(syn_json)
    }
def labels(self, query):
    """Return lookup results as a flat list of (query, category, text) tuples."""
    results = []

    meanings = vb.meaning(query, format="list")
    if meanings:
        # Strip both HTML and BBCode italics markers from definitions.
        for entry in meanings:
            cleaned = entry
            for marker in ("<i>", "</i>", "[i]", "[/i]"):
                cleaned = cleaned.replace(marker, "")
            results.append((query, "meaning", cleaned))

    synonyms = vb.synonym(query, format="list")
    if synonyms:
        results.extend((query, "synonym", entry) for entry in synonyms)

    antonyms = vb.antonym(query, format="list")
    if antonyms:
        results.extend((query, "antonym", entry) for entry in antonyms)

    examples = vb.usage_example(query, format="list")
    if examples:
        results.extend((query, "usage_example", entry) for entry in examples)

    return results
def test_synonynm_found(self, mock_api_call):
    """A 200 response with phrases should yield the expected synonym JSON."""
    api_payload = {
        "tuc": [
            {"phrase": {"text": "get angry", "language": "en"}},
            {"phrase": {"text": "mad", "language": "en"}},
        ]
    }
    api_response = mock.Mock()
    api_response.status_code = 200
    api_response.json.return_value = api_payload
    mock_api_call.return_value = api_response

    expected_result = json.dumps(
        json.loads('[{"text": "get angry", "seq": 0}, {"text": "mad", "seq": 1}]')
    )
    result = vb.synonym("angry")
    # Python 2's unittest spells the unordered comparison differently.
    if sys.version_info[:2] <= (2, 7):
        self.assertItemsEqual(expected_result, result)
    else:
        self.assertCountEqual(expected_result, result)
def query(self, query):
    """Return raw lookup results for *query* as a dict of lists (empty on miss)."""
    def _strip_italics(text):
        # Remove both HTML and BBCode italics markers.
        for marker in ("<i>", "</i>", "[i]", "[/i]"):
            text = text.replace(marker, "")
        return text

    cons = {
        "meaning": [],
        "synonym": [],
        "antonym": [],
        "usage_example": [],
        "part of speech": [],
    }

    meanings = vb.meaning(query, format="list")
    if meanings:
        cons["meaning"] = [_strip_italics(entry) for entry in meanings]

    # These three are stored verbatim; preserve the lookup order.
    for key, lookup in (("synonym", vb.synonym),
                        ("antonym", vb.antonym),
                        ("part of speech", vb.part_of_speech)):
        found = lookup(query, format="list")
        if found:
            cons[key] = found

    examples = vb.usage_example(query, format="list")
    if examples:
        cons["usage_example"] = [
            entry.replace("[", "").replace("]", "") for entry in examples
        ]

    return cons
def test_synonynm_empty_list(self, mock_api_call):
    """An empty 'tuc' list in the API payload makes vb.synonym() falsy."""
    payload = {"result": "ok", "tuc": [], "phrase": "angry"}
    api_response = mock.Mock()
    api_response.status_code = 200
    api_response.json.return_value = payload
    mock_api_call.return_value = api_response
    self.assertFalse(vb.synonym("angry"))
def synonym(self, word):
    """Return a formatted 'SYNONYMS:' line for *word*, or False when none."""
    found = vb.synonym(word, format="list")
    if not found:
        return False
    # Assemble in the original order: header, first synonym, the "more"
    # suffix for the rest, then the trailing line.
    pieces = ['SYNONYMS: ', found[0], self.more(found[1:]), self.addLine()]
    return ''.join(pieces)
def get_syn(x):
    """Return the first Vocabulary-API synonym of each word in string *x*.

    Words with no synonym (the API returns a falsy value) are skipped, and
    duplicate first-synonyms are collapsed while preserving encounter order.
    """
    syns = []
    # Iterate the tokens directly instead of the original index loop
    # (``for ii in range(0, len(word))``).
    for token in x.split():
        candidates = vb.synonym(token, format='list')
        if candidates and candidates[0] not in syns:
            syns.append(candidates[0])
    return syns
def parse_keyword_synonym(self, tokens):
    """Return up to ``self.synonym_limit`` synonym strings for each token.

    Tokens whose lookup fails are skipped silently, matching the original
    best-effort behaviour — but the blanket bare ``except:`` is narrowed to
    the failures actually expected here.
    """
    synonyms = []
    for token in tokens:
        try:
            # vb.synonym() returns False on a miss, so json.loads raises
            # TypeError; ValueError covers malformed JSON; KeyError covers
            # entries without a "text" field.
            entries = json.loads(vb.synonym(token))[:self.synonym_limit]
            for entry in entries:
                synonyms.append(entry["text"])
        except (TypeError, ValueError, KeyError):
            continue
    return synonyms
def synonym(text, lang='en-en'):
    # do not support russian
    """Return a numbered, tab-indented synonym listing for *text*."""
    src, dest = lang.split('-')
    raw = voc.synonym(text, src, dest)
    if not raw or raw == '[]':
        return ''
    # Build one numbered line per synonym entry and join at the end.
    lines = [
        '\t' + str(index) + '. ' + entry['text'] + '\n'
        for index, entry in enumerate(json.loads(raw), start=1)
    ]
    return ''.join(lines)
def synonym(word):
    """Return a sentence listing up to four synonyms of *word*.

    Returns the original not-found message when the API reports no match,
    or when it returns an empty synonym list (the original sliced two
    characters off the bare prefix in that case, yielding "Synonyms ar").
    """
    raw = vb.synonym(word)
    if raw is False:
        return "No Synonyms Founds"
    picks = [entry['text'] for entry in json.loads(raw)[:4]]
    if not picks:
        # Guard the empty-payload edge case instead of mangling the message.
        return "No Synonyms Founds"
    return "Synonyms are " + ", ".join(picks)
def generate_synonym_list(query):
    """ Creates a list of synonym suggestions for important words in the query """
    suggestions = []
    for token in query.split():
        raw = vb.synonym(token)
        if not raw:
            continue
        # One (label, replacement-pair) suggestion per synonym of the token.
        for entry in json.loads(raw):
            alt = entry['text']
            suggestions.append(
                ("Replace %s with %s" % (token, alt), token + " " + alt))
    return suggestions
def wordApplications(mess):
    """Dispatch a chat message to define/antonym/synonym/is-it-a-word handlers.

    Returns a formatted reply string, or None when no word could be
    extracted (or nothing matched).
    """
    # NOTE(review): the and/or chains below rely on Python precedence
    # ('and' binds tighter than 'or'); kept exactly as written.
    if mess.find("soph") == 0 and ("define" in mess or "definition" in mess
                                   or "meaning" in mess) or mess.find("define") == 0:
        word = getWord(mess)
        if word == None:
            return
        else:
            # d.meaning() presumably comes from a dictionary library — its
            # stringified result is post-processed by meaningParser.
            result = str(d.meaning(word))
            resultF = meaningParser(result)
            return ("***" + str(word).upper() + "*** meaning(s)" + str(resultF))
    elif mess.find("soph") == 0 and ("antonym" in mess or "opposite" in mess
                                     or "different" in mess and "word" in mess) or mess.find("antonym") == 0:
        word = getWord(mess)
        if word == None:
            return
        else:
            result = str(v.antonym(word))
            resultF = parser(result)
            # "1. " with nothing after it means the parser found no entries.
            if resultF == None or resultF == "1. ":
                return ("Couldn't find any :(")
            return ("**" + str(word) + "** antonym(s)\n" + str(resultF))
    elif mess.find("soph") == 0 and ("synonym" in mess
                                     or "similar to" in mess and "word" in mess) or mess.find("synonym") == 0:
        word = getWord(mess)
        if word == None:
            return
        else:
            result = str(v.synonym(word))
            resultF = parser(result)
            if resultF == None or resultF == "1. ":
                return ("Couldn't find any :(")
            return ("**" + str(word) + "** synonym(s)\n" + str(resultF))
    # Fall-through: handle "soph is <word> a word" style queries by
    # truncating at the first 'd' and checking the reversed message.
    mess = mess[:mess.find("d") + 1]
    revmess = mess[::-1]
    if revmess.find("drow") == 0 and mess.find(
            "soph") == 0 and " is " in mess and " a " in mess:
        word = mess[mess.find(" is ") + 4:mess.find(" a ")]
        if d.meaning(word) == None:
            return ("Well, it's not in my dictionary... :(")
        else:
            return ("Yeah, it is! :D")
def get_dictionary(subject):
    """Bundle meaning/synonym/antonym/example/POS lookups for *subject*."""
    info = {
        "meaning": [],
        "synonym": [],
        "antonym": [],
        "example": [],
        "part of speech": [],
    }

    meanings = vb.meaning(subject, format="list")
    if meanings:
        # Strip HTML and BBCode italics markers from each definition.
        cleaned = []
        for entry in meanings:
            for marker in ("<i>", "</i>", "[i]", "[/i]"):
                entry = entry.replace(marker, "")
            cleaned.append(entry)
        info["meaning"] = cleaned

    synonyms = vb.synonym(subject, format="list")
    if synonyms:
        info["synonym"] = synonyms

    antonyms = vb.antonym(subject, format="list")
    if antonyms:
        info["antonym"] = antonyms

    pos = vb.part_of_speech(subject, format="list")
    if pos:
        info["part of speech"] = pos

    examples = vb.usage_example(subject, format="list")
    if examples:
        info["example"] = [ex.replace("[", "").replace("]", "") for ex in examples]

    return info
def modify(text: str) -> str:
    """Replace the five most frequent content words of *text* with synonyms.

    Words are counted case-insensitively, excluding stopwords and tokens
    that are not purely word characters.  Each frequent word is replaced
    (case-insensitively, everywhere) by its first Vocabulary-API synonym;
    words with no synonym are reported and left untouched.

    Note: the original annotation claimed ``Tuple[str, str]`` but exactly
    one string is ever returned — the annotation is corrected here.
    """
    stopwords = get_stopwords()
    re_word = re.compile(r'^\w+$')
    word_counter = collections.Counter(
        w for w in text.lower().split()
        if w not in stopwords and re_word.match(w))
    modified_text = text
    for word, _ in word_counter.most_common(5):
        try:
            # Vocabulary.synonym() returns False on a miss, which makes
            # json.loads raise TypeError.
            syns = json.loads(Vocabulary.synonym(word))
        except TypeError as e:
            print(f'Cannot find synonym for "{word}" ({e})')
            continue
        repl = syns[0]['text']
        modified_text = re.sub(word, repl, modified_text,
                               flags=re.I | re.M | re.S)
    return modified_text
def details(word):
    """Look up meaning, antonym, synonym and a usage example for *word*.

    Returns a dict with keys 'meaning' (list of unescaped strings),
    'antonym', 'synonym' and 'usage' (single strings).  Any field the API
    has no data for is set to 'Not Found' — including the previously
    unhandled case of a successful response carrying an empty list, which
    used to raise IndexError.
    """
    meaning = vb.meaning(word)
    antonym = vb.antonym(word)
    synonym = vb.synonym(word)
    usage = vb.usage_example(word)

    if meaning is False:
        meaning = 'Not Found'
    else:
        entries = json.loads(meaning)
        meaning = [unescape(entry['text']) for entry in entries]

    if antonym is False:
        antonym = 'Not Found'
    else:
        entries = json.loads(antonym)
        antonym = str(entries[0]['text']) if entries else 'Not Found'

    if synonym is False:
        synonym = 'Not Found'
    else:
        entries = json.loads(synonym)
        synonym = str(entries[0]['text']) if entries else 'Not Found'

    if usage is False:
        usage = 'Not Found'
    else:
        entries = json.loads(usage)
        # The original deliberately picked the *last* usage example.
        usage = str(entries[-1]['text']) if entries else 'Not Found'

    return {
        'meaning': meaning,
        'antonym': antonym,
        'synonym': synonym,
        'usage': usage
    }
def getSynonyms(word):
    """Collect synonyms of *word* from the Vocabulary API, WordNet and Word,
    falling back to the Big Huge Thesaurus when nothing was found.

    Returns a set of synonym strings (possibly empty).
    """
    syns = set()
    result = vb.synonym(word)
    # BUG FIX: vb.synonym() returns a JSON *string* (or False).  The
    # original tested isinstance(result, list), which never matched — so
    # API synonyms were always silently dropped (and json.loads on a list
    # would have raised had it matched).
    if isinstance(result, str):
        syns.update(entry['text'] for entry in json.loads(result))
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            syns.add(lemma.name())
    w = Word(word)
    syns.update(w.synonyms())
    if not syns:
        # Last resort: Big Huge Thesaurus (API key from the environment).
        apiKey = os.getenv('BIG_HUGE')
        result = pyhugeconnector.thesaurus_entry(word=word,
                                                 api_key=apiKey,
                                                 pos_tag='n',
                                                 ngram=2,
                                                 relationship_type='syn')
        if isinstance(result, list):
            syns.update(result)
    return syns
def simplify(self):
    """Produce three simplified renderings of self.model's (word, POS) pairs.

    Returns [simp, simp2, simp3]:
      - simp:  shortest synonym that also appears in self.common
      - simp2: synonym with the lowest gunning-fog score overall
      - simp3: lowest gunning-fog synonym restricted to common words
    Verbs (POS containing "V") are re-cast via self.cast_base_verb.
    """
    print(self.words)
    simplified = []
    simplified2 = []
    simplified3 = []
    for word in self.model:
        print("Word: ", word)
        shortest_syn = ""
        # synonyms = list(chain.from_iterable([word.lemma_names() for word in wordnet.synsets(word[0])]))
        synonyms_json = vb.synonym(word[0]) or []
        # NOTE(review): when vb.synonym() is falsy the [] fallback reaches
        # json.loads(), which raises TypeError on a list — confirm intended.
        synonyms = [s["text"] for s in json.loads(synonyms_json)]
        if synonyms == [] or word[0].lower() in ' '.join(self.common):
            # No synonym, or the word is already common: keep it verbatim
            # in all three outputs.
            print("Either no synonym found or too common:", word)
            simplified.append(word[0])
            simplified2.append(word[0])
            simplified3.append(word[0])
            continue
        print(word[0] + "-->" + word[1])
        print(synonyms)
        # Lowest gunning-fog synonym, regardless of commonness.
        best_synonym = sorted(synonyms, key=textstat.gunning_fog)[0]
        print(best_synonym)
        if "V" in word[1]:
            print("Cast: " + self.cast_base_verb(best_synonym, word[1]))
            simplified2.append(self.cast_base_verb(best_synonym, word[1]))
        else:
            print("Not a verb")
            simplified2.append(best_synonym)
        # Shortest synonym that appears among the common words.
        for definition in [
                e for e in synonyms if e in ' '.join(self.common)
        ]:
            if len(definition) < len(shortest_syn) or shortest_syn == "":
                shortest_syn = definition
        print("Selected definition: " + str(shortest_syn))
        simplified.append(shortest_syn)
        # Lowest gunning-fog synonym among the common words.
        simplest = ""
        simplest_score = 100000000
        for definition in [
                e for e in synonyms if e in ' '.join(self.common)
        ]:
            if textstat.gunning_fog(definition) < simplest_score:
                simplest = definition
                simplest_score = textstat.gunning_fog(definition)
        print(simplest)
        if "V" in word[1]:
            print("Cast: " + self.cast_base_verb(simplest, word[1]))
            simplified3.append(self.cast_base_verb(simplest, word[1]))
        else:
            print("Not a verb")
            simplified3.append(simplest)
    # Join each list into a space-separated string.
    simp = ""
    for s in simplified:
        simp = simp + s
        # if s in ",.!;:?":
        simp = simp + " "
    simp2 = ""
    for s in simplified2:
        simp2 = simp2 + s
        # if s in ",.!;:?":
        simp2 = simp2 + " "
    simp3 = ""
    for s in simplified3:
        simp3 = simp3 + s
        # if s in ",.!;:?":
        simp3 = simp3 + " "
    return [simp, simp2, simp3]
def test_synonynm_not_found(self, mock_api_call):
    """A 404 from the API should make vb.synonym() return a falsy value."""
    api_response = mock.Mock()
    api_response.status_code = 404
    mock_api_call.return_value = api_response
    self.assertFalse(vb.synonym("angry"))
def obtain_result(named_entity, query_properties):
    """ an array to store property codes for wikidata query, as wikidata needs code not natural language for SPARQL query """
    # NOTE(review): Python 2 code (`print query`, dict.itervalues); writes the
    # module-level global `result` as a side effect in addition to returning it.
    property_code = []
    # if there are any query properties then only search for it's code
    if len(query_properties) != 0:
        # loading properties with it's code from property.json file which is property dump of wikidata
        # NOTE(review): file handle is never closed — consider `with` (left as-is).
        properties = open('property.json', 'r')
        properties = json.load(properties)
        print(type(properties))
        # extracting the first query from the query properties list
        noun = query_properties[0]
        """ finding the synonyms of the queried property using a library called vocabulary so that it matches the property from wikidata E.g. wikidata doesn't have a property wife, but it has a property spouse, so it'll consider the property code corresponding to spouse property """
        noun_synonyms = vb.synonym(noun, format="dict")
        # checking if the query itself is a property in the property.json file
        for p, prop in properties.items():
            if prop == noun:
                property_code.append(p)
                break
        # if the query is not found as a property in property.json file, then search in it's synonyms
        if len(property_code) == 0:
            # dict.items() extracts key and value from a dictionary
            for p, prop in properties.items():
                # vocabulary returns bool if there isn't any synonym for a particular word
                if type(noun_synonyms) != bool:
                    # dict.itervalues basically iterate through the values of the dictionary
                    for synonym in noun_synonyms.itervalues():
                        if prop == synonym:
                            property_code.append(p)
                            break
        print(property_code)
    # if our NLP program fails to detect the named entity (subject) from the query sentence, then flag an error response
    if len(named_entity) != 0:
        """ if there is any query for properties then use wikidata for that E.g :- Who is the wife of Shahrukh Khan? Here, "wife" is the query property and "Shahrukh Khan" is the subject """
        if len(property_code) != 0:
            # flag 0 = wikidata query path
            flag = 0
            sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
            # NOTE(review): named_entity[0] is interpolated unescaped into the
            # SPARQL string — injection risk if input is untrusted.
            query = """SELECT ?label ?property ?thumbnail WHERE { ?entity rdfs:label ?label . ?entity wdt:""" + property_code[0] + """ ?property_id . ?property_id rdfs:label ?property . OPTIONAL { ?property_id wdt:P18 ?thumbnail .} FILTER (STR(?label) = '""" + named_entity[0] + """') . FILTER (LANG(?property) = "en") }"""
            print query
        else:
            """ if there is no query for any property then use dbpedia for extracting the abstract of that entity E.g : Who is Shahrukh Khan? Note: There is no query property, but there is a subject """
            # flag 1 = dbpedia abstract path
            flag = 1
            sparql = SPARQLWrapper("https://dbpedia.org/sparql")
            query = """SELECT ?label ?description ?thumbnail WHERE { ?entity rdfs:label ?label . ?entity dbo:abstract ?description . ?entity dbo:thumbnail ?thumbnail . FILTER (STR(?label) = '""" + named_entity[0] + """' && LANG(?description) = "en") . } LIMIT 1"""
            print query
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        global result
        result = list()
        temp = str()
        try:
            results = sparql.query().convert()
            print(results)
            # parsing the result obtained from SPARQLquery
            if flag == 0:
                # parsing for wikidata
                for data in results["results"]["bindings"]:
                    result.append(data["property"]["value"])
                    # if the thumbnail do not exist for an entity
                    if "thumbnail" in data and data["thumbnail"]["value"] != "":
                        temp = data["thumbnail"]["value"]
            else:
                # parsing for dbpedia
                for data in results["results"]["bindings"]:
                    result.append(data["description"]["value"])
                    # if the thumbnail do not exist for an entity
                    if "thumbnail" in data and data["thumbnail"]["value"] != "":
                        temp = data["thumbnail"]["value"]
            # removing redundant data from the result set, by using set function and then converting it to a list
            result = list(set(result))
            response = []
            for i in result:
                print(i)
                # encoding required to remove the 'u' character from the list elements, by deafult there in Python list
                response.append(u''.join(i).encode('utf-8'))
            print(response)
            # Response sent as result status, the data and the thumbnail for that data
            # NOTE(review): both branches build the identical dict.
            if flag == 0:
                data = {"status": "200", "data": response, "thumbnail": temp}
            else:
                data = {"status": "200", "data": response, "thumbnail": temp}
            result = json.dumps(data)
            print(results)
        except:
            response = ["Unable to retrieve data"]
            data = {"status": "500", "data": response}
            result = json.dumps(data)
            raise
    else:
        response = ["Unable to retrieve data"]
        data = {"status": "500", "data": response}
        result = json.dumps(data)
    print(result)
    return result
class HQTrivia():
    """HQ Trivia helper: captures the phone screen, OCRs the question and
    answers, looks each answer up (Wikipedia / dictionary / Google) and
    scores them by keyword overlap with the question."""

    #initialization
    def __init__(self):
        # QuickTime - MacOS has record feature for phone (best)
        self.use_quicktime = False
        self.use_input = False
        # the filename of the image (no extension = capturing image)
        self.picture = 'source'
        # location of where to work on self.picture
        self.location = os.getcwd()
        # Replace with your own auth file name
        self.google_auth_json = 'HQproject-a1a4e25e4b45.json'
        # wikipedia setting (english)
        self.wiki = wikipediaapi.Wikipedia('en')
        self.vb = Vocabulary()
        # The OCR text (directly converted from image)
        self.raw = ''
        # processed texts
        self.question = ''
        self.question_nouns = ''
        self.answers = {}
        self.lookup_info = {}
        # For debugging
        self.times = {}
        self.verbose = False

    def debug(self, msg):
        # in multiprocessing environments, following line helps
        sys.stdout.flush()
        print("hqtrivia-automation.py: " + str(msg))

    def capture(self, ftype='tiff'):
        # function to selection function to capture picture
        if self.verbose:
            pre = "[DEBUG] in capture() | "
            self.debug(pre + "choosing how to capture...")
        if self.use_input:
            if self.verbose:
                self.debug(pre + "using user input")
            return
        # add extension name as 'tiff'
        self.picture += '.' + ftype
        if self.use_quicktime:
            if self.verbose:
                self.debug(pre + "quicktime")
            #call scan_quicktime function (take screenshot)
            self.scan_quicktime(ftype)

    def scan_quicktime(self, ftype='tiff'):
        # function to take screenshot via AppleScript (wire connection to computer)
        # To do: 1. open QuickTime player and do a movie recording
        #        2. Select drop down arrow next to record button, select device
        # Steps: 1. Get Window ID of QuickTime Player
        #        2. Run shell script to screen-capture the window ID
        if self.verbose:
            self.debug("[DEBUG] Starting QuickTime")
        start = time.time()
        full_path = os.path.join(self.location, self.picture)
        # AppleScript: ask QuickTime for its window id, then screencapture it.
        script = """tell application "QuickTime Player"
set winID to id of window 1
end tell
do shell script "screencapture -x -t tiff -l " & winID &"""
        script += ' " ' + full_path + '"'
        # replace 'tiff' with ftype
        script = script.replace('tiff', ftype)
        # Take screenshot
        s = NSAppleScript.alloc().initWithSource_(script)
        s.executeAndReturnError_(None)
        if self.verbose:
            diff = time.time() - start
            self.debug("[DEBUG] Quicktime - elapsed {!s}".format(diff))
            self.times['scan_quicktime'] = diff

    def ocr_vision(self, queue):
        # Use Google Cloud Vision API to process OCR
        # NOTE(review): `pre` and `start` are only bound when self.verbose is
        # True, but `pre` is used unconditionally below — NameError when
        # verbose is off.  Confirm intended usage.
        if self.verbose:
            pre = "[DEBUG] In ocr_vision() | "
            start = time.time()
            self.debug(pre + "starting")
        # Authenticate
        try:
            file_path = os.path.join(self.location, self.google_auth_json)
            if not os.path.isfile(file_path):
                if self.verbose:
                    self.debug(pre + "no auth file")
                queue.put("END")
                return
        except:
            if self.verbose:
                self.debug(pre + "no auth file")
            queue.put("END")
            return
        # Google Vision API credential
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = file_path
        # Instantiates a client
        client = vision.ImageAnnotatorClient()
        # get the image file (full path)
        if not os.path.isfile(self.picture):
            full_path = os.path.join(self.location, self.picture)
        else:
            full_path = self.picture
        # loads the image into memory
        with io.open(full_path, 'rb') as image_file:
            content = image_file.read()
        image = types.Image(content=content)
        # text detection on the image
        response = client.text_detection(image=image)
        text = response.text_annotations
        # Only the first annotation (the full block of text) is wanted.
        for t in text:
            self.raw = t.description
            break
        # cleaning up the text
        self.raw = self.raw.split('\n')
        # print out raw message
        self.debug(pre + "raw - " + str(self.raw))
        i = 0
        while i < len(self.raw):
            value = self.raw[i].lower()
            if i < 2:
                # Drop the first lines (header/score area of the screenshot).
                self.raw.pop(i)
                self.debug("method - ocr | delete [" + value + "]")
                i += 1
            else:
                i += 1
        # swipe left comment
        self.raw.pop(-1)
        # Return data to parent process
        queue.put(self.raw)
        if self.verbose:
            self.debug(pre + "raw - cleaned" + str(self.raw))
            diff = time.time() - start
            self.debug(pre + "elapsed {!s}".format(diff))
        queue.put("END")

    def parse(self):
        # Parse the raw OCR text to find Q&A
        if self.verbose:
            pre = "[DEBUG] parsing texts | "
            self.debug(pre + "starting")
            start = time.time()
        # Save it to question and answer variable
        check_q = True
        count_answer = 1
        for line in self.raw:
            #print(len(line), end=' ['+line+']\n')
            # check for question mark in the question
            if check_q:
                if len(line) > 2:
                    if '?' not in line:
                        self.question += line + ' '
                    else:
                        self.question += line
                        check_q = False
            else:
                if 'Swipe left' not in line:
                    if len(line) > 0 and line != '-':
                        ans = line
                        self.answers[ans] = {
                            "answer": ans,
                            "keywords": [],
                            "score": 0,
                            "index": str(count_answer)
                        }
                        self.lookup_info[ans] = []
                        count_answer += 1
                else:
                    break
        # checking parsed results
        # NOTE(review): bare `raise` outside an except block raises
        # RuntimeError("No active exception to re-raise") — likely intended
        # as a hard abort; confirm.
        if '?' not in self.question:
            self.debug(pre + "Could not find question!")
            raise
        if len(self.answers) < 1:
            self.debug(pre + "Could not find answers!")
            raise
        elif len(self.answers) > 3:
            self.debug(pre + "Found more than three answers!")
            raise
        # Use local dictionary (nltk) to find nouns
        for q in nltk.pos_tag(nltk.word_tokenize(self.question)):
            if q[1] == 'NN' or q[1] == 'NNP':
                self.question_nouns += " " + q[0]
        self.question_nouns = self.question_nouns.strip().split(' ')
        if self.verbose:
            self.debug(pre + "question = " + str(self.question))
            self.debug(pre + "nouns in question - {!s}".format(self.question_nouns))
            self.debug(pre + "answer = " + str(self.answers))
            diff = time.time() - start
            self.debug(pre + "elapsed {!s}".format(diff))
            self.times["parse"] = diff

    def keywords(self, words):
        # Function to find words in a string that are also in question
        # and return keywords found
        keywords = []
        for w in words:
            if len(w) > 2:
                if w in self.question_nouns:
                    if w not in keywords:
                        keywords.append(w)
        return keywords

    def lookup_wiki(self, queue):
        # Get wiki info about answer
        # Needs to return results to parent (for multi-processing)
        if self.verbose:
            pre = "[DEBUG] lookup_wiki() | "
            self.debug(pre + "starting")
            start = time.time()
        # search in wikipedia for each answer
        for index, ans in self.answers.items():
            l_info = self.lookup_info[ans['answer']]
            try:
                page = self.wiki.page(ans['answer'])
                if page.exists():
                    try:
                        words = []
                        for i in page.sections:
                            words += i.text.split(' ')
                    except:
                        self.debug(pre + "issue with wikipedia for {!s}".format(
                            ans['answer']))
                    else:
                        l_info.append("[Wikipedia]: " + page.summary)
                        queue.put(
                            [ans['answer'], self.keywords(words), l_info])
                else:
                    a = ans['answer'].split(' ')
                    if len(a) < 2:
                        # Could not find page, so throw exception and move on
                        # NOTE(review): 'asnwer' is a typo — this raises
                        # KeyError rather than logging; kept byte-identical.
                        self.debug(pre + "no results for {!s} in wikipedia... ".
                                   format(ans['asnwer']))
                        raise
                    else:
                        # Try searching each word in answer as last resort
                        for w in a:
                            if len(w) > 3:
                                page = self.wiki.page(w)
                                if page.exists():
                                    try:
                                        words = []
                                        for i in page.sections:
                                            words += i.text.split(' ')
                                    except:
                                        self.debug(
                                            pre +
                                            "issue with wikipedia for {!s}".
                                            format(ans['answer']))
                                    else:
                                        l_info.append(
                                            "[Wikipedia {!s}]: ".format(w) +
                                            page.summary)
                                        queue.put([
                                            ans['answer'],
                                            self.keywords(words), l_info
                                        ])
            except:
                self.debug(
                    pre +
                    "issue with wikipedia for {!s}... ".format(ans['answer']))
                self.debug(sys.exc_info())
        queue.put("END")
        if self.verbose:
            self.debug(pre + "elapsed " + str(time.time() - start))

    def lookup_dict_and_syn(self, queue):
        # Use nltk to look up word info(synonym). Use online dictionary if fails.
        if self.verbose:
            pre = "[DEBUG] lookup_dict_and_syn() | "
            self.debug(pre + "starting")
            start = time.time()
        # Get dictionary/synonyms
        for index, ans in self.answers.items():
            l_info = self.lookup_info[ans['answer']]
            a = ans['answer'].split(' ')
            # incase of multi word answers
            for w in a:
                # don't waste time on looking for smaller words
                if len(w) > 3:
                    # definition
                    define = nltk.corpus.wordnet.synsets(w)
                    synset_found = False
                    if len(define) < 1:
                        # local dictionary didn't find anything so search online
                        if self.verbose:
                            self.debug(pre +
                                       "nltk none for {!s}, using vocabulary".
                                       format(w))
                        try:
                            define = self.vb.meaning(w, format='list')
                            if define != False:
                                # Multiple definitions possible
                                for d in define:
                                    l_info.append(
                                        "[Meaning {!s}]: ".format(w) + d)
                                    queue.put([
                                        ans['answer'], self.keywords(d), l_info
                                    ])
                        except:
                            self.debug(
                                pre +
                                "issue with vocabulary for {!s}... ".format(w))
                            self.debug(sys.exc_info())
                    else:
                        synset_found = True
                        l_info.append("[Meaning {!s}]: ".format(w) +
                                      define[0].definition())
                        queue.put([
                            ans['answer'],
                            self.keywords(define[0].definition()), l_info
                        ])
                    # Synonyms
                    if synset_found:
                        synonyms = [
                            l.name() for s in define for l in s.lemmas()
                        ]
                        # Remove duplicates nltk adds
                        s = []
                        i = 0
                        while i < len(synonyms):
                            if synonyms[i] in s:
                                synonyms.pop(i)
                            else:
                                s.append(synonyms[i])
                                i += 1
                        syn = ', '.join(s)
                        l_info.append("[Synonyms {!s}]: ".format(w) + syn)
                        queue.put([ans['answer'], self.keywords(syn), l_info])
                    else:
                        # Local dictionary didn't find anything so search online
                        self.debug(
                            pre + "nltk has nothing for {!s}, using vocabulary".
                            format(w))
                        try:
                            synonyms = self.vb.synonym(w, format='list')
                            if synonyms != False:
                                l_info.append("[Synonyms {!s}]: ".format(w) +
                                              str(synonyms))
                                queue.put([
                                    ans['answer'],
                                    self.keywords(str(synonyms)), l_info
                                ])
                        except:
                            self.debug(
                                pre +
                                "issue with vocabulary for {!s}... ".format(w))
                            self.debug(sys.exc_info())
        queue.put("END")
        if self.verbose:
            self.debug(pre + "elapsed " + str(time.time() - start))

    def lookup_google_search(self, queue):
        # Do google search for each answer
        # Find if words in results are found in the question
        if self.verbose:
            pre = "[DEBUG] lookup_google_search() | "
            self.debug(pre + "starting")
            start = time.time()
        # Google Search
        # NOTE(review): unlike the other lookup_* workers, this one never
        # queue.put("END")s — confirm the consumer handles that.
        for index, ans in self.answers.items():
            l_info = self.lookup_info[ans['answer']]
            try:
                #parse replace space by plus sgin
                text = urllib.parse.quote_plus(ans['answer'])
                url = 'https://google.com/search?q=' + text
                #request google search
                response = requests.get(url, timeout=2)
                #pulling data out of html. lxml is a python paraser
                soup = BeautifulSoup(response.text, 'lxml')
                results = ''
                #find_all() - mnethod to look through a tag's descendent (class in CSS)
                for g in soup.find_all(class_='st'):
                    results += " " + g.text
                #remove new line
                cleaned_results = results.strip().replace('\n', '')
                l_info.append("[Google]: " + cleaned_results)
                queue.put(
                    [ans['answer'], self.keywords(cleaned_results), l_info])
            except:
                self.debug(pre + "issue with google search for {!s}... ".format(
                    ans['answer']))
                self.debug(sys.exc_info())
        if self.verbose:
            self.debug(pre + "google search elapsed " + str(time.time() - start))

    def display(self):
        # Clear the screen
        os.system('cls' if os.name == 'nt' else 'clear')
        # Text to output to screen
        output = []
        # Question
        output.append('\n\nQuestion - ' + self.question + '\n')
        # Answers & Lookup Info
        # choice to track answer with the highest score
        choice = {'index': [], 'score': 0, 'l_info': []}
        # a is the key and ans is the value; items() is for dict datastructure
        for a, ans in self.answers.items():
            if ans['score'] == choice['score']:
                choice['index'].append(a)
            if 'NOT' in self.question:
                # Negated question: the *lowest* scoring answer wins.
                if ans['score'] < choice['score']:
                    choice['index'] = [a]
                    choice['score'] = ans['score']
            else:
                if ans['score'] > choice['score']:
                    choice['index'] = [a]
                    choice['score'] = ans['score']
            output.append("Choice - " + ans['answer'] + ' - Score ' +
                          str(ans['score']))
            for l_info in self.lookup_info[ans['answer']]:
                for l in l_info:
                    l_index = l.split(':')[0]
                    if l_index not in choice['l_info']:
                        choice['l_info'].append(l_index)
                        # Truncate long info lines for display.
                        if len(l) > 140:
                            output.append(l[0:140])
                        else:
                            output.append(l)
            output.append("[Keywords]: " + str(ans['keywords']))
            output.append("")
        # Highest scoring answer
        if len(choice['index']) > 0:
            choose = []
            for i in choice['index']:
                choose.append(self.answers[i]['answer'])
            msg = "Answer - " + ', '.join(choose)
            # If negative word, choose the lowest score
            if 'NOT' in self.question:
                msg += (" - NOT keyword so lowest score is " +
                        str(choice['score']))
            else:
                msg += (" - highest score is " + str(choice['score']))
            output.append(msg)
        else:
            output.append("Answer - Unknown")
            output.append("")
        # NOTE(review): `msg` is unbound on the "Answer - Unknown" path —
        # this insert would raise NameError there; confirm.
        output.insert(1, msg + '\n')
        # print it all
        for line in output:
            print(line)
def getsimilar(word):
    """Return the Vocabulary-API synonym list for *word*.

    Result is passed through unchanged (other callers of vb.synonym in
    this codebase show it may be falsy when nothing is found).
    """
    similar_words = vb.synonym(word, format="list")
    return similar_words
def synonym_request(word):
    """Fetch synonyms of *word* from the Vocabulary API and print the raw response."""
    print(vb.synonym(word))
async def on_message(message):
    """Discord bot command dispatcher (legacy discord.py client.send_message API)."""
    # we do not want the bot to reply to itself
    if message.author == client.user:
        return
    #check if bot is working
    if message.content.startswith('!heck'):
        msg = 'Heck Off {0.author.mention}'.format(message)
        await client.send_message(message.channel, msg)
    #text response
    if message.content.startswith('!venom'):
        msg = 'is cute uwu'.format(message)
        await client.send_message(message.channel, msg)
    #text response
    # NOTE(review): slicing 4 chars and comparing to the 3-char "ayy" means
    # this only matches when the message is exactly "ayy"; same for "lmao".
    if message.content[0:4] == "ayy":
        await client.send_message(message.channel, "lmao".format(message))
    #text response
    if message.content[0:5] == "lmao":
        await client.send_message(message.channel, "ayy".format(message))
    #using giphy api post a random happy birthday gif
    if message.content.startswith("!hbd"):
        msg = "HAPPPY BARTHDAYYYYY "
        if len(message.mentions) > 0:
            msg += message.mentions[0].mention
        hbds = [x for x in g.search("happy birthday")]
        # NOTE(review): randint's upper bound is inclusive, so this can
        # index one past the end of hbds — possible IndexError.
        hbd = hbds[random.randint(0, len(hbds))]
        msg += " " + hbd.media_url
        await client.send_message(message.channel, msg)
    #tag spam a user(not recommended)
    if message.content.startswith("!tagspam"):
        msg = ""
        if len(message.mentions) > 0:
            for i in message.mentions:
                if i.mention == "<@199515135142920192>":
                    #hardcoded to not work against me xd
                    await client.send_message(message.channel, "Nope")
                    return
                msg += i.mention + "\t"
        else:
            msg = "Mention someone."
            await client.send_message(message.channel, msg)
            return
        # Optional repeat count as third token, capped at 50, default 5.
        if len(message.content.split(" ")) > 2:
            try:
                r = int(message.content.split(" ")[2])
                if r > 50:
                    r = 50
            except:
                r = 5
        else:
            r = 5
        for x in range(r):
            await client.send_message(message.channel, msg)
    #synonym using vocabulary api
    if message.content[0:3] == "!s ":  #match first 3 charachters
        query = message.content.split(" ")[
            1]  #seperate the content from the identifier
        result = vb.synonym(query)
        msg = ""
        if result == False:  #if no reply from api
            msg = "Not found"
        else:
            result = json.loads(result)  #parse json string
            for i in result:
                msg += i["text"] + "\n"  #add all results
        await client.send_message(message.channel, msg)
    #antonym using vocabulary api
    if message.content[0:3] == "!a ":
        query = message.content.split(" ")[1]
        result = vb.antonym(query)
        msg = ""
        if result == False:
            msg = "Not found"
        else:
            result = json.loads(result)
            for i in result:
                msg += i["text"] + "\n"
        await client.send_message(message.channel, msg)
    #usage
    if message.content[0:3] == "!u ":
        # Usage lookups accept multi-word queries; rejoin the remainder.
        query = message.content.split(" ")[1:]
        query = ' '.join(query)
        result = vb.usage_example(query)
        msg = ""
        if result == False:
            msg = "Not found"
        else:
            result = json.loads(result)
            for i in result:
                msg += i["text"] + "\n"
        await client.send_message(message.channel, msg)
    #meaning
    if message.content[0:3] == "!m ":
        query = message.content.split(" ")[1]
        result = vb.meaning(query)
        msg = ""
        if result == False:
            msg = "Not found"
        else:
            result = json.loads(result)
            for i in result:
                msg += i["text"] + "\n"
        await client.send_message(message.channel, msg)
    #despacito
    if message.content.startswith("!despacito"):
        with open("despacito.txt") as file:
            content = file.readlines()
        j = 0
        while j < len(content):
            msg = ""
            i = 0
            while i < 10 and j < len(
                    content
            ):  #10 lines at a time to prevent getting rate limited by discord
                msg += content[j]
                i += 1
                j += 1
            await client.send_message(message.channel, msg)
    def __cmp__(self, other):
        # Python 2 rich comparison: order candidates by *descending*
        # confidence (higher confidence sorts first).
        if self.confidence > other.confidence:
            return -1
        elif self.confidence == other.confidence:
            return 0
        elif self.confidence < other.confidence:
            return 1

    def __repr__(self):
        # "<confidence> <words>" — compact debug representation.
        return str(self.confidence)+' '+str(self.words)


if __name__=="__main__":
    # Ad-hoc smoke test: look up synonyms for a nonsense word; the API
    # returns False on a miss, making json.loads raise, hence the fallback.
    from vocabulary.vocabulary import Vocabulary as vb
    import json
    try:
        car_synonyms = json.loads(vb.synonym("qertadf"))
    except Exception as e:
        car_synonyms = []
    print(car_synonyms)
    # print(vb.synonym("car"))
    # corrector = Spelling(["corpora/words_extend","corpora/words_domain"])
    # corrector.load()
    # corrector.train()
    # print corrector.correct("tape")
    # print corrector.correct("chinaa")
    # print corrector.correct("speling'")
    # print corrector.correct("bycycle")
    # print corrector.correct("inconvient")
    # print corrector.correct("arrainged")
    # print corrector.correct("peotry")
    # print corrector.correct("peotryy")
from vocabulary.vocabulary import Vocabulary as vb

# Read one word from stdin (Python 2: raw_input) and print its synonyms.
word = raw_input()
print(vb.synonym("% s" % word))
def add_synonyms(query):
    """Return *query* followed by its Vocabulary-API synonyms, space-joined.

    Falls back to the bare query (plus the joining space) when the lookup
    fails: the API returns False on a miss, which makes json.loads raise
    TypeError; ValueError covers malformed JSON.  The blanket
    ``except Exception`` is narrowed to just those expected failures.
    """
    try:
        synonyms = json.loads(vb.synonym(query))
    except (TypeError, ValueError):
        synonyms = []
    return query + ' ' + ' '.join(item['text'] for item in synonyms)
class HQTrivia():
    """
    A simple test to see if one can automate determining the best answer

    Pipeline: capture() grabs a phone screenshot (QuickTime or webcam),
    vision_ocr()/tesseract_ocr() turn it into text lines in self.raw,
    parse() splits that into self.question/self.answers, lookup() gathers
    reference text per answer and scores it against the question's nouns,
    and display() prints the ranked choices.

    NOTE(review): reconstructed from a whitespace-collapsed source; the
    original line breaks/indentation inside methods were restored by hand.
    """

    def __init__(self):
        # This determines source location on where to capture picture
        # QuickTime - MacOS has record feature for phone (best)
        # WebCam - Use OpenCV to capture photo (untested)
        self.use_quicktime = False
        self.use_webcam = False
        self.use_input = False
        # The filename of picture (no extension means we're capturing image)
        self.picture = 'source'
        # Default location of where to work on self.picture
        self.location = os.getcwd()
        # Replace with your own auth file name
        self.google_auth_json = 'blissend.json'
        # Default the language for wikipedia searches
        self.wiki = wikipediaapi.Wikipedia('en')
        self.vb = Vocabulary()
        # The OCR text (a string until the OCR methods re-bind it to a
        # list of lines)
        self.raw = ''
        # The information we ultimately wanted to be analyzed
        self.question = ''
        self.answers = {}
        self.definitions = {}
        # For debugging
        self.verbose = False

    def debug(self, msg):
        """Print a tagged debug message, flushing stdout first."""
        # In multiprocessing environments, the below statement helps
        sys.stdout.flush()
        print("hqtrivia-automation.py: " + str(msg))

    def capture(self, ftype='tiff'):
        """
        Simple function to select function to capture picture

        Appends the file extension to self.picture, then dispatches to
        quicktime() or webcam() depending on the configured flags.
        Does nothing (and keeps self.picture untouched) when use_input is set.
        """
        if self.verbose:
            pre = "method - capture | "
            self.debug(pre + "choosing how to capture...")
        if self.use_input:
            # `pre` is only ever used under self.verbose, where it was
            # assigned above — safe despite the conditional binding.
            if self.verbose:
                self.debug(pre + "input provided, don't capture")
            return
        # Set file type
        self.picture += '.' + ftype
        if self.use_quicktime:
            if self.verbose:
                self.debug(pre + "quicktime")
            self.quicktime(ftype)
        elif self.use_webcam:
            if self.verbose:
                self.debug(pre + "webcam")
            self.webcam()

    def quicktime(self, ftype='tiff'):
        """
        Takes screenshot of phone screen via AppleScript

        To use this open QuickTime player and do a movie recording. Select
        the drop down arrow next to record button and select your iPhone.
        This requires a wire connection to your computer using QuickTime.
        Remember, don't record anything. Having it show on screen is enough
        for a screencapture!

        1. Get window ID of QuickTime Player
        2. Tell script to run shell script command screencapture the window ID
        """
        if self.verbose:
            self.debug("method - quicktime | starting")
            start = time.time()
        full_path = os.path.join(self.location, self.picture)
        # NOTE(review): AppleScript statement line breaks restored — the
        # collapsed source lost them, and osascript needs one per statement.
        script = """tell application "QuickTime Player"
set winID to id of window 1
end tell
do shell script "screencapture -x -t tiff -l " & winID &"""
        script += ' " ' + full_path + '"'
        # Swap the hard-coded tiff for the requested image type.
        script = script.replace('tiff', ftype)
        s = NSAppleScript.alloc().initWithSource_(script)
        s.executeAndReturnError_(None)
        if self.verbose:
            diff = time.time() - start
            self.debug("method - quicktime | elapsed {!s}".format(diff))

    def webcam(self):
        """
        Takes screenshot using webcam.

        This is untested but here just in case it's needed. You need to
        figure out which camera to capture which unfortnately appears to be
        a discovery process of entering in numbers from 0 to higher until
        found. Also note, not all cameras have good controls and autofocus
        sucks for this.

        Interactive: shows a preview window; SPACE saves a frame to
        self.picture, ESC aborts without saving.
        """
        if self.verbose:
            self.debug("method - webcam | starting")
        video = cv2.VideoCapture(1)  # cam id (try from 0 and higher til found)
        video.set(cv2.CAP_PROP_AUTOFOCUS, 0)  # turn the autofocus off
        video.set(3, 1920)   # frame width
        video.set(4, 1080)   # frame height
        cv2.namedWindow("HQ OCR Camera")
        #img_counter = 0
        while True:
            ret, frame = video.read()
            cv2.imshow("HQ OCR Camera", frame)
            if not ret:
                break
            k = cv2.waitKey(1)
            if k%256 == 27:
                # ESC pressed
                print("Escape hit, closing...")
                break
            elif k%256 == 32:
                # SPACE pressed
                img_name = self.picture
                # format with counter for multiple pics
                cv2.imwrite(img_name, frame)
                print("{} written!".format(img_name))
                #img_counter += 1
                break
        video.release()
        cv2.destroyAllWindows()

    def enhance(self):
        """
        Edit image readability for Google OCR that is suuuuuuuuuuuuuper...
        picky

        1. Eliminate background on buttons (answers)
        2. Turn to grayscale
        3. Make image BIG because Google OCR likes it big ;)
        4. Reduce the grayscale (eliminates button borders in good pictures)
        5. Make anything not white, black because google can't see color -_-

        Side effect: rebinds self.picture to "source_edited.<ext>" and
        writes the processed image there.
        """
        if self.verbose:
            self.debug("method - enhance | starting")
            start = time.time()
        # Replace buttons (answers) background color, incease size scale/DPI
        im = Image.open(self.picture)
        im = im.convert("RGBA")
        data = np.array(im)
        red, green, blue, alpha = data.T  # Unpack the bands for readability
        # Near-white pixels (button backgrounds) -> pure white.
        gray_buttons = (red > 225) & (green > 225) & (blue > 225)
        data[..., :-1][gray_buttons.T] = (255, 255, 255)
        im = Image.fromarray(data)
        width, height = im.size
        # New file since we're going to edit it
        file = self.picture.split('.')
        self.picture = "source_edited." + file[len(file)-1]
        # Crop off the phone status bar (top 300px) and buttons (bottom 400px).
        im.crop((0, 300, width, height-400)).save(self.picture)
        #im.resize((round(width*3), round(height*3))).save(
        #self.picture, dpi=(600,600))
        # Make grayscale
        im = Image.open(self.picture)
        im = im.convert('RGBA')
        # NOTE(review): save() returns None, so `im` is dead after this
        # line — harmless, it is re-opened from disk below.
        im = im.convert('L').save(self.picture)
        #exit()
        # Reduce the grayscale
        #im = Image.open(self.picture)
        #im = im.convert('RGBA')
        #data = np.array(im)
        #red, green, blue, alpha = data.T # Unpack the bands for readability
        #gray_triming = (red > 158) & (green > 158) & (blue > 158)
        #data[..., :-1][gray_triming.T] = (255, 255, 255)
        #Image.fromarray(data).save(self.picture)
        #exit()
        # Replace non white with black
        im = Image.open(self.picture)
        im = im.convert('RGBA')
        data = np.array(im)
        red, green, blue, alpha = data.T  # Unpack the bands for readability
        non_white = (red < 255) & (green < 255) & (blue < 255)
        data[..., :-1][non_white.T] = (0, 0, 0)
        im = Image.fromarray(data)
        width, height = im.size
        # Upscale 3x — OCR accuracy improves on larger input.
        im.resize((round(width*3), round(height*3))).save(self.picture)
        #exit()
        if self.verbose:
            diff = time.time() - start
            self.debug("method - enhance | elapsed {!s}".format(diff))

    def vision_ocr(self, queue):
        """
        Google Cloud Vision

        The better OCR tool out there but requires additional setup. It is
        free under limitations.

        Runs in a child process: results (the cleaned list of text lines)
        are handed back via `queue`; "END" signals missing credentials.
        """
        if self.verbose:
            pre = "method - vision_ocr | "
            start = time.time()
            self.debug(pre + "starting")
        # See if we have an auth file, if not return
        try:
            file_path = os.path.join(self.location, self.google_auth_json)
            if not os.path.isfile(file_path):
                if self.verbose:
                    self.debug(pre + "no auth file")
                queue.put("END")
                return
        except:
            if self.verbose:
                self.debug(pre + "no auth file")
            queue.put("END")
            return
        # Credentials
        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = file_path
        # Instantiates a client
        client = vision.ImageAnnotatorClient()  # spits out shit, don't know why
        # The image file
        if not os.path.isfile(self.picture):
            full_path = os.path.join(self.location, self.picture)
        else:
            full_path = self.picture
        # Loads the image into memory
        with io.open(full_path, 'rb') as image_file:
            content = image_file.read()
        image = types.Image(content=content)
        # Performs text detection on the image file
        response = client.text_detection(image=image)
        text = response.text_annotations
        # Only the first annotation (the full-image text block) is wanted.
        for t in text:
            self.raw = t.description
            break
        # Clean up text: drop lines shorter than 10 chars (OCR noise).
        self.raw = self.raw.split('\n')
        self.debug("method - vision_ocr | raw - " + str(self.raw))
        index = 0
        while index < len(self.raw):
            value = self.raw[index].lower()
            if len(value) < 10:
                self.raw.pop(index)
                #self.debug("method - ocr | delete [" + value + "]")
            else:
                index += 1
        self.raw.pop(-1)  # swipe left comment
        if self.verbose:
            self.debug("method - vision_ocr | raw - cleaned" + str(self.raw))
            diff = time.time() - start
            self.debug(pre + "elapsed {!s}".format(diff))
        # Return data to parent process
        queue.put(self.raw)

    def tesseract_ocr(self, queue):
        """
        Google Tesseract OCR

        Finally read the image text if possible

        Runs in a child process: puts [picture_path, raw_lines] on `queue`
        (the picture path changes because enhance() rewrites it).
        """
        # Include the below line, if you don't have tesseract in your PATH
        # Example tesseract_cmd: '/usr/local/bin/tesseract'
        #pytesseract.pytesseract.tesseract_cmd = '<fullpath_to_tesseract>'
        if self.verbose:
            pre = "method - tesseract_ocr | "
            self.debug(pre + "starting")
            start = time.time()
        # Enhance image first since tesseract doesn't do it
        self.enhance()
        # Get text from image (OCR)
        self.raw = pytesseract.image_to_string(
            Image.open(self.picture), config="-psm 11")
        # Clean it up: split into lines, drop empties.
        self.raw = self.raw.split('\n')
        if self.verbose:
            self.debug(pre + "raw = " + str(self.raw))
        index = 0
        while index < len(self.raw):
            value = self.raw[index].lower()
            if len(value) < 1:
                self.raw.pop(index)
                #self.debug("method - ocr | delete [" + value + "]")
            else:
                index += 1
        if self.verbose:
            self.debug(pre + "raw - cleaned = " + str(self.raw))
            diff = time.time() - start
            self.debug(pre + "elapsed {!s}".format(diff))
        # Return the data to main parent process
        queue.put([self.picture, self.raw])

    def parse(self):
        """
        Parser for the OCR text

        This is tricky because the OCR text won't always be the same. So
        adjustments may have to be tweaked here.

        Accumulates lines into self.question until a '?' appears, then
        treats subsequent non-trivial lines as answers (keyed "1", "2", ...)
        until the 'Swipe left' footer.
        """
        if self.verbose:
            self.debug("method - parse | starting")
            start = time.time()
        # Parse text into question and answer variables
        check_q = True
        counter = 1  # for counting answers
        for line in self.raw:
            #print(len(line), end=' ['+line+']\n')
            if check_q:
                if len(line) > 2:
                    if '?' not in line:
                        self.question += line + ' '
                    else:
                        self.question += line
                        check_q = False
            else:
                if 'Swipe left' not in line:
                    if len(line) > 0 and line != '-':
                        ans = line
                        self.answers[str(counter)] = {
                            "answer": ans,
                            "keywords": [],
                            "score": 0,
                            "index": str(counter)
                        }
                        self.definitions[ans] = []
                        counter += 1
                else:
                    break
        if self.verbose:
            self.debug("method - parse | question = " + str(self.question))
            self.debug("method - parse | answer = " + str(self.answers))
            diff = time.time() - start
            self.debug("method - parse | elapsed {!s}".format(diff))

    def lookup(self, index):
        """
        Gets information about answer to determine relevance to question

        This is a multiprocess function and therefore updated values have to
        be returned to parent process.

        Gathers Wikipedia / Google-snippet / dictionary / synonym text for
        the answer at self.answers[index], then scores it: +1 per distinct
        word (>2 chars) shared with the question's nouns.
        Returns (answers_dict, definitions_list, index).
        """
        if self.verbose:
            pre = "method - lookup | "
            self.debug(pre + "starting")
            start = time.time()
        # Reference/copy of self values
        answers = self.answers[index]
        definitions = self.definitions[answers['answer']]
        value = answers['answer']
        # Extract the question's nouns (NN/NNP POS tags) as match targets.
        question_nouns = ''
        for q in nltk.pos_tag(nltk.word_tokenize(self.question)):
            if q[1] == 'NN' or q[1] == 'NNP':
                question_nouns += " " + q[0]
        question_nouns = question_nouns.strip().split(' ')
        if self.verbose:
            self.debug(pre + "nouns in question - {!s}".format(question_nouns))
        # First get wikipedia information (the most helpful)
        time_wiki = time.time()
        try:
            page = self.wiki.page(value)
            if page.exists():
                # The page object itself is appended (used for its sections
                # during scoring) alongside the summary string.
                definitions.append(page)
                definitions.append("[Wikipedia]: " + page.summary)
        except:
            self.debug(pre + "issue with wikipedia... ")
            self.debug(sys.exc_info()[0])
        if self.verbose:
            self.debug(pre + "wiki elapsed " + str(time.time() - time_wiki))
        # Google search
        time_gsearch = time.time()
        try:
            text = urllib.parse.quote_plus(value)
            url = 'https://google.com/search?q=' + text
            response = requests.get(url, timeout=2)
            soup = BeautifulSoup(response.text, 'lxml')
            results = ''
            # class 'st' holds the result snippet text in Google's markup.
            for g in soup.find_all(class_='st'):
                results += " " + g.text
            definitions.append("[Google]: " + results.strip().replace('\n',''))
        except:
            self.debug(pre + "issue with google search... ")
            self.debug(sys.exc_info()[0])
        if self.verbose:
            self.debug(
                pre + "google search elapsed " + str(time.time() - time_gsearch))
        # Get dictionary definitions
        time_define = time.time()
        define = nltk.corpus.wordnet.synsets(value)
        synset_found = False
        if len(define) < 1:
            # Means local dictionary didn't find anything so search online
            if self.verbose:
                self.debug(
                    pre + "nltk nothing for {!s}, using vocabulary".format(value))
            try:
                define = self.vb.meaning(value, format='list')
                if define != False:
                    # There may be multiple difinitions so count them up
                    counter = 1
                    for d in define:
                        definitions.append(
                            "[Meaning " + str(counter) + "]: " + d)
                        counter += 1
            except:
                self.debug(pre + "issue with vocabulary... ")
                self.debug(sys.exc_info()[0])
        else:
            synset_found = True
            definitions.append("[Meaning]: " + define[0].definition())
        if self.verbose:
            self.debug(
                pre + "dictionary elapsed " + str(time.time() - time_define))
        # Get synonyms
        time_synonyms = time.time()
        if synset_found:
            synonyms = [l.name() for s in define for l in s.lemmas()]
            # Remove duplicates (order-preserving; only advance past kept
            # items — popping shifts the next candidate into place).
            s = []
            i = 0
            while i < len(synonyms):
                if synonyms[i] in s:
                    synonyms.pop(i)
                else:
                    s.append(synonyms[i])
                    i += 1
            definitions.append("[Synonyms]: " + ', '.join(s))
        else:
            # Means local dictionary didn't find anything so search online
            try:
                synonyms = self.vb.synonym(value, format='list')
                if synonyms != False:
                    definitions.append("[Synonyms]: " + str(synonyms))
            except:
                self.debug(pre + "issue with vocabulary... ")
                self.debug(sys.exc_info()[0])
        if self.verbose:
            self.debug(
                pre + "synonyms elapsed " + str(time.time() - time_synonyms))
        # Score the answer
        if len(definitions) > 0:
            for define in definitions:
                if type(define) == str:
                    if "[Wikipedia]" not in define:
                        d = define.split(':')  # remove pretag [Google]: bla blah
                        words = d[1].split(' ')
                        # NOTE(review): for "[Wikipedia]" strings `words`
                        # keeps its previous value — that entry is scored
                        # via the page object branch below instead.
                else:
                    # This is for WIKIPEDIA sections which isn't a string
                    try:
                        words = []
                        for i in page.sections:
                            words += i.text.split(' ')
                    except:
                        self.debug(pre + "issue with wikipedia")
                for w in words:
                    if len(w) > 2:
                        if w in question_nouns:
                            if w not in answers['keywords']:
                                answers['keywords'].append(w)
                                answers['score'] += 1
        if self.verbose:
            diff = time.time() - start
            self.debug("method - lookup | elapsed {!s} for {!s}".format(diff, index))
        # Send data back to parent process
        return answers, definitions, index

    def display(self):
        """Print each answer with its score/definitions, then the pick.

        With 'NOT' in the question the LOWEST score wins, otherwise the
        highest; ties collect multiple indices.
        """
        # Question
        print('\n\nQuestion - ' + self.question, end='\n\n')
        # Answers
        choice = {'index': [], 'score': 0}
        for a, ans in self.answers.items():
            if ans['score'] == choice['score']:
                choice['index'].append(a)
            if 'NOT' in self.question:
                if ans['score'] < choice['score']:
                    choice['index'] = [a]
                    choice['score'] = ans['score']
            else:
                if ans['score'] > choice['score']:
                    choice['index'] = [a]
                    choice['score'] = ans['score']
            print("Choice - " + ans['answer'] + ' - Score ' + str(ans['score']))
            for d in self.definitions[ans['answer']]:
                if type(d) == str:
                    # Truncate long reference text for readability.
                    if len(d) > 140:
                        print(d[0:140])
                    else:
                        print(d)
            print("[Keywords]: " + str(ans['keywords']))
            print("")
        # Choose answer
        if len(choice['index']) > 0:
            choose = []
            for i in choice['index']:
                choose.append(self.answers[i]['answer'])
            print("Answer - " + ', '.join(choose), end='')
            if 'NOT' in self.question:
                print(" - NOT keyword so lowest score is " + str(choice['score']))
            else:
                print(" - highest score is " + str(choice['score']))
        else:
            print("Answer - Unknown")
        print("")