def inner(collections, parsed_args):
    if len(collections) != 1:
        raise ValueError
    collection = collections[0]
    embed_locations = parsed_args.embed_locations
    if parsed_args.search:
        from glob import glob
        embed_locations = []
        for embed_location in parsed_args.embed_locations:
            glob_format = '{}/**/{}'.format(embed_location,
                                            parsed_args.doc_embed)
            embed_locations += list(
                map(os.path.dirname, glob(glob_format, recursive=True)))
        print_with_time('Found {} locations to test.'.format(
            len(embed_locations)))
    df = pd.DataFrame(columns=test.columns,
                      index=map(os.path.basename, embed_locations))
    for i, embed_location in enumerate(embed_locations):
        print_with_time('Testing ({}/{}) {}...'.format(
            i + 1, len(embed_locations), embed_location))
        doc_dict = dictionary(os.path.join(embed_location,
                                           parsed_args.doc_embed),
                              language='doc',
                              use_subword=parsed_args.subword,
                              normalize=parsed_args.normalize)
        query_dict = dictionary(os.path.join(embed_location,
                                             parsed_args.query_embed),
                                language='query',
                                use_subword=parsed_args.subword,
                                normalize=parsed_args.normalize)
        if parsed_args.oov_embed:
            oov_dict = dictionary(os.path.join(embed_location,
                                               parsed_args.oov_embed),
                                  language='query',
                                  use_subword=parsed_args.subword,
                                  normalize=parsed_args.normalize)
            query_dict = OovDictionary([query_dict, doc_dict, oov_dict])
        bilingual_dictionary = BilingualDictionary(src_dict=doc_dict,
                                                   tgt_dict=query_dict,
                                                   default_lang='doc')
        bilingual_search_engine = BilingualEmbeddingSearchEngine(
            dictionary=bilingual_dictionary,
            doc_lang='doc',
            query_lang='query',
            query_df_file=parsed_args.df_file,
            use_weights=parsed_args.use_weights)
        bilingual_search_engine.index_documents(collection.documents.values())
        result = test.f(collection, bilingual_search_engine)
        df.loc[os.path.basename(embed_location)] = result
        print_with_time('Found MAP@10 {} for embed {}.'.format(
            result, embed_location))
    return df
def dictionaries(path, parsed_args):
    if parsed_args.all_dictionary_options:
        base_dict = dictionary(path, use_subword=False, normalize=False)
        dicts = [
            copy.copy(base_dict),
            copy.copy(base_dict),
            copy.copy(base_dict)
        ]
        dicts[1].use_subword = True
        dicts[2].normalize = True
        options = [{'subword': False, 'norm': False},
                   {'subword': True, 'norm': False},
                   {'subword': False, 'norm': True}]
    else:
        dicts = [
            dictionary(path,
                       use_subword=parsed_args.subword,
                       normalize=parsed_args.normalize)
        ]
        options = [{'subword': parsed_args.subword,
                    'norm': parsed_args.normalize}]
    return zip(dicts, options)
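# A minimal usage sketch for dictionaries() above, assuming the same external
# dictionary() loader the function itself uses. The embedding path and the
# Namespace fields are hypothetical stand-ins for the real CLI arguments:
#
#     from argparse import Namespace
#     demo_args = Namespace(all_dictionary_options=True,
#                           subword=False, normalize=False)
#     for embed_dict, opts in dictionaries('embeds/model.vec', demo_args):
#         print('testing with subword={subword}, norm={norm}'.format(**opts))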
def send_message(text, chat_id, name):
    if 'weather' in text:
        place = text.split(' ')[0]
        data = weather_report(place)
        if data[1] == True:
            msg = """
            {} weather status
            temperature: {}C
            status: {}
            wind: {}
            humidity: {}
            """.format(str(data[0]['plc']), str(data[0]['temp']),
                       str(data[0]['status']), str(data[0]['wind']),
                       str(data[0]['hm']))
        else:
            msg = "Weather report for {} not found!".format(place)
    elif text in ['/help', '/Help', '/HELP']:
        msg = """
        Available Commands:
        1. 'place-name' weather
        2. 'word' meaning
        3. /sms <message>@<number>
        4. /help
        5. /start
        """
    elif 'meaning' in text:
        if len(text.split(' ')) == 2:
            word = text.split(' ')[0]
            # look the word up once instead of twice
            status, meaning = dictionary(word)
            if status == 200:
                msg = """
                {} : {}
                """.format(word, meaning[0])
            else:
                msg = "sorry, that word could not be found"
        else:
            msg = "please use the format '<word> meaning', type /help for more."
    elif '/start' in text:
        msg = ("Welcome {} to AI Assistant. I am here to help you; if you are "
               "new, type '/help' for available commands.").format(name)
    elif '/sms' in text:
        data = text.split("@")
        sms = data[0][5:]
        number = data[1]
        if len(sms) > 120:  # limit matches the message below
            msg = "the message length should not exceed 120"
        elif len(number) != 10:
            msg = "the mobile number should be of 10 digits, don't use +91."
        else:
            s = 'from {} using AI BOT : {}'.format(name, sms)
            status = send_sms(s, number)
            if status == 200:
                msg = "Your sms was successfully sent to {}".format(number)
            else:
                msg = "Sorry, your sms was not sent"
    else:
        msg = "wrong command, type '/help' for available commands"
    url = URL + "sendMessage?text={}&chat_id={}".format(msg, chat_id)
    print get_url(url)
def main():
    data_store = dictionary()
    Pyro4.Daemon.serveSimple({data_store: "example.datastore"}, ns=False)
def selectProcess(command, authkeys):
    commandList = command.lower().split()
    command = command.lower()
    google_authkeys = authkeys["google"]
    yelp_authkeys = authkeys["yelp"]
    if directionCommand(commandList):
        return Directions(command, google_authkeys)
    elif dictionaryCommand(commandList):
        return dictionary(command)
    elif yelpCommand(commandList):
        return Yelp(command, yelp_authkeys)
    elif wikiCommand(commandList):
        return SearchWiki(command)
    elif placesCommand(commandList):
        return GooglePlace(command, google_authkeys)
    elif helpCommand(commandList):
        return printHelp()
    else:
        return ("We didn't understand your search, see if this helps:\n" +
                printHelp())
def write_cache():
    #nltk,dico,anto = dico.find(word2)
    print("*************************** BEGIN ********************************")
    facit_keys = [
        'need', 'know', 'much', 'quantity', 'vinegar', 'used', 'container',
        'need', 'know', 'type', 'sort', 'vinegar', 'used', 'container',
        'need', 'know', 'sort', 'type', 'materials', 'test', 'need', 'know',
        'size', 'surface', 'area', 'material', 'materials', 'used', 'know',
        'long', 'time', 'sample', 'rinsed', 'rinse', 'distilled', 'water',
        'need', 'know', 'drying', 'method', 'use', 'know', 'size', 'type',
        'container', 'use'
    ]
    from dictionary import dictionary
    count = 0
    for y in facit_keys:
        dico = dictionary()
        #cache['1']['nltk'][y] = dico.synonyms(y)
        cache['1']['dico'][y] = dico.lexic(y)
        # list.append() returns None, so build the antonym list first
        antonyms = dico.antonyms(y)
        antonyms.append('not ' + y)
        cache['1']['antonym'][y] = antonyms
        count = count + 1
        print("*************************** ", count,
              " ********************************")
    print("*************************** | END | ********************************")
    f = open('cache_dico.json', 'w')
    json.dump(cache, f, indent=2)
    f.close()
def bellman_ford(self, origin):
    elements = dct.dictionary()
    for vertex in self.__vertexs:
        elements.add(vertex.data, InfoElement(vertex))
    elements[origin].accumulated_cost = 0
    # Relax every edge |V| - 1 times
    for i in range(self.size - 1):
        for vertex in self.__vertexs:
            for edge in vertex.edge_list:
                last_cost = elements[edge.target_vertex.data].accumulated_cost
                new_cost = (elements[edge.source_vertex.data].accumulated_cost
                            + edge.weight)
                if last_cost > new_cost:
                    elements[edge.target_vertex.data].accumulated_cost = new_cost
                    elements[edge.target_vertex.data].last_vertex = \
                        elements[edge.source_vertex.data]
                    elements[edge.target_vertex.data].cost = edge.weight
    # One extra pass: any further improvement means a negative cycle
    for vertex in self.__vertexs:
        for edge in vertex.edge_list:
            last_cost = elements[edge.target_vertex.data].accumulated_cost
            new_cost = (elements[edge.source_vertex.data].accumulated_cost
                        + edge.weight)
            if last_cost > new_cost:
                print("Negative cycle")
                return None
    return self.__graph_from_info_elements(elements)
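# For reference, a minimal standalone Bellman-Ford over a plain edge list,
# mirroring the method above: relax all edges |V| - 1 times, then one extra
# pass to detect negative cycles. The toy graph in the comment is hypothetical.
def bellman_ford_edges(n, edges, origin):
    """edges: iterable of (source, target, weight); vertices are 0..n-1."""
    dist = [float('inf')] * n
    dist[origin] = 0
    for _ in range(n - 1):  # relax every edge n-1 times
        for u, v, w in edges:
            if dist[u] + w < dist[v]:
                dist[v] = dist[u] + w
    for u, v, w in edges:  # any further improvement means a negative cycle
        if dist[u] + w < dist[v]:
            return None
    return dist


# e.g. bellman_ford_edges(3, [(0, 1, 4), (1, 2, -2), (0, 2, 5)], 0) -> [0, 4, 2]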
def searchWord():
    textInput = e1.get()
    if len(textInput) < 1:
        ans = "Please enter a word..."
    else:
        ans = dictionary(textInput)
    l3.config(text=ans, wraplength=435, font=("Courier", 14), bg="#87cefa")
def find_all(key='container', index='1'):
    cache_path = 'C:/Users/ben/Desktop/BachelorProject/dico_cache.json'
    f1 = open(cache_path)
    cach = json.load(f1)
    f1.close()
    nltk = cach[index]['nltk'][key]
    # keep the cached value under its own name so it isn't shadowed by the
    # dictionary instance created below
    cached_dico = cach[index]['dico'][key]
    antonym = cach[index]['antonym'][key]
    from dictionary import dictionary
    dico = dictionary()
    a, b, c = dico.find(key)
    print(" Cache NLTK = ", nltk)
    print(" Cache Dico = ", cached_dico)
    print(" Cache Anto = ", antonym)
    print(" Dictionary NLTK = ", a)
    print(" Dictionary Dico = ", b)
    print(" Dictionary Anto = ", c)
def inner(collections, parsed_args):
    df = pd.DataFrame(index=[c.name for c in collections], columns=[])
    for collection in collections:
        paths = list(parsed_args.embed)
        for domain_embed_path in parsed_args.domain_embed:
            paths.append(domain_embed_path.format(collection.name))
        for globbed_path in paths:
            embeds = glob.glob(globbed_path)
            for embed_path in embeds:
                embed = dictionary(embed_path,
                                   use_subword=parsed_args.subword,
                                   normalize=parsed_args.normalize)
                # recover the part of embed_path matched by the glob's '*'
                star = globbed_path.index('*')
                column = embed_path[star:star - len(globbed_path) + 1]
                df.loc[collection.name, column] = test.f(collection, embed)
        if parsed_args.relative:
            baseline = df.loc[collection.name, parsed_args.relative]
            df.loc[collection.name] = (
                (df.loc[collection.name] / baseline) - 1) * 100
    return df
def compute_all(text1, ref=''):
    dico = dictionary()
    answers = clean_paragraph(text1)
    answers = duplicat(answers)
    answers = filter(answers)  # project-level helper, not the builtin
    ref = clean_paragraph(dico.getDoc(), found_sent=False)
    print(" Answer : ", answers)
    print(" Ref : ", ref)
    # avoid shadowing the builtin list; collect matched pairs and the
    # answers that matched
    matches = []
    matched_answers = []
    for x in answers:
        for y in ref:
            score = align(x, y, tokenize=False)
            if score > 0.3:
                matches.append((x, y, score))
                matched_answers.append(x)
    ref = clean_paragraph(dico.getDoc(), found_sent=False)
    return matches, duplist(matched_answers)
def __init__(self):
    self.environ_dictionary = dictionary.dictionary()
    self.environ_dictionary.set_dictionary(os.environ)
def g_file_svm(infile, save_file, terminology, dev_size=900):
    dataset = open(infile, 'r').read().split("===========================\n")
    qa_set = []
    no_positive = 0
    Id_dict = read_term(terminology)
    if dev_size > 0:
        mode = 'train'
        mdict = dictionary()
        mdict.clean()
    else:
        mode = 'test'
        mdict = dictionary()
    for x, data in enumerate(dataset):
        if data.strip() == '':
            continue
        item_set = data.strip().split('\n')
        if len(set(item_set)) == 1:
            continue
        query = ''
        pos_set, neg_set = [], []
        # Track already-seen items/answers; resetting this inside the loop
        # (as before) made the duplicate check a no-op.
        repeat_list = []
        for y, item in enumerate(item_set):
            if item in repeat_list:
                continue
            repeat_list.append(item)
            try:
                q, a, q_id, a_id, sieve, fname, index = item.split('\t')
                if a in repeat_list:
                    continue
                repeat_list.append(a)
                q_id = normId(q_id, Id_dict)
                a_id = normId(a_id, Id_dict)
                if a_id in q_id:
                    label = 1
                else:
                    label = 0
            except:
                print "Error parser: " + item
                continue
            qt = tokeniztion(q)
            a = tokeniztion(a)
            if int(label) == 1:
                pos_set.append(a)
            else:
                neg_set.append(a)
        if pos_set == []:
            no_positive += 1
        line = generate_qaset_svm(uid=str(x), q=qt, pos_set=pos_set,
                                  neg_set=neg_set, mdict=mdict, mode=mode)
        qa_set.append(line)
    print len(qa_set)
    open(save_file, 'w').write('\n'.join(qa_set[dev_size:]))
    if dev_size > 0:
        open(save_file.replace("train", "dev"),
             'w').write('\n'.join(qa_set[:dev_size]))
    mdict.store_dict()
    return 1
import re
import json

from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.corpus import wordnet as wn
from textblob import TextBlob as tb

from dictionary import dictionary
from data import data

dico = dictionary()
question = data()


def sentences_finder(text):
    # Comma splitter
    mass = text.split(',')
    length = len(mass)
    # Sentence finder via TextBlob, with grammatical correction
    blob = tb(text)
    blob = blob.correct()
    sent = blob.sentences
    ret = [''.join(x).strip() for x in sent]
    # Fall back to the comma split when the text has many clauses
    if length >= 4:
        ret = mass
    return ret


# print(sentences_finder(text1))


def flatten(s):
def __init__(self):
    self.dictionary = dictionary.dictionary()
def __init__(self, column_name):
    self.column_name = column_name
    self.data = dictionary.dictionary()
def __init__(self, filename):
    self.bgf_table = bgf_table.bgf_table()  # list of valid bigrams
    self.dictionary = dictionary.dictionary(
        "../dictionary.txt")  # dictionary of adjectives
    self.find_bi_grams(filename)
__author__ = 'rodas'

from collections import defaultdict
from operator import itemgetter
from sys import argv

from swda import CorpusReader
from gensim import corpora, models, similarities
from nltk.stem.porter import *

from dictionary import dictionary
from preprocess import preprocess

texts = preprocess()
dictionary(texts)  # expected to save the dictionary to /tmp/speechact.dict
# rebind the name to the loaded gensim Dictionary
dictionary = corpora.Dictionary.load('/tmp/speechact.dict')


class BowCorpus(object):
    def __iter__(self):
        for text in texts:
            yield dictionary.doc2bow(text)


if __name__ == '__main__':
    corpus = BowCorpus()
    corpora.MmCorpus.serialize('/tmp/corpus.mm', corpus)
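# A hedged usage sketch: once serialized, the bag-of-words corpus can be
# streamed back with gensim's MmCorpus instead of being rebuilt (standard
# gensim API; the path matches the serialize() call above):
#
#     loaded = corpora.MmCorpus('/tmp/corpus.mm')
#     for bow in loaded:
#         pass  # each item is a list of (token_id, count) pairs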
import Pyro4

from dictionary import dictionary, ResourceManager
# import the client class itself so client(...) below is callable
from client import client

data_store = dictionary()
user1 = client("user1", data_store)
print user1.get('key1')
print user1.delete('key1')
def print_usage():
    print """Usage: %(script)s [word]""" % {"script": sys.argv[0]}


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print_usage()
        sys.exit(1)

    # load the currently saved dictionary
    phonetic_dictionary = dictionary.dictionary()
    phonetic_dictionary.load(filename="dictionary.json")

    # calculate the value of the word
    value = phonetic_dictionary.calculate_word_value(
        sys.argv[1], values_filename="values.json")
    print "Word has value '%(value)d'" % {"value": value}

    # find similar words
    if str(value) in phonetic_dictionary.dictionary:
        # print them out
        words = phonetic_dictionary.dictionary[str(value)]
        print "The following matches were found:"
        for word in words:
            print "\t" + word
    else:
        # no similar words
    # note: range(1, n) yields n - 1 sentences
    for x in range(1, num_sentences):
        sentences.append(
            sentence.gen_sentence(dictionary=dictionary,
                                  purpose='declarative',
                                  sentence_type='simple',
                                  num_clauses=num_clauses))
    return sentences


if __name__ == '__main__':
    if len(sys.argv) == 3:
        print('Loading configuration file:', sys.argv[1])
        config = load_config(sys.argv[1])
        dict = dictionary()
        print('Initializing Dictionary Connection')
        try:
            dict.init(host=config['DICTIONARY']['host'],
                      user=config['DICTIONARY']['user'],
                      passwd=config['DICTIONARY']['passwd'],
                      dbname=config['DICTIONARY']['dbname'])
            sentences = gen_sentence(dictionary=dict,
                                     num_sentences=int(sys.argv[2]))
            for concept in sentences:
                print(concept)
            dict.close()
def __init__(self):
    self.string_dictionary = dictionary.dictionary()
def translate(column):
    d = dictionary()
    return d.get(column, column)
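# Usage sketch for translate(): unmapped column names fall back to themselves
# via dict.get(key, default). The column names here are hypothetical:
#
#     translate('cust_id')   # -> mapped label if the dictionary has one
#     translate('unknown')   # -> 'unknown' (falls through unchanged)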
def __init__(self):
    self.catalog = dictionary.dictionary()
from collections import Counter
import sys
import dictionary as d

# Load the word list; 'a' and 'i' are added because some dictionaries
# omit one-letter words.
words = d.dictionary(
    'C:\\Users\\bobei\\Downloads\\12dicts-6.0.2\\International\\2of4brif.txt')
words.append('a')
words.append('i')
words = sorted(words)

ini_name = input('Enter your name: ')


def anagramfinder(name, wordlist):
    """Print every word in wordlist that can be spelled from name's letters."""
    # count how many times each letter occurs in the name
    name_map = Counter(name)
    anagrams = []
    for word in wordlist:
        test = ''
        word_map = Counter(word)
        for letter in word:
            # a letter is usable only if the word needs no more copies of it
            # than the name provides
            if word_map[letter] <= name_map[letter]:
                test += letter
        # if every letter survived the check, the word fits inside the name
        if Counter(test) == word_map:
            anagrams.append(word)
    print(*anagrams, sep='\n')
    print()
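# An equivalent, more compact check using Counter subtraction: a word can be
# spelled from the name's letters iff subtracting the name's counts leaves
# nothing over. Offered as an illustrative alternative, not a replacement.
def can_spell(word, name):
    return not (Counter(word) - Counter(name))


# e.g. can_spell('bob', 'bobei') -> True; can_spell('bone', 'bobei') -> False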
#!/usr/bin/python
#----------------------------------
# make_dictionary.py
#----------------------------------
#
# builds a dictionary to translate between ints and words

import dictionary

# specify input files - these are generated by preprocess.py
filename = 'words.txt'
bigram_filename = 'bigrams.txt'

# build the dictionaries
my_dict = dictionary.dictionary(
    [filename], n_vocab=5000)  # build word -> int mapping
bigram_dict = dictionary.dictionary(
    [bigram_filename], n_vocab=5000)  # build bigram -> int mapping

# print the dictionaries
my_dict.print_dictionary_to_file('word_dictionary.txt', True)
bigram_dict.print_dictionary_to_file('bigram_dictionary.txt', True)


#----------------------------------
# translate the file to ints
def translate_file(the_dict, filename):
    '''translate words in a file to integers, producing an ordered
    unique list of the words present'''
    infile = open(filename)
    outfile = open(filename + '.int', 'w')