def inner(collections, parsed_args):
        if len(collections) != 1:
            raise ValueError('expected exactly one collection')
        collection = collections[0]

        embed_locations = parsed_args.embed_locations
        if parsed_args.search:
            from glob import glob
            embed_locations = []
            for embed_location in parsed_args.embed_locations:
                glob_format = '{}/**/{}'.format(embed_location,
                                                parsed_args.doc_embed)
                embed_locations += list(
                    map(os.path.dirname, glob(glob_format, recursive=True)))
        print_with_time('Found {} locations to test.'.format(
            len(embed_locations)))

        df = pd.DataFrame(columns=test.columns,
                          index=map(os.path.basename, embed_locations))

        for i, embed_location in enumerate(embed_locations):
            print_with_time('Testing ({}/{}) {}...'.format(
                i + 1, len(embed_locations), embed_location))

            doc_dict = dictionary(os.path.join(embed_location,
                                               parsed_args.doc_embed),
                                  language='doc',
                                  use_subword=parsed_args.subword,
                                  normalize=parsed_args.normalize)
            query_dict = dictionary(os.path.join(embed_location,
                                                 parsed_args.query_embed),
                                    language='query',
                                    use_subword=parsed_args.subword,
                                    normalize=parsed_args.normalize)
            if parsed_args.oov_embed:
                oov_dict = dictionary(os.path.join(embed_location,
                                                   parsed_args.oov_embed),
                                      language='query',
                                      use_subword=parsed_args.subword,
                                      normalize=parsed_args.normalize)
                query_dict = OovDictionary([query_dict, doc_dict, oov_dict])
            bilingual_dictionary = BilingualDictionary(src_dict=doc_dict,
                                                       tgt_dict=query_dict,
                                                       default_lang='doc')

            bilingual_search_engine = BilingualEmbeddingSearchEngine(
                dictionary=bilingual_dictionary,
                doc_lang='doc',
                query_lang='query',
                query_df_file=parsed_args.df_file,
                use_weights=parsed_args.use_weights)
            bilingual_search_engine.index_documents(
                collection.documents.values())

            result = test.f(collection, bilingual_search_engine)
            df.loc[os.path.basename(embed_location)] = result
            print_with_time('Found MAP@10 {} for embed {}.'.format(
                result, embed_location))

        return df
def dictionaries(path, parsed_args):
    if parsed_args.all_dictionary_options:
        base_dict = dictionary(path, use_subword=False, normalize=False)
        dicts = [
            copy.copy(base_dict),
            copy.copy(base_dict),
            copy.copy(base_dict)
        ]
        dicts[1].use_subword = True
        dicts[2].normalize = True
        options = [{
            'subword': False,
            'norm': False
        }, {
            'subword': True,
            'norm': False
        }, {
            'subword': False,
            'norm': True
        }]
    else:
        dicts = [
            dictionary(path,
                       use_subword=parsed_args.subword,
                       normalize=parsed_args.normalize)
        ]
        options = [{
            'subword': parsed_args.subword,
            'norm': parsed_args.normalize
        }]
    return zip(dicts, options)
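A minimal usage sketch of the helper above, assuming it is driven by an argparse namespace with flags matching the attributes it reads; the embedding path and the chosen flag are made up for illustration:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--subword', action='store_true')
parser.add_argument('--normalize', action='store_true')
parser.add_argument('--all-dictionary-options', action='store_true')
parsed_args = parser.parse_args(['--all-dictionary-options'])

# iterate over every (dictionary, options) pair produced by dictionaries()
for dict_, options in dictionaries('embeddings/doc.vec', parsed_args):
    print('evaluating dictionary with subword={subword} norm={norm}'.format(**options))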
Example No. 3
def send_message(text, chat_id, name):
    if 'weather' in text:
        place = text.split(' ')[0]
        data = weather_report(place)
        if data[1]:
            msg = """
            {} weather status
            temperature: {}C
            status: {}
            wind: {}
            humidity: {}
            """.format(str(data[0]['plc']), str(data[0]['temp']), str(data[0]['status']), str(data[0]['wind']), str(data[0]['hm']))
        else:
            msg = "Weather report for {} not found!".format(place)
    elif text in ['/help', '/Help', '/HELP']:
        msg = """
        Available Commands:
      1. 'place-name' weather
      2. 'word' meaning
      3. /sms <message>@<number>
      4. /help
      5. /start
        """
    elif 'meaning' in text:
        if len(text.split(' ')) == 2:
            word = text.split(' ')[0]
            result = dictionary(word)
            if result[0] == 200:
                meaning = result[1]
                msg = """
                {} :
                {}
                """.format(word, meaning[0])
            else:
                msg = "sorry that word could not be found"
        else:
            msg = "please use the format '<word> meaning', type /help for more."

    elif '/start' in text:
        msg = "Welcome {} to AI Assistant, I am here to help you, if you are new type '/help' for available commands".format(name)

    elif '/sms' in text:
        data = text.split("@")
        sms = data[0][5:]
        number = data[1]
        if len(sms) > 110:
            msg = "the message length should not exceed 110"
        elif len(number) != 10:
            msg = "the mobile number should be of 10 digits, don't use +91."
        else:
            s = 'from {} using AI BOT : {}'.format(name, sms)
            status = send_sms(s, number)
            if status == 200:
                msg = "Your sms successfully sent to {}".format(number)
            else:
                msg = "Sorry, your sms was not sent" 
    else: 
        msg = "wrong command type '/help' for available commands"
    
    url = URL + "sendMessage?text={}&chat_id={}".format(msg, chat_id)
    print(get_url(url))
Example No. 4
def main():
    data_store = dictionary()
    Pyro4.Daemon.serveSimple(
        {data_store: "example.datastore"},
        ns=False)
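A hedged companion sketch for talking to the object served above: serveSimple prints the daemon URI when it starts, so the exact URI below, the assumption that the dictionary class is exposed to Pyro, and the get method name are all assumptions:

import Pyro4

# paste the URI that serveSimple prints when the server starts (host and port will differ)
uri = "PYRO:example.datastore@localhost:9090"
store = Pyro4.Proxy(uri)
print(store.get("key1"))  # assumed method on the remote dictionary object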
Example No. 5
def selectProcess(command, authkeys):
    commandList = command.lower().split()
    command = command.lower()

    google_authkeys = authkeys["google"]
    yelp_authkeys = authkeys["yelp"]

    if (directionCommand(commandList)):
        return (Directions(command, google_authkeys))

    elif (dictionaryCommand(commandList)):
        return (dictionary(command))

    elif (yelpCommand(commandList)):
        return (Yelp(command, yelp_authkeys))

    elif (wikiCommand(commandList)):
        return (SearchWiki(command))

    elif (placesCommand(commandList)):
        return (GooglePlace(command, google_authkeys))

    elif (helpCommand(commandList)):
        return (printHelp())

    else:
        return ("We didn't understand your search, see if this helps:\n" +
                printHelp())
Example No. 6
def write_cache():
    #nltk,dico,anto = dico.find(word2)
    print("*************************** BEGIN ********************************")
    facit_keys = [
        'need', 'know', 'much', 'quantity', 'vinegar', 'used', 'container',
        'need', 'know', 'type', 'sort', 'vinegar', 'used', 'container', 'need',
        'know', 'sort', 'type', 'materials', 'test', 'need', 'know', 'size',
        'surface', 'area', 'material', 'materials', 'used', 'know', 'long',
        'time', 'sample', 'rinsed', 'rinse', 'distilled', 'water', 'need',
        'know', 'drying', 'method', 'use', 'know', 'size', 'type', 'container',
        'use'
    ]
    from dictionary import dictionary
    count = 0
    for y in facit_keys:
        dico = dictionary()
        #cache['1']['nltk'][y] = dico.synomyns(y)
        cache['1']['dico'][y] = dico.lexic(y)
        # append() returns None, so build the antonym list before storing it
        antonyms = dico.antonyms(y)
        antonyms.append('not ' + y)
        cache['1']['antonym'][y] = antonyms
        count = count + 1
        print("*************************** ", count,
              " ********************************")
    print(
        "*************************** | END | ********************************")
    with open('cache_dico.json', 'w') as f:
        json.dump(cache, f, indent=2)
Example No. 7
def selectProcess(command, authkeys):
    commandList = command.lower().split()
    command = command.lower()

    google_authkeys = authkeys["google"]
    yelp_authkeys = authkeys["yelp"]

    if directionCommand(commandList):
        return Directions(command, google_authkeys)

    elif dictionaryCommand(commandList):
        return dictionary(command)

    elif yelpCommand(commandList):
        return Yelp(command, yelp_authkeys)

    elif wikiCommand(commandList):
        return SearchWiki(command)

    elif placesCommand(commandList):
        return GooglePlace(command, google_authkeys)

    elif helpCommand(commandList):
        return printHelp()

    else:
        return ("We didn't understand your search, see if this helps:\n" +
                printHelp())
Example No. 8
    def bellman_ford(self, origin):
        elements = dct.dictionary()

        for vertex in self.__vertexs:
            elements.add(vertex.data, InfoElement(vertex))
        elements[origin].accumulated_cost = 0

        for _ in range(self.size - 1):
            for vertex in self.__vertexs:
                for edge in vertex.edge_list:
                    target = elements[edge.target_vertex.data]
                    source = elements[edge.source_vertex.data]
                    new_cost = source.accumulated_cost + edge.weight

                    if target.accumulated_cost > new_cost:
                        target.accumulated_cost = new_cost
                        target.last_vertex = source
                        target.cost = edge.weight

        for vertex in self.__vertexs:
            for edge in vertex.edge_list:
                target = elements[edge.target_vertex.data]
                source = elements[edge.source_vertex.data]

                if target.accumulated_cost > source.accumulated_cost + edge.weight:
                    print("Negative cycle")
                    return None

        return self.__graph_from_info_elements(elements)
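The method above depends on the surrounding graph classes. For reference, a self-contained sketch of the same relax-all-edges idea on plain data (the vertex names and the (source, target, weight) edge format are assumptions of this sketch):

def bellman_ford(vertices, edges, origin):
    # edges is a list of (source, target, weight) tuples
    dist = {v: float('inf') for v in vertices}
    dist[origin] = 0

    # relax every edge |V| - 1 times
    for _ in range(len(vertices) - 1):
        for u, v, w in edges:
            if dist[u] + w < dist[v]:
                dist[v] = dist[u] + w

    # a further improvement means a negative cycle is reachable
    for u, v, w in edges:
        if dist[u] + w < dist[v]:
            return None
    return dist


print(bellman_ford(['a', 'b', 'c'], [('a', 'b', 4), ('a', 'c', 2), ('c', 'b', 1)], 'a'))
# {'a': 0, 'b': 3, 'c': 2}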
Example No. 9
def searchWord():
    textInput = e1.get()
    if not textInput:
        ans = "Please enter a word..."
    else:
        ans = dictionary(textInput)
    l3.config(text=ans, wraplength=435, font=("Courier", 14), bg="#87cefa")
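searchWord reads from an Entry named e1 and writes to a Label named l3, both created elsewhere in that program. A minimal sketch of the assumed Tkinter wiring (only the widget names come from the example; everything else here is an assumption):

import tkinter as tk

root = tk.Tk()
e1 = tk.Entry(root, width=40)                              # the input box searchWord() reads
e1.pack()
tk.Button(root, text="Search", command=searchWord).pack()  # triggers the lookup
l3 = tk.Label(root)                                        # the label searchWord() fills in
l3.pack()
root.mainloop()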
Example No. 10
def find_all(key='container', index='1'):
    cacha = 'C:/Users/ben/Desktop/BachelorProject/dico_cache.json'
    with open(cacha) as f1:
        cach = json.load(f1)
    nltk = cach[index]['nltk'][key]
    # keep the cached value under its own name so the dictionary() instance does not overwrite it
    dico_cached = cach[index]['dico'][key]
    antonym = cach[index]['antonym'][key]

    from dictionary import dictionary
    dico = dictionary()
    a, b, c = dico.find(key)

    print(" Cache NLTK = ", nltk)
    print(" Cache Dico = ", dico_cached)
    print(" Cache Anto = ", antonym)

    print(" Dictionary NLTK = ", a)
    print(" Dictionary Dico = ", b)
    print(" Dictionary Anto = ", c)
Example No. 11
    def inner(collections, parsed_args):
        df = pd.DataFrame(index=[c.name for c in collections], columns=[])

        for collection in collections:
            paths = list(parsed_args.embed)
            for domain_embed_path in parsed_args.domain_embed:
                paths.append(domain_embed_path.format(collection.name))
            for globbed_path in paths:
                embeds = glob.glob(globbed_path)
                for embed_path in embeds:
                    embed = dictionary(embed_path,
                                       use_subword=parsed_args.subword,
                                       normalize=parsed_args.normalize)
                    star = globbed_path.index('*')
                    column = embed_path[star:star - len(globbed_path) + 1]
                    df.loc[collection.name, column] = test.f(collection, embed)
            if parsed_args.relative:
                baseline = df.loc[collection.name, parsed_args.relative]
                df.loc[collection.name] = (
                    (df.loc[collection.name] / baseline) - 1) * 100

        return df
Example No. 12
def compute_all(text1, ref=''):
    
    dico = dictionary()
    answers = clean_paragraph(text1)
    answers = duplicat(answers)
    #filt = filter(answers)
    answers = filter(answers)
    ref = clean_paragraph(dico.getDoc(), found_sent=False)
    print(" Answer : ", answers)
    print(" Ref : ", ref)
    #print("********************************1***************************************")
    matches = []
    one = []
    for x in answers:
        for y in ref:
            score = align(x, y, tokenize=False)

            if score > 0.3:
                matches.append((x, y, score))
                one.append(x)
            ref = clean_paragraph(dico.getDoc(), found_sent=False)

    #return matches, len(matches)
    return matches, duplist(one)
Example No. 13
    def __init__(self):
        self.environ_dictionary = dictionary.dictionary()
        self.environ_dictionary.set_dictionary(os.environ)
Example No. 14
def g_file_svm(infile, save_file, terminology, dev_size=900):
    dataset = open(infile, 'r').read().split("===========================\n")
    qa_set = []
    no_postive = 0
    Id_dict = read_term(terminology)
    if dev_size > 0:
        mode = 'train'
        mdict = dictionary()
        mdict.clean()
    else:
        mdict = dictionary()
        mode = 'test'
    for x, data in enumerate(dataset):
        if data.strip() == '':
            continue
        item_set = data.strip().split('\n')

        if len(set(item_set)) == 1:
            continue
        query = ''
        pos_set, neg_set = [], []
        repeat_list = []  # collected across the whole block so duplicate items are skipped
        for y, item in enumerate(item_set):
            if item in repeat_list:
                continue
            else:
                repeat_list.append(item)
            #print item
            try:
                #label , query, question, ques_id, query_id =  item.split('\t')
                q, a, q_id, a_id, sieve, fname, index = item.split('\t')
                if a in repeat_list:
                    continue
                else:
                    repeat_list.append(a)
                q_id = normId(q_id, Id_dict)
                a_id = normId(a_id, Id_dict)
                if a_id in q_id:
                    label = 1
                else:
                    label = 0
            except:
                print "Error parser:" + item
                continue
            qt = tokeniztion(q)  #"\t".join(word_tokenize(query))
            a = tokeniztion(a)
            if int(label) == 1:
                pos_set.append(a)
            else:
                neg_set.append(a)
        if pos_set == []:
            no_postive += 1
        line = generate_qaset_svm(uid=str(x),
                                  q=qt,
                                  pos_set=pos_set,
                                  neg_set=neg_set,
                                  mdict=mdict,
                                  mode=mode)
        qa_set.append(line)
    print(len(qa_set))

    open(save_file, 'w').write('\n'.join(qa_set[dev_size:]))
    if dev_size > 0:
        open(save_file.replace("train", "dev"),
             'w').write('\n'.join(qa_set[:dev_size]))
    mdict.store_dict()
    return 1
Example No. 15
import re
from nltk import word_tokenize
from nltk.corpus import stopwords
from textblob import TextBlob as tb
from dictionary import dictionary 
from nltk.corpus import wordnet as wn
import json

from data import data 

dico = dictionary()
question = data()

def sentences_finder(text):
    # Comma splitter
    mass = text.split(',')
    length = len(mass)
    
    #sentence finder blob 
    blob = tb(text)
    # grammatical correction
    blob = blob.correct()
    #***********************
    sent = blob.sentences
    ret = [str(x).strip() for x in sent]
    if length >= 4:
        ret = mass
    return ret
    
# print(sentences_finder(text1))
def flatten(s):
Example No. 16
    def __init__(self):
        self.dictionary = dictionary.dictionary()
Example No. 17
    def __init__(self, column_name):
        self.column_name = column_name
        self.data = dictionary.dictionary()
Example No. 18
    def __init__(self, filename):
        self.bgf_table = bgf_table.bgf_table()  # list of valid bigrams
        self.dictionary = dictionary.dictionary("../dictionary.txt")  # dictionary of adjectives
        self.find_bi_grams(filename)
Example No. 19
__author__ = 'rodas'


from collections import defaultdict
from operator import itemgetter
from swda import CorpusReader
from sys import argv
from gensim import corpora, models, similarities
from nltk.stem.porter import *
from dictionary import dictionary
from preprocess import preprocess


texts = preprocess()
dictionary(texts)  # presumably builds and saves the dictionary loaded below
dictionary = corpora.Dictionary.load('/tmp/speechact.dict')

class BowCorpus(object):
    def __iter__(self):
        for text in texts:
            yield dictionary.doc2bow(text)



if __name__ == '__main__':
    corpus = BowCorpus()
    corpora.MmCorpus.serialize('/tmp/corpus.mm', corpus)
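A hedged follow-up sketch showing how the serialized corpus and dictionary might be consumed afterwards; the choice of LDA and the topic count are assumptions, not part of the example:

from gensim import corpora, models

dictionary = corpora.Dictionary.load('/tmp/speechact.dict')
corpus = corpora.MmCorpus('/tmp/corpus.mm')

# train a small topic model on the bag-of-words corpus
lda = models.LdaModel(corpus, id2word=dictionary, num_topics=10)
for topic in lda.print_topics(5):
    print(topic)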

Example No. 20
import Pyro4
from dictionary import dictionary, ResourceManager
from client import client  # assumes the client module exposes a client class

data_store = dictionary()
user1 = client("user1", data_store)

print(user1.get('key1'))
print(user1.delete('key1'))
Example No. 21
import sys
import dictionary


def print_usage():
    print("""Usage:
    %(script)s [word]
    """ % {"script": sys.argv[0]})


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print_usage()
        sys.exit(1)

    # load the currently saved dictionary
    phonetic_dictionary = dictionary.dictionary()
    phonetic_dictionary.load(filename="dictionary.json")

    # calculate the value of the word
    value = phonetic_dictionary.calculate_word_value(sys.argv[1],
                                                     values_filename="values.json")
    print("Word has value '%(value)d'" % {"value": value})

    # find similar words
    if str(value) in phonetic_dictionary.dictionary:
        # print them out
        words = phonetic_dictionary.dictionary[str(value)]
        print("The following matches were found:")
        for word in words:
            print("\t" + word)
    else:
        # no similar words
        print("No matches were found.")
Example No. 22
    for x in range(1, num_sentences):
        sentences.append(
            sentence.gen_sentence(dictionary=dictionary,
                                  purpose='declarative',
                                  sentence_type='simple',
                                  num_clauses=num_clauses))

    return sentences


if __name__ == '__main__':
    if len(sys.argv) == 3:
        print('Loading configuration file:', sys.argv[1])
        config = load_config(sys.argv[1])

        dict = dictionary()

        print('Initializing Dictionary Connection')
        try:
            dict.init(host=config['DICTIONARY']['host'],
                      user=config['DICTIONARY']['user'],
                      passwd=config['DICTIONARY']['passwd'],
                      dbname=config['DICTIONARY']['dbname'])

            sentences = gen_sentence(dictionary=dict,
                                     num_sentences=int(sys.argv[2]))
            for concept in sentences:
                print(concept)

            dict.close()
Example No. 23
    def __init__(self):
        self.string_dictionary = dictionary.dictionary()
Example No. 24
def translate(column):

    d = dictionary()
    return d.get(column, column)
Example No. 25
    def __init__(self):
        self.catalog = dictionary.dictionary()
Example No. 26
from collections import Counter
import sys
import dictionary as d

words = d.dictionary(
    'C:\\Users\\bobei\\Downloads\\12dicts-6.0.2\\International\\2of4brif.txt')
words.append('a')  #added a and i because some dictionaries don't have them
words.append('i')
words = sorted(words)

ini_name = input('Enter your name')


def anagramfinder(name, wordlist):  #this finds the anagrams within the name
    # map each letter of the name to the number of times it occurs
    name_map = Counter(name)
    anagrams = []
    for word in wordlist:  #from dictionary file
        test = ''
        word_map = Counter(word)
        for letter in word:
            # keep a letter only if it occurs no more often in the word than in the name
            if word_map[letter] <= name_map[letter]:
                test += letter
        # if the kept letters rebuild the whole word, it is a valid anagram
        if Counter(test) == word_map:
            anagrams.append(word)
    print(*anagrams, sep='\n')
    print()
Example No. 27
#!/usr/bin/python
#----------------------------------
#  make_dictionary.py
#----------------------------------
#
# builds a dictionary to translate between ints and words

import dictionary

#specify input files  - these are generated by preprocess.py
filename = 'words.txt'
bigram_filename = 'bigrams.txt'

# build the dictionaries
my_dict = dictionary.dictionary([filename],
                                n_vocab=5000)  # build word -> int mapping
bigram_dict = dictionary.dictionary(
    [bigram_filename], n_vocab=5000)  # build bigram -> int mapping

# print the dictionaries
my_dict.print_dictionary_to_file('word_dictionary.txt', True)
bigram_dict.print_dictionary_to_file('bigram_dictionary.txt', True)

#----------------------------------
# translate the file to ints


def translate_file(the_dict, filename):
    ''' translate words in a file to integers, producing an ordered unique list of the words present'''
    infile = open(filename)
    outfile = open(filename + '.int', 'w')