Ejemplo n.º 1
0
def the_test(test):
    """Execute one test case: build a Dictionary from (key, value) and compare
    the result against the expected value."""
    key, value, expected = test
    actual = Dictionary(key, value)
    report = "input: {0}, output: {1}, expected: {2}".format(
        [key, value], actual, expected)
    print(report)
    assert Dictionary(key, value) == expected
 def action27(self, lineNumber, symbol):
     """Push a known identifier onto SS1; report unknown identifiers instead."""
     symtab = Dictionary()
     stack = SS1()
     if symtab.isFound(symbol):
         stack.push(symbol)
     else:
         E.E(lineNumber, 0).unknownIdentifier(symbol)
Ejemplo n.º 3
0
 def get_dictionary( ):
     """ Build a Dictionary based on the Diceware data. """
     
     dicto = Dictionary()
     
     print 'Parsing Diceware data...'
     i = 0;
     # expected number of Diceware entries; used only for the progress display
     nLines = 7780
     
     # open file for reading
     with open(Diceware.fname, 'r') as fid:    
         for line in fid:
             tokens = Diceware.parse_line(line)
             if tokens is None:
                 continue
             
             # save data to list
             word = Word(tokens['word'], -1, -1, i);
             dicto.add_word(word)
             
             # increment counter and show progress
             i = i + 1;
             progress = float(i) / float(nLines)
             # NOTE(review): this only fires when `progress` lands within 1e-4
             # above a multiple of 0.05; since each step is ~1.3e-4, many 5%
             # marks are skipped — confirm the intended update cadence.
             if (progress % 0.05) < 1e-4:
                 sys.stdout.write("\r%2.2f%%" %(progress*100))
                 sys.stdout.flush()
                 
     print '\nDone.'
     
     return dicto
    def action18(self, lineNumber, symbol):
        """Begin a procedure/function definition: report a redefinition if the
        name is already known, then push the name and a 'proc_params' marker
        onto SS1."""
        symtab = Dictionary()
        stack = SS1()
        if symtab.isFound(symbol):
            # already declared — report, but keep going
            E.E(lineNumber, 0).doubleDefinition(symbol, 'procedure or function')
        for item in (symbol, 'proc_params'):
            stack.push(item)
Ejemplo n.º 5
0
 def AddClick():
     """Append the pending operator to the shared list, print the list, and
     clear the text input."""
     # NOTE(review): declared with no parameters yet reads `self` and the
     # builtin name `list` — presumably this was a method whose `self`
     # parameter was lost when the snippet was extracted; confirm against
     # the original class.
     global operator
     DC = Dictionary()
     DC.AddToArray(self.operator, list)
     DC.PrintList(list)
     self.operator = " "
     self.text_Input.set(self.operator)
     return
    def action20000(self, lineNumber, symbol):
        """Resolve `symbol`'s type in the dictionary and push it onto SS1;
        unknown types are reported."""
        symtab = Dictionary()
        if not symtab.isFound(symbol):
            E.E(lineNumber, 0).typeUnknown(symbol)
        resolved = symtab.get(symbol)
        stack = SS1()
        stack.push(resolved)
    def action8000(self, lineNumber, symbol):
        """Declare a user-defined type: report a duplicate name, then push a
        fresh AttrUserType onto SS1."""
        symtab = Dictionary()
        if symtab.isFound(symbol):
            # the name is already taken
            E.E(lineNumber, 0).typeDoubleDefinition(symbol)
        user_type = AtrClasses.AttrUserType(symbol)
        stack = SS1()
        stack.push(user_type)
    def action4000(self, lineNumber, symbol):
        """Bind the value on top of SS1 to the name beneath it and register
        the bound name in the dictionary."""
        stack = SS1()
        bound_value = stack.pop()
        target = stack.pop()
        target.bind(bound_value)
        Dictionary().setObject(target.name, target)
Ejemplo n.º 9
0
 def __init__(self, aHt, aId, aCode, aLnotab, aArgs):
     """Wrap a code object plus bookkeeping for the tracer.

     aHt: shared helper object, also handed to the locals Dictionary.
     aId: identifier assigned to this function.
     aCode: the code object; its co_name becomes self.name.
     aLnotab: line-number table for the code object.
     aArgs: argument names, filtered into self.argument by __updateArgument__.
     """
     self.hT = aHt
     self.locals = Dictionary(self.hT)
     self.argument = ()
     self.lnotab = aLnotab
     self.code = aCode
     self.name = aCode.co_name
     self.Id = aId
     self.__updateArgument__(aArgs)
    def action17000(self, lineNumber, symbol):
        """Name the array attribute on top of SS1 after `symbol`, which must
        be a known type (unknown types are reported)."""
        if not Dictionary().isFound(symbol):
            E.E(lineNumber, 0).typeUnknown(symbol)
        stack = SS1()
        array_attr = stack.pop()
        array_attr['object'].setName(symbol)
        stack.push(array_attr)
    def action7000(self, lineNumber, symbol):
        """Finalize a record definition: re-register the record popped from
        SS1 under its name, keeping the previous entry's class for the row."""
#        symbol = symbol['name']
        ss1 = SS1()
        record = ss1.pop();
        dictionary = Dictionary()
        # @todo make interface for array, record, diapason and id
        # class of the existing entry under this name, read before overwriting
        _class = dictionary.get(record.name.name)['class']
        dictionary.setObject(record.name.name, record)
        # NOTE(review): `row` is never used — presumably getRow has a side
        # effect (emits output); confirm, otherwise this line is dead code.
        row = self.getRow(record.name.name, _class, record)
    def action5000(self, lineNumber, symbol):
        """Declare a variable: report a duplicate definition, otherwise push a
        new AttrVar onto SS1."""
        if Dictionary().isFound(symbol):
            E.E(lineNumber, 0).varDoubleDefinition(symbol)
        else:
            new_var = AtrClasses.AttrVar(symbol)
            SS1().push(new_var)
Ejemplo n.º 13
0
 def test_get(self):
     """get() returns stored values and falls back to the default for
     missing keys."""
     d = Dictionary()
     for key, colour in (('raymond', 'red'), ('rachel', 'blue')):
         d[key] = colour
         self.assertEqual(d[key], colour)
     d['critter'] = 'yellow'
     self.assertEqual(d.get('raymond', 'not found'), 'red')
     self.assertEqual(d.get('john', 'not found'), 'not found')
Ejemplo n.º 14
0
 def test_pop(self):
     """pop() removes and returns a value; popping a missing key raises
     KeyError."""
     d = Dictionary()
     for key, colour in (('raymond', 'red'), ('rachel', 'blue')):
         d[key] = colour
     self.assertEqual(d.pop('rachel'), 'blue')
     self.assertEqual(d['raymond'], 'red')
     self.assertEqual(len(d), 1)
     with self.assertRaises(KeyError):
         d.pop('john')
    def action6000(self, lineNumber, symbol):
        """Bind the type on top of SS1 to every name above the None sentinel,
        registering each bound name in the dictionary, then drop the sentinel."""
        ss1 = SS1()
        dictionary = Dictionary()
        declared_type = ss1.pop()  # renamed: `type` shadowed the builtin
        # `is not None` instead of `!= None`: an identity test cannot be
        # fooled by an attribute class that overrides __eq__.
        while ss1.top() is not None:
            name = ss1.pop()
            name.bindType(declared_type['name'])
            dictionary.setObject(name.name, name)
        ss1.pop()  # discard the None sentinel
Ejemplo n.º 16
0
 def __init__(self, aHt, aClassId, aCode, aLnotab):
     """Class wrapper for the tracer.

     Keeps three Dictionary tables (static fields, attributes, methods), all
     sharing the helper object aHt.  SpecialBehaviorId starts at -1
     (presumably meaning "not assigned yet" — confirm against its readers).
     """
     self.hT = aHt
     self.staticField = Dictionary(self.hT)
     self.attributes = Dictionary(self.hT)
     self.method = Dictionary(self.hT)
     self.lnotab = aLnotab
     self.code = aCode
     self.name = aCode.co_name
     self.Id = aClassId
     self.SpecialBehaviorId = -1
Ejemplo n.º 17
0
class Method(object):

    def __init__(self, aHt, aId, aCode, aLnotab, aIdClass, aArgs):
        self.hT = aHt
        self.locals = Dictionary(self.hT)
        self.argument = ()
        self.lnotab = aLnotab
        self.code = aCode
        self.name = aCode.co_name
        self.idClass = aIdClass
        self.Id = aId
        self.__updateArgument__(aArgs)

    def __getId__(self):
        return self.Id

    def __getLnotab__(self):
        return self.lnotab

    def __getLocals__(self):
        return self.locals

    def __getTarget__(self):
        return self.idClass

    def __getArgs__(self):
        return self.argument
    
    def __getArgsValues__(self, aLocals):
        argValues = ()
        for name in self.argument:
            if aLocals.has_key(name):
                argValues = argValues + (aLocals[name],)
        #TODO: analizar caso para cuando sean tuple, list, dict
        return argValues

    def __updateArgument__(self, aArgs):
        #no se registra self como argumento valido
        for theArg in aArgs:
            if not theArg == 'self':
                self.argument += (theArg,)
        theParentId = self.Id
        if self.hT.FLAG_DEBUGG:
            for theIndex in range(len(aArgs)):
                if not aArgs[theIndex] == 'self':
                    print self.hT.itsEvents['register'],
                    print self.hT.itsObjects['local'],
                    print theIndex + 1,
                    print theParentId,
                    print aArgs[theIndex]
                    raw_input() 
                    
    def __registerLocals__(self, aLocal):
        self.locals.__update__(aLocal,self.Id,self.argument)
Ejemplo n.º 18
0
 def __init__(self, filename):
     """Set up a word-guessing game: load the word list from `filename` and
     reset all guessing state (the attribute names suggest a hangman-style
     game — blanked-out word, guessed letters)."""
     # game setting parameters
     self.__my_dictionary = Dictionary(filename)
     #print(self.__my_dictionary)
     self.__word_length = 0
     self.__guess_num = 0
     self.__want_remaining_num = False
     # guessing status
     self.__remaining_word_list = []
     self.__guessed_letter_list = []
     self.__current_blanked_out_version = ""
    def action55000(self, lineNumber, symbol):
        """Declare the first variable of a group: push a None sentinel and a
        new AttrVar onto SS1; duplicate names are reported instead."""
        # @todo problem Fields in record cannot have name equal to usual variables
        if Dictionary().isFound(symbol):
            E.E(lineNumber, 0).varDoubleDefinition(symbol)
        else:
            stack = SS1()
            stack.push(None)
            stack.push(AtrClasses.AttrVar(symbol))
    def action22(self, lineNumber, symbol):
        ' not done '
#        symbol = symbol['name']
        dictionary = Dictionary()
        ss1 = SS1()
        ss1.push(symbol)
        
        # NOTE(review): one value is pushed above but two are popped below, so
        # _attrProc comes from whatever was already on the stack.  Together
        # with the ' not done ' docstring this looks unfinished — confirm the
        # intended stack discipline before relying on this action.
        _restype = ss1.pop()
        _attrProc = ss1.pop()
        _attrProc.setResType(_restype)
        dictionary.setObject(_attrProc.name, _attrProc)
 def getType(self, right):
     """Return the type of `right`.

     Attribute objects carry their type directly; anything else is treated
     as a value whose name is resolved through the Dictionary (unknown
     names are reported via E.E).
     """
     dictionary = Dictionary()
     # isinstance accepts a tuple — one call instead of eight chained `or`s
     attr_types = (AtrClasses.AttrElemAction, AtrClasses.AttrVar,
                   AtrClasses.AttrIntConst, AtrClasses.AttrFloatConst,
                   AtrClasses.AttrStringConst, AtrClasses.AttrField,
                   AtrClasses.AttrParam, AtrClasses.AttrRelation)
     if isinstance(right, attr_types):
         return right.type
     right = right.getValue()
     rightType = dictionary.get(right)
     # `in` replaces the Python-2-only dict.has_key()
     if 'object' not in rightType:
         E.E(self.lineNumber, 0).unknownIdentifier(right)
     return rightType['object'].type
Ejemplo n.º 22
0
def main():
    """Walk a 4x4 letter grid from successive start cells, accumulating words
    and collecting dictionary matches into `result`.

    NOTE(review): inferred from the branches below — `dir` is -1 for a fresh
    start, 0 after a rightward move (j += 1), 1 after a downward move
    (i += 1); (startX, startY) is the current start cell and calcStart
    advances it.  Confirm against calcStart/add_valid_word.
    """
    dictionary = Dictionary(path)
    grid = process_input()
    word = ''
    result = []
    i = 0
    j = 0
    dir = -1
    startX = 0
    startY = 0
    while startX <= 3 and startY <= 3:
        letter = grid[i][j]
        word += letter
        # print(word)
        # complete word found: restart from the next start cell
        if add_valid_word(dictionary, result, word):
            word = ''
            dir = -1
            startX, startY = calcStart(startX, startY)
            i = startY
            j = startX
            continue
        # prefix of some dictionary word: keep extending (right, then down)
        if dictionary.is_partial_match(word):
            if j == 3:
                if i == 3:
                    break
                else:
                    i += 1
                    dir = 1
            else:
                j += 1
                dir = 0
        else:
            # dead end: drop the last letter and try another direction
            word = word[:-1]
            if len(word) == 0 or dir == -1:
                startX, startY = calcStart(startX, startY)
                i = startY
                j = startX
                continue
            if dir == 0:
                if startY - j == 0:
                    j += 1

                dir = 1

            else:
                if dir == 1:
                    if startX - i == 0:
                        i += 1
                    else:
                        word = ''
                        dir = -1

    print(result)
Ejemplo n.º 23
0
    def __init__(self):
        """Spell-checker state: alphabet, corpus file paths, word/stopword
        dictionaries, and the learned-word tables."""
        self.alphabet='abcdefghijklmnopqrstuvwxyz'
        self.guten="data/gutenburg_small.txt"
        self.guten_pickle="data/gutenburg_small.pickle"
        #self.american="words/american-english"
        self.gutenburg={}   # filled by Load_gutenburg()
        self.learned={}     # filled by Load_learned()
        self.dictionary=Dictionary("usa")
        self.stopwords=Dictionary("stopwords")

        #self.Load_dictionary()
        self.Load_gutenburg()
        self.Load_learned()
Ejemplo n.º 24
0
 def test_keys_values_items(self):
     """keys(), values() and items() each return a list with the expected
     contents."""
     d = Dictionary()
     d['raymond'] = 'red'
     d['rachel'] = 'blue'
     keys = d.keys()
     self.assertTrue(isinstance(keys, list))
     self.assertEqual(set(keys), {'raymond', 'rachel'})
     values = d.values()
     self.assertTrue(isinstance(values, list))
     self.assertEqual(set(values), {'red', 'blue'})
     
     items = d.items()
     # fixed copy-paste bug: this assertion previously re-checked `values`
     self.assertTrue(isinstance(items, list))
     self.assertEqual(set(items), {('raymond', 'red'),
                                   ('rachel', 'blue')})
Ejemplo n.º 25
0
    def writeMergeOnDisk(self, dictionary, blockNum, postingListsPointersList, termsFreqPointersList):
        """
        private method to write the result block of merging on disk
        :param dictionary: result dictionary
        :param blockNum: number of the new block
        :param postingListsPointersList: list of pointers to posting lists of the terms
        :param termsFreqPointersList: list of pointers to terms frequencies
        :return:
        """
        # Per-block output paths; each file name is suffixed with blockNum.
        DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME + str(blockNum))
        FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME + str(blockNum))
        DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME + str(blockNum))
        PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME + str(blockNum))
        TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME + str(blockNum))

        # Compress the term list via the Dictionary (FC) object; numeric
        # streams go through the variant encoder.
        terms, docsFreq = self.getDetailsFromDict(dictionary)
        FCObj = Dictionary(terms, (COMPRESSION_TYPE, COMPRESSION_BLOCKS))
        encodeObj = PostingList([], VARIANT_ENCODE_TYPE)
        dictionaryStream = FCObj.str
        docsFreqStream = encodeObj.getEncode(docsFreq)
        FCData = self.getFCDataFromDict(FCObj.dict)
        FCDataStream = encodeObj.getEncode(FCData)

        # The dictionary stream is text ('a+'); everything else is binary.
        self.writeToFile(DictionaryPath, dictionaryStream, 'a+')
        self.writeToFile(FCDataPath, FCDataStream, 'ab+')
        self.writeToFile(DocsFreqPath, docsFreqStream, 'ab+')

        postingPointersEncode = PostingList(postingListsPointersList, VARIANT_ENCODE_TYPE)
        termsFreqPointersEncode = PostingList(termsFreqPointersList, VARIANT_ENCODE_TYPE)

        postingListsPointersStream = postingPointersEncode.GetList()
        termsFreqPointersStream = termsFreqPointersEncode.GetList()

        self.writeToFile(PostingListsPointersPath, postingListsPointersStream, 'ab+')
        self.writeToFile(TermsFreqPointersPath, termsFreqPointersStream, 'ab+')
def init_dictionary(train_path, min_token_count):
    """
    Constructs a dictionary from Semantic Scholar JSONs found in 'train_path'.
    :param train_path: file path
        The path to the JSON documents meant for training / validation.
    :param min_token_count:
        The minimum number of times a word has to occur to be included.
    :return: A dictionary of training and development data.
    """
    # Gather every token from every training document (tqdm shows progress).
    tokens = []
    for filename in tqdm(os.listdir(train_path)):
        tokens += extract_tokens_from_json(os.path.join(train_path, filename))

    # Count occurrences, then keep only the words seen at least
    # min_token_count times.
    word_frequencies = Counter(tokens)
    vocabulary = {
        word for word, freq in word_frequencies.items()
        if freq >= min_token_count
    }

    # Construct the dictionary with the sieved vocabulary.
    return Dictionary(vocabulary)
Ejemplo n.º 27
0
    def __init__(self,
                 corpus=None,
                 stop_words=None,
                 K=20,
                 alpha=0.5,
                 beta=0.5,
                 iterations=50):
        """Initialize Gibbs-sampling state for topic modelling over `corpus`.

        K: number of topics.  alpha/beta: Dirichlet prior parameters for the
        topic and word distributions.  iterations: number of sampling passes
        to run later.  Documents are converted to word-id lists through the
        vocabulary, and every word gets a random initial topic assignment.
        """

        self.__vocabulary = Dictionary(stop_words, excluds_stopwords=False)
        docs = [
            self.__vocabulary.doc_to_ids(doc.get_text())
            for doc in corpus.get_documents()
        ]
        self.__V = self.__vocabulary.size(
        )  # number of different words in the vocabulary
        self.__K = K
        self.__alpha = numpy.ones(K) * alpha  # parameter of topics prior
        self.__docs = docs  # a list of documents which include the words
        self.__pers = []  # Array for keeping perplexities over iterations
        self.__beta = numpy.ones(
            self.__vocabulary.size()) * beta  # parameter of words prior
        self.__z_m_n = {}  # topic assignements for documents
        self.__n_m_z = numpy.zeros(
            (len(self.__docs),
             K))  # number of words assigned to topic z in document m
        self.__n_z_t = numpy.zeros(
            (K, self.__vocabulary.size()
             )) + beta  # number of times a word v is assigned to a topic z
        self.__theta = numpy.zeros(
            (len(self.__docs), K))  # topic distribution for each document
        self.__phi = numpy.zeros(
            (K, self.__vocabulary.size()
             ))  # topic-words distribution for whole of corpus
        self.__n_z = numpy.zeros(K) + self.__vocabulary.size(
        ) * beta  # total number of words assigned to a topic z
        self.__iterations = iterations

        for m, doc in enumerate(docs):  # Initialization
            for n, w in enumerate(doc):
                z = numpy.random.randint(
                    0, K
                )  # Randomly assign a topic to a word and increase the counting array
                self.__n_m_z[m, z] += 1
                self.__n_z_t[z, w] += 1
                self.__z_m_n[(m, n)] = z
                self.__n_z[z] += 1
Ejemplo n.º 28
0
    def getDictionaryFromFiles(self, blockNum):
        """
        private method to get the dictionary from the files on the disc
        :param blockNum: the number of the block to get data from
        :return: dictionary of terms
        """
        # Per-block input paths, mirroring the writer's naming scheme.
        DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME + str(blockNum))
        FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME + str(blockNum))
        DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME + str(blockNum))
        PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME + str(blockNum))
        TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME + str(blockNum))

        # NOTE: `dict` shadows the builtin for the rest of this method.
        dict, termsStr, fcData, docsFreq, postingListsPointers, termsFreqPointers = {}, '', [], [], [], []

        FCObj = Dictionary([], (COMPRESSION_TYPE, COMPRESSION_BLOCKS))
        encodeObj = PostingList([], VARIANT_ENCODE_TYPE)

        # The dictionary file is text; the other streams are variant-encoded binary.
        with open(DictionaryPath, 'r') as dictFid, open(FCDataPath, 'rb') as fcDataFid:
            termsStr = dictFid.read()
            fcDataStream = fcDataFid.read()
            fcData = encodeObj.variantDecode(fcDataStream)

        with open(DocsFreqPath, 'rb') as docsFreqFid:
            docsFreqStream = docsFreqFid.read()
            docsFreq = encodeObj.variantDecode(docsFreqStream)

        with open(PostingListsPointersPath, 'rb') as postingListsPointersFid, open(TermsFreqPointersPath,
                                                                                   'rb') as termsFreqPointersFid:
            postingListsPointersStream = postingListsPointersFid.read()
            termsFreqPointersStream = termsFreqPointersFid.read()
            postingListsPointers = encodeObj.variantDecode(postingListsPointersStream)
            termsFreqPointers = encodeObj.variantDecode(termsFreqPointersStream)

            # pointers are stored as gaps; rebuild absolute values
            postingListsPointers = getListFromGaps(postingListsPointers)
            termsFreqPointers = getListFromGaps(termsFreqPointers)

        # decompress the term list block by block
        fcData = self.getFCDataFromFile(fcData)
        terms = []
        FCObj.str = termsStr
        for block in fcData:
            terms.extend(FCObj.getListOfWords(block))

        # entry layout: [docsFreq, -1, posting-list pointer, term-freq pointer]
        for i, term in enumerate(terms):
            dict[term] = [docsFreq[i], -1, postingListsPointers[i], termsFreqPointers[i]]

        return dict, terms
Ejemplo n.º 29
0
    def getDictionaryFromFiles(self):
        """Load the merged dictionary from disk.

        Validates that all five index files exist (exits the process
        otherwise), decodes the compressed term list and pointer streams,
        stores the term list in self.listOfTerms, and returns a dict mapping
        term -> [docsFreq, -1, posting-list pointer, term-freq pointer].
        """
        DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME)
        FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME)
        DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME)
        PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME)
        TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME)

        if not (os.path.isfile(DictionaryPath)) or not (os.path.isfile(FCDataPath)) \
                or not (os.path.isfile(DocsFreqPath)) or not (os.path.isfile(PostingListsPointersPath)) \
                or not (os.path.isfile(TermsFreqPointersPath)):
            print('Error - Invalid File Path! Please Enter a Valid Path..')
            exit(0)
        # NOTE: `dict` shadows the builtin for the rest of this method.
        dict, termsStr, fcData, docsFreq, postingListsPointers, termsFreqPointers = {}, '', [], [], [], []

        FCObj = Dictionary([], (COMPRESSION_TYPE, COMPRESSION_BLOCKS))
        encodeObj = PostingList([], VARIANT_ENCODE_TYPE)

        with open(DictionaryPath, 'r') as dictFid, open(FCDataPath, 'rb') as fcDataFid:
            termsStr = dictFid.read()
            fcDataStream = fcDataFid.read()
            fcData = encodeObj.variantDecode(fcDataStream)

        with open(DocsFreqPath, 'rb') as docsFreqFid:
            docsFreqStream = docsFreqFid.read()
            docsFreq = encodeObj.variantDecode(docsFreqStream)

        with open(PostingListsPointersPath, 'rb') as postingListsPointersFid, open(TermsFreqPointersPath,
                                                                                   'rb') as termsFreqPointersFid:
            postingListsPointersStream = postingListsPointersFid.read()
            termsFreqPointersStream = termsFreqPointersFid.read()
            postingListsPointers = encodeObj.variantDecode(postingListsPointersStream)
            termsFreqPointers = encodeObj.variantDecode(termsFreqPointersStream)

            # pointers are stored as gaps; rebuild absolute values
            postingListsPointers = getListFromGaps(postingListsPointers)
            termsFreqPointers = getListFromGaps(termsFreqPointers)

        # decompress the term list block by block
        fcData = self.getFCDataFromFile(fcData)
        terms = []
        FCObj.str = termsStr
        for block in fcData:
            terms.extend(FCObj.getListOfWords(block))
        self.listOfTerms = terms
        for i, term in enumerate(terms):
            dict[term] = [docsFreq[i], -1, postingListsPointers[i], termsFreqPointers[i]]

        return dict
Ejemplo n.º 30
0
def main():
    path = "/path/to/corpus"
    # コーパスの読み込み
    # 前処理
    documents = []
    for filename in os.listdir(path):
        document = open(os.path.join(path, filename)).read().strip()
        document = preprocess(document)
        documents.append(document)

    # 辞書を作成
    # 低頻度すぎるもの, 高頻度すぎる単語は除去
    dictionary = Dictionary(documents)
    dictionary.filter_extremes(no_below=3, no_above=0.6)

    # 辞書の保存
    dictionary.save("id2word.txt")

    # vocabulary の表示
    vocabulary = dictionary.get_vocabulary()
    print vocabulary, "(%d words)" % len(vocabulary)

    # 特徴量(BoWベクトル)への変換
    for document in documents:
        bowvec = dictionary.doc2bow(document)
Ejemplo n.º 31
0
    def erase(self, key):
        """Remove the node holding `key` from the tree, if present."""
        node_to_be_erased = Dictionary._find(self._root, key)[0]
        if node_to_be_erased is None:
            return

        self._size -= 1

        # Standard BST deletion: replace the node's payload with that of the
        # leftmost node of its right subtree (the in-order successor), or the
        # node itself when there is no right subtree.
        replacing_node = Dictionary._get_left_most(node_to_be_erased.right_son)
        if replacing_node is None:
            replacing_node = node_to_be_erased

        node_to_be_erased.key, node_to_be_erased.value = replacing_node.key, replacing_node.value

        if replacing_node != self._root:
            self._erase(replacing_node)
        else:
            # Delete the root
            # NOTE(review): if the root has a left subtree but no right
            # subtree, this appears to discard the entire left subtree —
            # confirm against _erase's contract and the tree invariants.
            self._root = None
Ejemplo n.º 32
0
 def __init__(self, aHt, aId, aCode, aLnotab, aArgs):
     """Wrap a code object plus bookkeeping for the tracer.

     aHt: shared helper object, also handed to the locals Dictionary.
     aId: identifier assigned to this function.
     aCode: the code object; its co_name becomes self.name.
     aLnotab: line-number table for the code object.
     aArgs: argument names, filtered into self.argument by __updateArgument__.
     """
     self.hT = aHt
     self.locals = Dictionary(self.hT)
     self.argument = ()
     self.lnotab = aLnotab
     self.code = aCode
     self.name = aCode.co_name
     self.Id = aId
     self.__updateArgument__(aArgs)
    def action21(self, lineNumber, symbol):
        ' not done '
#        symbol = symbol['name']
        dictionary = Dictionary()
        _attrProc = AtrClasses.AttrProc()
        ss1 = SS1()
        ss1.push(symbol)
        
        # Unwind parameter groups (each terminated by a None sentinel) down to
        # the 'proc_params' marker pushed by action18.
        while not ss1.top() == 'proc_params':
            _type = ss1.pop()
            while not ss1.top() is None:
                attr = ss1.pop()
                # NOTE(review): looks up `symbol` rather than the group's
                # `_type` — together with the ' not done ' docstring this
                # looks unfinished; confirm the intended behavior.
                attr.type = dictionary.get(symbol)
                _attrProc.addParam(attr)
            ss1.pop()
        ss1.pop()
        _name = ss1.pop()
        _attrProc.setName(_name)
        dictionary.setObject(_name, _attrProc)
        ss1.push(_attrProc)
Ejemplo n.º 34
0
class DictonaryTests(unittest.TestCase):
    """Tests for a Dictionary built from the '../words' word list.

    The expected sizes (380645 words, 238 levenshtein neighbours, 19
    friends, 64413 network members) are pinned to that specific file.
    Class name keeps its original spelling so external references still work.
    """
    
    def setUp(self):
        self.testDic                = Dictionary('../words')
        self.expectedTestFriends    = ('lest','vest','telt','tests','teste','text','nest','teat','rest','testa','testy','fest','pest','tost','jest','gest','yest','hest','tent')
        self.expectedLeviathens     = set()

    def test_size(self):
        self.assertEqual(len(self.testDic._dictonary), 380645, "The size of the dictionary is off.  Check file and constructor")
    
    def test_levenshtein(self):
        self.assertEqual(len(self.testDic._levenshtein('test')), 238)
        #self.assertItemsEqual(self.testDic._leviathens('test').sort(), self.expectedLeviathens,"Set of leviathens don't match")

    def test_friends(self):
        self.assertEqual(len(self.testDic.friends('test')), 19)
        self.assertItemsEqual(self.testDic.friends('test'), self.expectedTestFriends )
        
    def test_networkSize(self):
        self.assertEqual(len(self.testDic.network('test')), 64413)
        pass
    def action19000(self, lineNumber, symbol):
        """Assemble a record type: pop (fields..., None, field_type) groups
        from SS1 down to the 'record' marker, wrap each field as an AttrField,
        and push the finished AttrRecord."""
#        symbol = symbol['name']
        ss1 = SS1()
        _attrRecord = AtrClasses.AttrRecord()
        
        dictionary = Dictionary()
        while not ss1.top() == 'record':
            field_type = ss1.pop()
            while not ss1.top() is None:
                field = ss1.pop()
                _attrField = AtrClasses.AttrField(field)
                # local variables in record
                dictionary.deleteSymbol(field.name)
                _attrField.bindType(field_type['name'])
                _attrRecord.addField( _attrField )
            ss1.pop()

        ss1.pop()
        name = ss1.pop()
        _attrRecord.setName(name)
        ss1.push( _attrRecord )
Ejemplo n.º 36
0
def main(dir_name):
    """Build CAE bigram/trigram dictionaries and run the fluency-processor
    validation.

    NOTE(review): `dir_name` is never used — every builder call reads
    sys.argv[1] instead.  Confirm whether the parameter was meant to replace
    sys.argv[1].
    """
    dict_inst = Dictionary()
    bigram_filename = "CAE_bigrams.txt"
    dict_inst.build_english_bigrams(bigram_filename, sys.argv[1])

    trigram_filename = "CAE_trigrams.txt"
    dict_inst.build_english_trigrams(trigram_filename, sys.argv[1])

    # fourgram_filename = "CAE_fourgrams.txt"
    # dict_inst.build_english_fourgrams(fourgram_filename, sys.argv[1])

    # fivegram_filename = "CAE_fivegrams.txt"
    # dict_inst.build_english_fivegrams(fivegram_filename, sys.argv[1])

    # validate_dictionary(dict_inst, 2)
    # validate_dictionary(dict_inst, 3)
    # validate_dictionary(dict_inst, 4)
    # validate_dictionary(dict_inst, 5)

    # custom_doc = "Test_English_Corpus_Read.txt"
    # dict_inst.build_english_corpus(custom_doc, sys.argv[1])
    # validate_custom_bigram_dict(dict_inst)
    # validate_custom_trigram_dict(dict_inst)

    fluency_processing_inst = FluencyProcessing()
    validate_fluency_processor(dict_inst, fluency_processing_inst)
Ejemplo n.º 37
0
def main(preprocessed_node_path, argument_path, cfg_path, dictionary_path):
    """Fit a Dictionary over the alphabetic lemma tokens of the preprocessed
    nodes and save it to `dictionary_path`.

    :param preprocessed_node_path: path to the preprocessed node dump.
    :param argument_path: path to the argument dump (currently unused).
    :param cfg_path: config file containing a 'dictionary' section.
    :param dictionary_path: where the fitted dictionary is written.
    """
    # Fixed: these previously read the module-level `args` object, silently
    # ignoring the values actually passed as parameters.
    preprocessed_node_path = Path(preprocessed_node_path)
    argument_path = Path(argument_path)
    cfg_path = Path(cfg_path)
    dictionary_path = Path(dictionary_path)

    # NOTE: loaded but not currently consumed below.
    dictionary_parameters = cfg.load(cfg_path)['dictionary']

    #argument_generator_getter = lambda: utils.load(argument_path)

    #argument_nodes_ids = set((
    #        node_id
    #        for argument in argument_generator_getter()
    #        for node_id in argument[0].values()))

    # Use the set of ids to select only the relevant nodes
    # (and not train nlp models on all documents).
    #preprocessed_node_generator_getter = lambda : filter(
    #        lambda node: node['id'] in argument_nodes_ids,
    #        utils.load( preprocessed_node_path))

    # Lazy corpus: one list of alphabetic lemma tokens per node.
    corpus = ([token for token in node['lemmas'] if token.isalpha()]
              for node in utils.load(preprocessed_node_path))

    dictionary = Dictionary()
    dictionary.fit(corpus)
    dictionary.save(dictionary_path)
Ejemplo n.º 38
0
def data_batch(data, params, dictionary_path=None):
    """Build a BatchManager for `data`.

    :param data: corpus to batch.
    :param params: dict holding at least "vocab_cutoff".
    :param dictionary_path: if given, load the dictionary from this path
        instead of building one from `data`.
    :return: a BatchManager over `data` using the dictionary's pad id.
    """
    cutoff = params["vocab_cutoff"]
    if dictionary_path is None:
        dictionary = Dictionary(data, cutoff=cutoff)
    else:
        dictionary = Dictionary()
        # Fixed: previously loaded from `data`, leaving dictionary_path
        # unused — the whole point of this branch is to load from the path.
        dictionary.load(dictionary_path)
    pad_id = dictionary.pad_id()
    batch_manager = BatchManager(data, params, pad_id)

    return batch_manager
Ejemplo n.º 39
0
	def __init__(self):
		"""Assemble the translation pipeline: the n-gram/corpus dictionaries,
		the pre/post processors, the Spanish stemmer, and the fluency
		processor."""
		self.dict = Dictionary()
		#build CCAE dictionaries:
		bigram_filename = "CAE_bigrams.txt"
		trigram_filename = "CAE_trigrams.txt"
		# self.dict.build_english_bigrams(bigram_filename, "data")
		# self.dict.build_english_trigrams(trigram_filename, "data")
		self.dict.build_english_corpus("google_translate.txt", "data")
		# self.stem_helper_inst = StemHelper()
		# self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
		self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
		# self.preProcessors = []
		#add plural processor back in
		# self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(), ConjugationPostProcessor(), PluralPostProcessor()]
		self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(), ConjugationPostProcessor(), PluralPostProcessor()]
		# self.postProcessors = []

		corpusFilename = "Project_Dev_Sentences.txt"
		googleTranslate = "Translation_Strict_Keys.txt"
		self.dict.build_custom_dictionary(corpusFilename, "data", googleTranslate)
		self.spanish_stemmer = snowballstemmer.stemmer('spanish');
		self.fluency_processor_inst = FluencyProcessing()
Ejemplo n.º 40
0
    def initialize(self, depth=int):
        """Build the letter/word structure up to `depth` levels from the
        IOHandler data, saving a snapshot after each level.

        NOTE(review): the default `depth=int` is the *type* int, not a
        number; calling initialize() without an argument would fail at
        `depth <= 2` — presumably callers always pass an integer. Confirm.
        """
        root = Dictionary()
        io = IOHandler()

        io.countLetter()

        lst = io.getLetter()
        
        # Drain the letter mapping; popitem() raises KeyError when empty.
        while True:
            try:
                tmp = lst.popitem()
                self.__totalLetter += int(tmp[1])
                root.addChild(Dictionary(tmp[1], io.mapCharacter(tmp[0]), tmp[0]))
            except KeyError:
               break
        self.dictionary = root
        self.dictionary.setCount(self.__totalLetter)



        if depth <= 2:
            return False

        # Add words level by level, persisting progress after each depth.
        for currentDepth in range(2, depth + 1):
            io.loadWords(currentDepth)
            lst = io.getWords()
            while True:
                try:
                    tmp = lst.popitem()
                    #if not root.probe(tmp[0]):
                        #word is unknown
                    self.__addWord(tmp, root, io)
                except KeyError:
                    break
            self.dictionary = root
            self.depth = currentDepth
            self.save("TrainerDepth", currentDepth)
            print("DEBUG Current depth: ", currentDepth)
Ejemplo n.º 41
0
    def preproc(self):
        """ normalize the data (clean/remove problematic characters) for processing """

        print 'Pre-processing dictionary...'
        
        # remove words that contain non-alphanumeric characters
        dicto_new = Dictionary()
        alpha = re.compile('[\W]')   # matches any non-word character
        num = re.compile('[0-9]')    # matches digits, which get stripped
        for word in self.dicto.get_words_iter():
            word_str = word.string
            
            # remove numbers from words
            word_str = num.sub('', word_str)
            # skip words that become empty once digits are removed
            if len(word_str) <= 0:
                continue
            
            # keep only words with no remaining non-word characters
            if not alpha.search(word_str):
                word.set_string(word_str)
                dicto_new.add_word(word)
                
        print 'Done.'
        
        # replace the old dictionary with the cleaned one
        self.dicto = dicto_new
Ejemplo n.º 42
0
 def __init__(self, aHt, aClassId, aCode, aLnotab):
     """Class wrapper for the tracer.

     Keeps three Dictionary tables (static fields, attributes, methods), all
     sharing the helper object aHt.  SpecialBehaviorId starts at -1
     (presumably meaning "not assigned yet" — confirm against its readers).
     """
     self.hT = aHt
     self.staticField = Dictionary(self.hT)
     self.attributes = Dictionary(self.hT)
     self.method = Dictionary(self.hT)
     self.lnotab = aLnotab
     self.code = aCode
     self.name = aCode.co_name
     self.Id = aClassId
     self.SpecialBehaviorId = -1
Ejemplo n.º 43
0
    def main(self):
        """Spell-check the loaded document: consult the user about every
        token that is neither in the dictionary nor in the ignored set, then
        save the corrected document and the (possibly updated) dictionary."""
        dic = Dictionary()  # initialize the dictionary
        dic.load()  # load its words
        self.__load_document()  # load the input document

        for txt in self.__mtext:
            for i in range(0, len(txt)):
                if not dic.contains(txt[i]) and txt[i] not in self.__ignored:
                    self.__consult_user(dic, txt, i)
                else:
                    pass

        self.__save_document()  # save the corrected text to the output document
        dic.save()  # save the dictionary
	def add_terms_weight(self):
		"""Compute tf-idf for every token and insert the rows into self._table.

		NOTE(review, security): the INSERT is assembled with %-string
		formatting rather than DB-API parameters — injectable if terms come
		from untrusted mail content — and '%ld' only works because Python's
		%-formatting ignores the 'l' modifier.  Consider cur.execute with
		placeholders (table name aside).  Also: changes are never committed
		here — confirm DB.close handles the commit.
		"""
		con = DB.connect()
		table = self._table
		cur = con.cursor()

		email_count = Emails.get_email_count()

		dictionary = Dictionary.fetch_all()

		tokens = Tokens.fetch_token_all()

		for token in tokens:
			tfidf = self.calc_tfidf(email_count, token[Tokens.tf], dictionary[token[Tokens.term]])
			cur.execute("INSERT INTO %s VALUES (%ld, %s, %d, %s)" % (table, token[Tokens.email_id], token[Tokens.term], tfidf, token[Tokens.classs]))

		DB.close(con)
Ejemplo n.º 45
0
class DirectTranslator:
    """Word-for-word Spanish -> English translator backed by a Dictionary."""

    def __init__(self):
        self.dictionary = Dictionary()

    def translateSentence(self, foreignSentence):
        """Translate a UTF-8 encoded Spanish byte string token by token.

        Splitting keeps the non-word separators (the regex group), so
        punctuation and spacing survive; tokens with no dictionary entry are
        passed through unchanged.
        """
        spanishTokens = re.compile(r'(\W+)', re.UNICODE).split(unicode(foreignSentence, 'utf-8'))

        translatedTokens = []
        for token in spanishTokens:
            translatedWords = self.dictionary.englishWordsForSpanishWord(token)
            # Take the first candidate translation, else keep the original.
            translatedTokens.append(translatedWords[0] if translatedWords else token)

        # join is linear, unlike repeated string concatenation.
        return "".join(translatedTokens)
Ejemplo n.º 46
0
    def __init__(self, processes=1):
        """Set up the worker-process count, spelling resources, and the
        per-letter/per-digit leet substitution tables.

        processes: number of worker processes to use; values < 1 fall back
        to a single process.
        """
        #number of threads
        if processes > 0:
            self.processes = processes
        else:
            self.processes = 1
        #load word frequency and spell checker
        self.spelling = Spelling()
        #load the dictionaries
        self.jargon = Dictionary("slang")
        self.dictionary = self.spelling.dictionary
        self.stopwords = self.spelling.stopwords

        # One list of alternative "leet" spellings per letter.
        self.a = [
            "a", "4", "@", "/-\\", "/\\", "/_\\", "^", "aye", "ci", "λ", "∂",
            "//-\\\\", "/=\\", "ae"
        ]
        self.b = [
            "b", "8", "|3", "6", "13", "l3", "]3", "|o", "1o", "lo", "ß",
            "]]3", "|8", "l8", "18", "]8"
        ]
        self.c = ["c", "(", "<", "[", "{", "sea", "see", "k", "©", "¢", "€"]
        self.d = [
            "d", "|]", "l]", "1]", "|)", "l)", "1)", "[)", "|}", "l]", "1}",
            "])", "i>", "|>", "l>", "1>", "0", "cl", "o|", "o1", "ol", "Ð",
            "∂", "ð"
        ]
        self.e = ["e", "3", "&", "[-", "€", "ii", "ə", "£", "iii"]
        self.f = ["f", "|=", "]=", "}", "ph", "(=", "[=", "ʃ", "eph", "ph"]
        self.g = [
            "g", "6", "9", "&", "(_+", "C-", "gee", "jee", "(Y,", "cj", "[",
            "-", "(γ,", "(_-"
        ]
        self.h = [
            "h", "|-|", "#", "[-]", "{-}", "]-[", ")-(", "(-)", ":-:", "}{",
            "}-{", "aych", "╫", "]]-[[", "aech"
        ]
        self.i = ["!", "1", "|", "l", "eye", "3y3", "ai", "i"]
        self.j = [
            "j", "_|", "_/", "]", "</", "_)", "_l", "_1", "¿", "ʝ", "ul", "u1",
            "u|", "jay", "(/", "_]"
        ]
        self.k = [
            "k", "x", "|<", "|x", "|{", "/<", "\\<", "/x", "\\x", "ɮ", "kay"
        ]
        self.l = ["l", "1", "7", "|_", "1_", "l_", "lJ", "£", "¬", "el"]
        self.m = [
            "m", "/\/\\", "|\\/|", "em", "|v|", "[v]", "^^", "nn",
            "//\\\\//\\\\", "(V)", "(\/)", "/|\\", "/|/|", ".\\\\", "/^^\\",
            "/V\\", "|^^|", "JVL", "][\\\\//][", "[]\/[]", "[]v[]", "(t)"
        ]
        self.n = [
            "n", "|\\|", "/\\/", "//\\\\//", "[\\]", "<\\>", "{\\}", "//",
            "[]\\[]", "]\\[", "~", "₪", "/|/", "in"
        ]
        #the ω is because Ω is mistakenly taken as that character sometimes...
        self.o = [
            "o", "0", "()", "oh", "[]", "{}", "¤", "Ω", "ω", "*", "[[]]", "oh"
        ]
        self.p = [
            "p", "|*", "l*", "1*", "|o", "lo", "1o", "|>", "l>", "1>", "|\"",
            "l\"", "1\"", "?", "9", "[]d", "|7", "l7", "17", "q", "|d", "ld",
            "1d", "℗", "|º", "1º", "lº", "þ", "¶", "pee"
        ]
        self.q = [
            "q", "0_", "o_", "0,", "o,", "(,)", "[,]", "<|", "<l", "<1", "cue",
            "9", "¶", "kew"
        ]
        self.r = [
            "r", "|2", "l2", "12", "2", "/2", "I2", "|^", "l^", "1^", "|~",
            "l~", "1~", "lz", "[z", "|`", "l`", "1`", ".-", "®", "Я", "ʁ",
            "|?", "l?", "1?", "arr"
        ]
        self.s = ["s", "5", "$", "z", "es", "2", "§", "š", ",,\\``"]
        self.t = ["t", "7", "+", "-|-", "-l-", "-1-", "1", "']['", "†"]
        self.u = [
            "u", "|_|", "l_l", "1_1", "(_)", "[_]", "{_}", "y3w", "m", "\\_/",
            "\\_\\", "/_/", "µ", "yew", "yoo", "yuu"
        ]
        self.v = ["v", "\\/", "\\\\//", "√"]
        self.w = [
            "w", "\\/\\/", "vv", "'//", "\\\\'", "\\^/", "(n)", "\\x/", "\\|/",
            "\\_|_/", "\\_l_/", "\\_1_/", "\\//\\//", "\\_:_/", "]i[", "uu",
            "Ш", "ɰ", "1/\\/", "\\/1/", "1/1/"
        ]
        self.x = [
            "x", "%", "><", "><,", "}{", "ecks", "x", "*", ")(", "ex", "Ж", "×"
        ]
        self.y = [
            "y", "j", "`/", "`(", "-/", "'/", "\\-/", "Ψ", "φ", "λ", "Ч", "¥",
            "``//", "\\j", "wai"
        ]
        self.z = ["z", "2", "~/_", "%", "7_", "ʒ", "≥", "`/_"]
        # Digit spellings (word forms and look-alike letters).
        self.zero = ["0", "o", "zero", "cero", "()"]
        self.one = ["1", "won", "one", "l", "|", "]["]
        self.two = ["two", "to", "too", "2", "z"]
        self.three = ["e", "3", "three"]
        self.four = ["4", "four", "for", "fore", "a"]
        self.five = ["5", "five", "s"]
        self.six = ["6", "six", "g"]
        self.seven = ["7", "seven", "t", "l"]
        self.eight = ["8", "eight", "b"]
        self.nine = ["9", "nine", "g"]

        # Letter -> substitution list; note the digit tables above are NOT
        # included here (the commented-out entries were left by the author).
        #"0":self.zero,"1":self.one,"2":self.two,"3":self.three,"4":self.four,"5":self.five,"6":self.six,"7":self.seven,"8":self.eight,"9":self.nine
        self.alphabet = {
            "a": self.a,
            "b": self.b,
            "c": self.c,
            "d": self.d,
            "e": self.e,
            "f": self.f,
            "g": self.g,
            "h": self.h,
            "i": self.i,
            "j": self.j,
            "k": self.k,
            "l": self.l,
            "m": self.m,
            "n": self.n,
            "o": self.o,
            "p": self.p,
            "q": self.q,
            "r": self.r,
            "s": self.s,
            "t": self.t,
            "u": self.u,
            "v": self.v,
            "w": self.w,
            "x": self.x,
            "y": self.y,
            "z": self.z
        }
import tkinter as tk
import os
import cv2
import numpy as np
from PIL import Image
import re
import pickle
import sys
from HashTable import HashTable
from Dictionary import Dictionary

# Map common color names to BGR tuples (OpenCV channel order — note
# 'blue' -> (255, 0, 0), i.e. the blue channel first).
colors = Dictionary(
    zip(['blue', 'red', 'green', 'white', 'black', 'yellow'], [(255, 0, 0),
                                                               (0, 0, 255),
                                                               (0, 255, 0),
                                                               (255, 255, 255),
                                                               (0, 0, 0),
                                                               (0, 255, 255)]))


class Person:
    def __init__(self, lastName="", firstName="", ID="", email=""):
        self.lastName = lastName
        self.firstName = firstName
        self.ID = ID
        self.email = email

    def __str__(self):
        getframe_expr = 'sys._getframe({}).f_code.co_name'
        caller = eval(getframe_expr.format(2))
        if caller is "insert":
Ejemplo n.º 48
0
class LeetSpeak:
    """Encode English text as "leet speak" and decode leet back to English.

    Decoding distributes the words across worker processes (see
    `processes`) and scores candidate decodings using jargon, dictionary,
    stop-word, and word-frequency lookups.
    """

    def __init__(self, processes=1):
        """Set up the worker-process count, spelling resources, and the
        per-letter/per-digit leet substitution tables.

        processes: number of worker processes to use; values < 1 fall back
        to a single process.
        """
        #number of threads
        if processes > 0:
            self.processes = processes
        else:
            self.processes = 1
        #load word frequency and spell checker
        self.spelling = Spelling()
        #load the dictionaries
        self.jargon = Dictionary("slang")
        self.dictionary = self.spelling.dictionary
        self.stopwords = self.spelling.stopwords

        # One list of alternative "leet" spellings per letter.
        self.a = [
            "a", "4", "@", "/-\\", "/\\", "/_\\", "^", "aye", "ci", "λ", "∂",
            "//-\\\\", "/=\\", "ae"
        ]
        self.b = [
            "b", "8", "|3", "6", "13", "l3", "]3", "|o", "1o", "lo", "ß",
            "]]3", "|8", "l8", "18", "]8"
        ]
        self.c = ["c", "(", "<", "[", "{", "sea", "see", "k", "©", "¢", "€"]
        self.d = [
            "d", "|]", "l]", "1]", "|)", "l)", "1)", "[)", "|}", "l]", "1}",
            "])", "i>", "|>", "l>", "1>", "0", "cl", "o|", "o1", "ol", "Ð",
            "∂", "ð"
        ]
        self.e = ["e", "3", "&", "[-", "€", "ii", "ə", "£", "iii"]
        self.f = ["f", "|=", "]=", "}", "ph", "(=", "[=", "ʃ", "eph", "ph"]
        self.g = [
            "g", "6", "9", "&", "(_+", "C-", "gee", "jee", "(Y,", "cj", "[",
            "-", "(γ,", "(_-"
        ]
        self.h = [
            "h", "|-|", "#", "[-]", "{-}", "]-[", ")-(", "(-)", ":-:", "}{",
            "}-{", "aych", "╫", "]]-[[", "aech"
        ]
        self.i = ["!", "1", "|", "l", "eye", "3y3", "ai", "i"]
        self.j = [
            "j", "_|", "_/", "]", "</", "_)", "_l", "_1", "¿", "ʝ", "ul", "u1",
            "u|", "jay", "(/", "_]"
        ]
        self.k = [
            "k", "x", "|<", "|x", "|{", "/<", "\\<", "/x", "\\x", "ɮ", "kay"
        ]
        self.l = ["l", "1", "7", "|_", "1_", "l_", "lJ", "£", "¬", "el"]
        self.m = [
            "m", "/\/\\", "|\\/|", "em", "|v|", "[v]", "^^", "nn",
            "//\\\\//\\\\", "(V)", "(\/)", "/|\\", "/|/|", ".\\\\", "/^^\\",
            "/V\\", "|^^|", "JVL", "][\\\\//][", "[]\/[]", "[]v[]", "(t)"
        ]
        self.n = [
            "n", "|\\|", "/\\/", "//\\\\//", "[\\]", "<\\>", "{\\}", "//",
            "[]\\[]", "]\\[", "~", "₪", "/|/", "in"
        ]
        #the ω is because Ω is mistakenly taken as that character sometimes...
        self.o = [
            "o", "0", "()", "oh", "[]", "{}", "¤", "Ω", "ω", "*", "[[]]", "oh"
        ]
        self.p = [
            "p", "|*", "l*", "1*", "|o", "lo", "1o", "|>", "l>", "1>", "|\"",
            "l\"", "1\"", "?", "9", "[]d", "|7", "l7", "17", "q", "|d", "ld",
            "1d", "℗", "|º", "1º", "lº", "þ", "¶", "pee"
        ]
        self.q = [
            "q", "0_", "o_", "0,", "o,", "(,)", "[,]", "<|", "<l", "<1", "cue",
            "9", "¶", "kew"
        ]
        self.r = [
            "r", "|2", "l2", "12", "2", "/2", "I2", "|^", "l^", "1^", "|~",
            "l~", "1~", "lz", "[z", "|`", "l`", "1`", ".-", "®", "Я", "ʁ",
            "|?", "l?", "1?", "arr"
        ]
        self.s = ["s", "5", "$", "z", "es", "2", "§", "š", ",,\\``"]
        self.t = ["t", "7", "+", "-|-", "-l-", "-1-", "1", "']['", "†"]
        self.u = [
            "u", "|_|", "l_l", "1_1", "(_)", "[_]", "{_}", "y3w", "m", "\\_/",
            "\\_\\", "/_/", "µ", "yew", "yoo", "yuu"
        ]
        self.v = ["v", "\\/", "\\\\//", "√"]
        self.w = [
            "w", "\\/\\/", "vv", "'//", "\\\\'", "\\^/", "(n)", "\\x/", "\\|/",
            "\\_|_/", "\\_l_/", "\\_1_/", "\\//\\//", "\\_:_/", "]i[", "uu",
            "Ш", "ɰ", "1/\\/", "\\/1/", "1/1/"
        ]
        self.x = [
            "x", "%", "><", "><,", "}{", "ecks", "x", "*", ")(", "ex", "Ж", "×"
        ]
        self.y = [
            "y", "j", "`/", "`(", "-/", "'/", "\\-/", "Ψ", "φ", "λ", "Ч", "¥",
            "``//", "\\j", "wai"
        ]
        self.z = ["z", "2", "~/_", "%", "7_", "ʒ", "≥", "`/_"]
        # Digit spellings (word forms and look-alike letters).
        self.zero = ["0", "o", "zero", "cero", "()"]
        self.one = ["1", "won", "one", "l", "|", "]["]
        self.two = ["two", "to", "too", "2", "z"]
        self.three = ["e", "3", "three"]
        self.four = ["4", "four", "for", "fore", "a"]
        self.five = ["5", "five", "s"]
        self.six = ["6", "six", "g"]
        self.seven = ["7", "seven", "t", "l"]
        self.eight = ["8", "eight", "b"]
        self.nine = ["9", "nine", "g"]

        #"0":self.zero,"1":self.one,"2":self.two,"3":self.three,"4":self.four,"5":self.five,"6":self.six,"7":self.seven,"8":self.eight,"9":self.nine
        self.alphabet = {
            "a": self.a,
            "b": self.b,
            "c": self.c,
            "d": self.d,
            "e": self.e,
            "f": self.f,
            "g": self.g,
            "h": self.h,
            "i": self.i,
            "j": self.j,
            "k": self.k,
            "l": self.l,
            "m": self.m,
            "n": self.n,
            "o": self.o,
            "p": self.p,
            "q": self.q,
            "r": self.r,
            "s": self.s,
            "t": self.t,
            "u": self.u,
            "v": self.v,
            "w": self.w,
            "x": self.x,
            "y": self.y,
            "z": self.z
        }

    def ConvertToLeet(self, text):
        """
        This is fairly straightforward. Randomly select letters from the array of letters and output it.
        """
        leet = ""

        for letter in list(text):
            # .get() guards against alphabetic characters outside a-z (e.g.
            # accented letters), which would otherwise raise KeyError.
            if letter.isalpha() and self.alphabet.get(letter.lower()):
                values = self.alphabet[letter.lower()]
                random.seed()
                number = random.randint(1, len(values))
                leet += values[number - 1]
            else:
                leet += letter

        return leet

    def rec_parse(self, text, previous=None):
        """
        Input: a (lowercased) word.
        Output: every ordered partition of the word whose pieces were split
        off at positions 1..7, each partition a list of substrings.
        """
        # Avoid the shared-mutable-default pitfall.
        if previous is None:
            previous = []

        possibilities = []
        text_length = len(list(text))

        if text_length > 7:
            length = 8
        else:
            length = text_length

        for q in range(1, length):
            if q < len(text):
                possibilities.append(previous +
                                     [text[0:q], text[q:text_length]])
                possibilities += self.rec_parse(text[q:text_length],
                                                previous + [text[0:q]])

        return possibilities

    def rec_scan_array(self, array, previous=None):
        """
        Input: [['h'], ['e'], ['i', 'l', 't'], ['i', 'l', 't'], ['d', 'o']]
        Output:
         ['h','e','i','i','d'],
         ['h','e','i','i','o'],
         ['h','e','i','1','d'],
         ['h','e','i','1','o'],
         ...
        """
        # Avoid the shared-mutable-default pitfall.
        if previous is None:
            previous = []

        words = []

        passon = copy.copy(array)
        passon.pop(0)

        if len(array) > 0:
            for let in array[0]:
                letters = copy.copy(previous)
                letters.append(let)

                if len(passon) > 0:
                    words += self.rec_scan_array(passon, letters)
                if len(array) == 1:
                    words.append("".join(letters))

                del letters

        del passon

        return words

    def ConvertFromLeet(self, text):
        """
        Convert leet to readable English text. Find all possible words, check which are English, check for misspellings, etc.
        
        Uses self.processes, so when creating the LeetSpeak() object, you can specify the number of processes to use: l=LeetSpeak(processes=3)
        """
        #figure out how many words each thread should work on
        split = text.split(" ")
        thread_count = {}
        thread_words = {}
        thread_num = 1

        for word in split:
            #add word to the array for the current thread
            if thread_num in thread_count:
                thread_count[thread_num] += 1
            else:
                thread_count[thread_num] = 1
                thread_words[thread_num] = []

            #up the thread_num unless it is currently at the number of threads we want, then set it to 1 to start over again
            if self.processes > thread_num:
                thread_num += 1
            else:
                thread_num = 1

        #compute what words each thread should decode
        for num, word in enumerate(split):
            for thread, words in thread_words.items():
                if len(words) < thread_count[thread]:
                    thread_words[thread].append(word)
                    break

        #INFORMATION:
        #if self.processes = 3 and text = "cows are cool or not", thread_words={1: ['cows', 'are'], 2: ['cool', 'or'], 3: ['not']}

        #create the processes
        threads = {}
        num_threads = len(thread_words)
        result_english = ""
        thread_results = {}
        receive_pipe, send_pipe = Pipe()

        for i in range(self.processes):
            if num_threads >= i + 1:
                threads[i] = Process(target=self.ConvertFromLeet_thread,
                                     args=(thread_words[i + 1], i, send_pipe))
                threads[i].start()

        #start and wait for threads
        for i in range(self.processes):
            if num_threads >= i + 1:
                threads[i].join()
                result = receive_pipe.recv()
                thread_results[result[0]] = result[1]

        #close the pipe
        send_pipe.close()

        #sort the results (by thread id, so word order is preserved)
        thread_results = sorted(thread_results.items())

        #make a string out of the results
        for thread, string in thread_results:
            result_english += string + " "

        return result_english.strip()

    def ConvertFromLeet_thread(self, text, thread_id, pipe):
        """
        The function that ConvertFromLeet() calls for each thread.
        Sends [thread_id, decoded_string] back through `pipe`.
        """
        english = []

        #convert each word
        for word in text:
            #get all the character locations less than 8 (e.g. "c,ow", "co,w", and "cow" for "cow")
            #this uses some recursive substringing
            possibilities = self.rec_parse(word.lower())

            #append the actual "word" if it is less than 8 characters, since it might be a single letter (e.g. "n" for "and")
            if len(word) <= 8:
                possibilities.append([word.lower()])

            #calculate what this could be in leet (if it can be anything)
            validwords = []
            for possibility in possibilities:
                letters = []
                valid = 1
                for char in possibility:
                    chars = []
                    for let, val in self.alphabet.items():
                        if char in val:
                            chars.append(let)
                    if len(chars) == 0:
                        valid = 0
                        break
                    else:
                        letters.append(chars)

                    del chars
                if valid == 1 and len(letters) > 0:
                    #generate possible words from given letters
                    words = self.rec_scan_array(letters)
                    validwords += words
                    del words

            #print(validwords)

            #check which valid words are english if there's more than one option
            #go with the most frequently used english word
            if len(validwords) > 0:
                englishwords = {}

                for valid in validwords:
                    score = 1 + 5 / len(valid)

                    #computer talk
                    if self.jargon.Contains(valid) == True:
                        value = 2
                        jargon = self.jargon.Translate(valid)

                        if self.dictionary.Contains(jargon) == True:
                            value = 4

                        score += value

                        if len(jargon) > 0:
                            if jargon in englishwords:
                                englishwords[jargon] += value
                            else:
                                englishwords[jargon] = score

                            score = 0
                    #valid english
                    if len(valid) > 1 and self.dictionary.Contains(
                            valid) == True:
                        score += 5
                    #frequency words
                    if self.stopwords.Contains(valid):
                        score += self.spelling.Frequency(valid)
                    else:
                        score += 5 * self.spelling.Frequency(valid)
                    #same length
                    if len(word) == len(valid):
                        score += 0.1
                    #no numbers
                    if valid.isalpha() == True:
                        score += 1

                    englishwords[valid] = score

                #figure out what word is the most likely to be correctable
                check = []
                skip = 0
                for valid in englishwords:
                    if valid.isalpha():
                        #if there is already a good word in the list, then don't bother with looking up spell corrections
                        if self.dictionary.Contains(
                                valid) and len(valid) >= len(word) / 2:
                            skip = 1
                            check = []
                            break
                        else:
                            check.append(valid)
                if len(check) == 0 and skip == 0:
                    # FIX: englishwords is a dict, so the original
                    # englishwords[0] raised KeyError; fall back to its
                    # first key instead (englishwords is non-empty here).
                    check.append(next(iter(englishwords)))
                #append the corrected version, hopefully
                for item in check:
                    corrected = self.spelling.Check(item,
                                                    dictionary=True,
                                                    fast=True)
                    if corrected != False and len(corrected) > 0:
                        word = corrected[0]

                        if word not in englishwords:
                            frequency = self.spelling.Frequency(word)
                            #if it is on the stop list, don't add as much weight
                            if self.stopwords.Contains(word):
                                value = frequency + 1
                            else:
                                value = 5 * frequency + 1
                            #add weight if in the dictionary
                            if self.dictionary.Contains(word) == True:
                                value += 1
                            #add weight if not numbers
                            if word.isalpha() == True:
                                value += 1
                            englishwords[word] = value
                        else:
                            #if one of the corrected words list is in the englishwords list then up that value by 0.1
                            for correct in corrected:
                                if correct in englishwords:
                                    englishwords[correct] += 0.1

                #get the most likely word
                final = sorted(englishwords.items(),
                               key=operator.itemgetter(1),
                               reverse=True)[0]
                #add word
                english.append(final[0])

        #send the result
        pipe.send([thread_id, " ".join(english)])
Ejemplo n.º 49
0
    def handle_message(self, message: Dict[str, Any],
                       bot_handler: Any) -> None:
        """Dispatch an incoming chat message to the matching feature.

        The first whitespace-separated word of the message (lowercased)
        selects the feature; the reply text is accumulated in ``content``
        and finally sent back through ``bot_handler.send_reply``.
        """
        string = message['content'].split()
        content = "something went wrong"
        check = string[0].lower()
        if check == "calculate":
            content = Calculator.calculate(string)
        elif check == "coding_contest":
            content = Coding().getList()
        elif check == 'define':
            dictword = string[1]
            content = Dictionary.words(dictword)
        elif check == 'telljoke':
            content = Joke.tellJoke()
        elif check == "cricknews":
            content = Cricket().news()
        elif check == "proxy":
            if len(string) > 1:
                if string[1].lower() == "working":
                    content = Proxy.getWorkingProxy()
                    content = "Working Proxies in Your Area \n\n" + content
                elif string[1].lower() == "help":
                    content = Proxy.getHelpList()
                else:
                    content = WitHandler.getInfo(message['content'])
            else:
                content = Proxy.getProxyStatus()
                content = "Proxies Status--->\n\n" + content
        elif check == "play":
            try:
                # Stop any currently playing track before starting a new one.
                # FIX: the program name belongs in check_output's argv list —
                # the original passed it as a separate positional argument
                # (cf. the correct call in the "stop" branch below).
                pid = check_output(["pidof", "mpg321"])
                os.kill(int(pid), signal.SIGKILL)
                os.remove("hello.mp3")
                content = Music.main(string[1:])
            except Exception:
                # Nothing was playing (pidof failed) — just start the song.
                content = Music.main(string[1:])
            bot_handler.send_reply(message, "playing song ")
        elif check == "stop":
            pid = check_output(["pidof", "mpg321"])
            #print(int(pid))
            os.kill(int(pid), signal.SIGKILL)
            content = "Bye........:)"
            bot_handler.send_reply(message, content)
        elif check == "college_notice":
            content = Dean.getNotice()
        elif check == "add" and string[1] == "meeting":
            content = "Enter <Date> as <dd/mm/yyyy> <Time> as <hrs:min> and am/pm and purpose(one word)"

        elif len(string[0].split('/')) == 3:
            # A dd/mm/yyyy first token means the user is entering the
            # meeting details requested by the "add meeting" prompt above.
            res = Meeting.AddMeeting(string)
            if res.lower() == "ok":
                content = "New Meeting successfully Added "
            else:
                content = res
        elif check == "show" and string[1].lower() == "meetings":
            content = Meeting.ShowMeeting()
        elif check == "pnr" and string[1].lower() == "status":
            content = Pnr.getpnr(string[2])
        elif check == "message" or check == "find" or check == "where":
            content = Send_message.sendMessage(string)
        # elif check=="mood":
        #     Mood.capture();
        elif check == "symptom":
            string_1 = " "
            gender = string[1]
            dob = string[2]
            st = string[3:]
            string_1 = string_1.join(st)
            content = Sympton.getExactSympton(string_1)
            try:
                content = "Please Tell me clearly\n" + content
            except Exception:
                # Concatenation failed, so the lookup returned a numeric
                # issue id rather than text — resolve it to a diagnosis.
                p = int(content)
                content = Sympton.getIssueId(str(p), gender, dob)
        elif check == "search":
            st = " "
            strlist = string[1:]
            st = st.join(strlist)
            st = FriendLocation.plot(st)
            if "https" in st:
                webbrowser.open(st)
                content = "check out below link \n" + st
            else:
                content = "Please type exact name :)\n" + st
        elif check == "getjobs":
            content = JOBS.getjobs()
        elif check == "translate":
            stri = " "
            stri = stri.join(list(string[1:]))
            content = Translate.translate(stri)
        elif check == "help":
            Help.Message()
            content = "Message sent"
        elif check == "nearby":
            content = Nearby.Place(string[1])
        else:
            #print(message['content'])
            content = WitHandler.getInfo(message['content'])
        bot_handler.send_reply(message, content)
Ejemplo n.º 50
0
    def writeOnDisk(self, dictionary, blockNum, postingLists):
        """
        private method to write sorted compressed dictionary and posting lists
        on disk, it will write 7 files for current block:
        first file is Dictionary[blockNum].txt which contains the compressed terms.
        second file is FCData[blockNum].txt which contains the compressed front coding
        data (block start, (term Length, Prefix length) ...).
        third file is DocsFreq[blockNum].txt which contains the documents frequencies.
        fourth file is PostingLists[blockNum].txt which contains the postings lists
        for all terms in dictionary.
        fifth file is TermsFreq[blockNum].txt which contains the terms frequency
        for each term in each posting list.
        sixth file is PostingListsPointers[blockNum].txt which contains the pointers
        to the posting list for each term in the disk.
        seventh file is TermsFreqPointers[blockNum].txt which contains the pointers
        to the terms freq list for each term in the disk.
        :param dictionary: dictionary of words and docs frequency and pointer to posting lists
        :param blockNum: number of the block to write on the disk
        :param postingLists: list of posting lists for the dictionary
        :return: void
        """
        # Create the index directory lazily on first write.
        if not (os.path.isdir(self.indexDir)):
            os.mkdir(self.indexDir)

        # One output path per block file, all keyed by the block number.
        DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME + str(blockNum))
        PostingListsPath = PATH.format(self.indexDir, POSTING_LISTS_FILE_NAME + str(blockNum))
        FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME + str(blockNum))
        DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME + str(blockNum))
        TermsFreqPath = PATH.format(self.indexDir, TERMS_FREQ_FILE_NAME + str(blockNum))
        PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME + str(blockNum))
        TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME + str(blockNum))

        # Front-code the sorted terms and variant-encode the frequencies.
        terms, docsFreq = self.getDetailsFromDict(dictionary)
        FCObj = Dictionary(terms, (COMPRESSION_TYPE, COMPRESSION_BLOCKS))
        encodeObj = PostingList([], VARIANT_ENCODE_TYPE)
        dictionaryStream = FCObj.str
        docsFreqStream = encodeObj.getEncode(docsFreq)
        FCData = self.getFCDataFromDict(FCObj.dict)
        FCDataStream = encodeObj.getEncode(FCData)
        postingListsStream = bytearray()
        termsFreqStream = bytearray()

        # NOTE(review): the dictionary stream is written in text append mode
        # ('a+') while every other stream is binary ('ab+') — presumably
        # because FCObj.str is a str rather than bytes; confirm.
        self.writeToFile(DictionaryPath, dictionaryStream, 'a+')
        self.writeToFile(FCDataPath, FCDataStream, 'ab+')
        self.writeToFile(DocsFreqPath, docsFreqStream, 'ab+')

        # Concatenate the per-term posting/term-frequency streams, recording
        # the byte offset of each term's data so it can be seeked later.
        postingListsPointer, termsFreqPointer = 0, 0
        postingListsPointersList, termsFreqPointersList = [], []
        for term in terms:
            currPostingList, currTermFreqList = [], []

            # dictionary[term][1] is used here as the index of the term's
            # posting list; each pair is (doc id, term frequency).
            for pair in postingLists[dictionary[term][1]]:
                currPostingList.append(pair[0])
                currTermFreqList.append(pair[1])

            currEncode = PostingList(currPostingList, VARIANT_ENCODE_TYPE)
            currPostingListStream = currEncode.GetList()
            postingListsStream.extend(currPostingListStream)

            currTermFreqStream = currEncode.getEncode(currTermFreqList)
            termsFreqStream.extend(currTermFreqStream)

            postingListsPointersList.append(postingListsPointer)
            termsFreqPointersList.append(termsFreqPointer)

            postingListsPointer += len(currPostingListStream)
            termsFreqPointer += len(currTermFreqStream)

        # Encode and write the pointer tables, then the data streams.
        postingPointersEncode = PostingList(postingListsPointersList, VARIANT_ENCODE_TYPE)
        termsFreqPointersEncode = PostingList(termsFreqPointersList, VARIANT_ENCODE_TYPE)

        postingListsPointersStream = postingPointersEncode.GetList()
        termsFreqPointersStream = termsFreqPointersEncode.GetList()

        self.writeToFile(PostingListsPointersPath, postingListsPointersStream, 'ab+')
        self.writeToFile(TermsFreqPointersPath, termsFreqPointersStream, 'ab+')

        self.writeToFile(PostingListsPath, postingListsStream, 'ab+')
        self.writeToFile(TermsFreqPath, termsFreqStream, 'ab+')
Ejemplo n.º 51
0
class GameManager(QtCore.QObject):
    """Coordinate a word game between two humans or a human and a bot.

    Wires together the board, dictionary, word collector, players and
    (optionally) a bot, tracks whose turn it is, and announces the final
    result through the ``game_ended`` signal.
    """

    start_move_first = QtCore.Signal()   # asks player 1 to move
    start_move_second = QtCore.Signal()  # asks player 2 / the bot to move
    ask_for_cells = QtCore.Signal()      # asks the board for its free-cell count
    game_ended = QtCore.Signal(str)      # emitted once with the result message

    show_board = QtCore.Signal()

    @QtCore.Slot()
    def step_ended(self):
        """Switch the active player and consume one spare cell."""
        if self.__current_id__ == FIRST_PLAYER:
            self.__current_id__ = SECOND_PLAYER
        else:
            self.__current_id__ = FIRST_PLAYER

        self.__number_of_spare_cells__ -= 1

    @QtCore.Slot(int)
    def get_number_of_cells(self, value):
        """Record the number of still-empty cells reported by the board."""
        self.__number_of_spare_cells__ = value

    @QtCore.Slot()
    def game_ending(self):
        """Compare final scores and emit ``game_ended`` with a result message."""
        # Both game modes compare the same two scores; only the wording of
        # the win/lose messages depends on the number of human players.
        # (The original duplicated the whole score comparison per mode.)
        score1 = self.__player1__.get_score()
        score2 = self.__player2__.get_score()
        if self.__players_number__ == 2:
            win_message, lose_message = 'First player win', 'Second player win'
        else:
            win_message, lose_message = 'You win', 'Computer win'

        if score1 > score2:
            message = win_message
        elif score1 == score2:
            message = 'Draw'
        else:
            message = lose_message
        self.game_ended.emit(message)

    def __init__(self,
                 language: Language,
                 width,
                 height,
                 players_number,
                 level=''):
        """Build the game graph for ``players_number`` players.

        players_number -- 2 for hot-seat play, otherwise the bot takes
                          the second seat.
        level          -- bot difficulty ('EASY'/'MEDIUM'/'HARD'/'HARDEST');
                          ignored in two-player mode, any other value
                          leaves the bot at its default level.
        """
        super(GameManager, self).__init__()
        self.__bot__ = Bot(language, width, height)
        self.__width__ = width
        self.__height__ = height
        self.__players_number__ = players_number

        self.__board__ = Board()
        self.__board__.init_board(width, height)

        self.__dictionary__ = Dictionary()
        self.__dictionary__.load_dictionary()
        self.__wc__ = WordCollector()
        self.__wc__.connect_to_dictionary(self.__dictionary__)
        self.__wc__.connect_to_board(self.__board__)

        self.__dictionary__.setup_connection(self.__wc__)
        self.__board__.setup_connection(self.__wc__)

        self.__first_word__ = self.__dictionary__.get_first_word(width)

        self.__player1__ = Player()
        self.__player2__ = Player()

        # Player 1 is human in both modes.
        self.__player1__.connect_to_board(self.__board__)
        self.__player1__.connect_to_manager(self)

        if players_number == 2:
            self.__player2__.connect_to_board(self.__board__)
            self.__player2__.connect_to_manager(self)
        else:
            # Single-player: hook the bot into the dictionary and board.
            self.__dictionary__.connect_to_bot(self.__bot__)
            self.__dictionary__.used_words_to_bot(self.__bot__)
            levels = {'EASY': EASY, 'MEDIUM': MEDIUM,
                      'HARD': HARD, 'HARDEST': HARDEST}
            if level in levels:
                self.__bot__.set_level(levels[level])
            self.__bot__.connect_to_board(self.__board__)
            self.__bot__.connect_to_manager(self)
            self.__bot__.connect_to_dictionary(self.__dictionary__)
            self.__bot__.get_dictionary()
            self.__bot__.connect_to_used_dictionary(self.__dictionary__)

        self.__current_player__ = self.__player1__
        self.__current_id__ = FIRST_PLAYER
        # The first word pre-fills one full row, so only the remaining
        # rows' cells are playable.
        self.__number_of_spare_cells__ = width * (height - 1)

        # The second seat is either the second human or the bot.
        second_seat = self.__player2__ if players_number == 2 else self.__bot__
        self.__board__.connect_to_players(self.__player1__, second_seat)
        self.__board__.set_first_player(FIRST_PLAYER)

        self.ask_for_cells.connect(self.__board__.get_number_of_cells)
        self.start_move_first.connect(self.__player1__.begin_step)
        self.start_move_second.connect(second_seat.begin_step)

        self.__board__.set_first_word(self.__first_word__)

    def run_game(self):
        """Run one turn, or finish the game when no free cells remain."""
        if self.is_game_ended():
            self.game_ending()
            return
        self.show_board.emit()
        # The original duplicated this branch verbatim for the 1- and
        # 2-player modes; the per-turn behaviour is identical in both.
        if self.__current_id__ == FIRST_PLAYER:
            print("First player: your move")
            self.start_move_first.emit()
        else:
            print("Second player: your move")
            self.start_move_second.emit()

    def get_first_player(self):
        """Return the (human) first player."""
        return self.__player1__

    def get_second_player(self):
        """Return whoever occupies the second seat (player 2 or the bot)."""
        if self.__players_number__ == 2:
            return self.__player2__
        else:
            return self.__bot__

    def get_current_player(self):
        """Return the id (FIRST_PLAYER / SECOND_PLAYER) of the active player."""
        return self.__current_id__

    def get_first_word(self):
        """Return the word the board was seeded with."""
        return self.__first_word__

    def is_game_ended(self):
        """Refresh the spare-cell count from the board and test for game over."""
        self.ask_for_cells.emit()
        return self.__number_of_spare_cells__ == 0
Ejemplo n.º 52
0
class Class(object):
    """Mirror of a traced Python class for the TOD event logger.

    Keeps the class's static fields, attributes and methods in
    ``Dictionary`` tables and streams "set static field" events over
    ``hT``'s socket using its XDR packer.  Written for Python 2
    (``iteritems``, ``has_key``, ``print`` statements, ``thread`` module).
    """

    def __init__(self, aHt, aClassId, aCode, aLnotab):
        # aHt: shared tracing helper (id generator, packer, socket,
        # event/object tables); aCode: the class body's code object.
        self.hT = aHt
        self.staticField = Dictionary(self.hT)
        self.attributes = Dictionary(self.hT)
        self.method = Dictionary(self.hT)
        self.lnotab = aLnotab
        self.code = aCode
        self.name = aCode.co_name
        self.Id = aClassId
        # Id of the artificial "<name>StaticMethod" behaviour; -1 until
        # __addSpecialMethod__ creates it.
        self.SpecialBehaviorId = -1

    def __getId__(self):
        # Tracing id assigned to this class.
        return self.Id

    def __getLnotab__(self):
        # Line-number table handed in at construction.
        return self.lnotab

    def __addMethod__(self, aCode, aLocals):
        # Register every plain function found in aLocals as a method,
        # assigning each a fresh id from hT's id generator.
        for theKey, theValue in aLocals.iteritems():
            if inspect.isfunction(theValue):
                if not (theKey == '__module__'):
                    theId = self.hT.itsId.__get__()
                    self.method.update({theKey: theId})
                    self.hT.itsId.__next__()

    def __addSpecialMethod__(self, aFileName):
        # Create, at most once, an artificial "<name>StaticMethod"
        # behaviour used to attribute class-level field definitions.
        if self.method.has_key("%sStaticMethod" % self.name):
            return
        theId = self.hT.itsId.__get__()
        self.method.update({"%sStaticMethod" % self.name: theId})
        self.hT.itsId.__next__()
        self.hT.__registerSpecialMethod__("%sStaticMethod" % self.name, theId,
                                          self.Id, aFileName)
        self.SpecialBehaviorId = theId

    def __setStaticField__(self, aId, aValue, aFrameLineNo, aCurrentLasti,
                           aParentTimestamp, aDepth):
        # Pack and send a 'set classAttribute' event for static field aId.
        theThreadId = self.hT.__getThreadId__(thread.get_ident())
        theCurrentTimestamp = self.hT.__convertTimestamp__(time.time())
        if not self.hT.itsProbe.has_key(
            (aCurrentLasti, self.SpecialBehaviorId)):
            theProbeId = self.hT.__registerProbe__(aCurrentLasti,
                                                   self.SpecialBehaviorId,
                                                   aFrameLineNo)
        else:
            # NOTE(review): 'hT' and 'aTheSpecialBehaviorId' are undefined
            # here (presumably self.hT and self.SpecialBehaviorId) — this
            # branch raises NameError when taken; confirm and fix.
            theProbeId = hT.itsProbe[(aCurrentLasti, aTheSpecialBehaviorId)]
        self.hT.itsPacker.reset()
        # Field order must match the TOD wire format on the receiving side.
        self.hT.itsPacker.pack_int(self.hT.itsEvents['set'])
        self.hT.itsPacker.pack_int(self.hT.itsObjects['classAttribute'])
        self.hT.itsPacker.pack_int(aId)
        theDataType = self.hT.__getDataType__(aValue)
        self.hT.itsPacker.pack_int(theDataType)
        thePackValue = self.hT.__packValue__(theDataType, aValue)
        self.hT.itsPacker.pack_int(theProbeId)
        self.hT.itsPacker.pack_hyper(aParentTimestamp)
        self.hT.itsPacker.pack_int(aDepth)
        self.hT.itsPacker.pack_hyper(theCurrentTimestamp)
        self.hT.itsPacker.pack_int(theThreadId)
        if self.hT.FLAG_DEBUGG:
            # NOTE(review): 'Id' and 'aCurrentDepth' are undefined
            # (presumably aId and aDepth) — this debug path raises
            # NameError when FLAG_DEBUGG is set; confirm and fix.
            print self.hT.itsEvents['set'],
            print self.hT.itsObjects['classAttribute'],
            print Id,
            print theDataType,
            print thePackValue,
            print theProbeId,
            print aParentTimestamp,
            print aCurrentDepth,
            print theCurrentTimestamp,
            print theThreadId
            raw_input()
        try:
            self.hT.itsSocket.sendall(self.hT.itsPacker.get_buffer())
            pass
        except:
            # Best-effort send: if the TOD server is unreachable, log
            # ("TOD is sleeping" in Spanish) and drop the event.
            print 'TOD está durmiendo :-(', 'set static field'

    def __register_set_StaticField__(self, aLocals, aFrameLineNo,
                                     aParentTimestamp, aDepth, aFileName):
        # Scan the class body's bytecode for STORE_NAME ops and register /
        # emit a set-event for every static field stored there.
        theLower = 0
        theUpper = len(self.code.co_code)
        theCode = self.code.co_code
        while theLower < theUpper:
            theOp = ord(theCode[theLower])
            theNameOp = dis.opname[theOp]
            theLower = theLower + 1
            if theOp >= dis.HAVE_ARGUMENT:
                # 16-bit little-endian opcode argument (Python 2 layout).
                theValue = ord(theCode[theLower])
                theValue += ord(theCode[theLower + 1]) * 256
                if theNameOp == 'STORE_NAME':
                    #print self.code.co_names[theValue]
                    # register the static attribute
                    theStaticFieldName = self.code.co_names[theValue]
                    self.staticField.__updateStaticField__(
                        {theStaticFieldName: aLocals[theStaticFieldName]},
                        self.Id)
                    # create an artificial method to hold the definition
                    # of the class attributes
                    self.__addSpecialMethod__(aFileName)
                    # emit the set-event for the static attribute
                    if not re.search(self.hT.itsMethodPattern,
                                     theStaticFieldName):
                        if not inspect.isfunction(aLocals[theStaticFieldName]):
                            self.__setStaticField__(
                                self.staticField[theStaticFieldName],
                                aLocals[theStaticFieldName], aFrameLineNo,
                                theLower, aParentTimestamp, aDepth)
                theLower = theLower + 2

    def __addStaticField__(self, aLocals):
        # Bulk-register static fields without emitting events.
        self.staticField.__updateStaticField__(aLocals, self.Id)

    def __addAttribute__(self, aName, aObjectId):
        # Register instance attribute aName for object aObjectId with a
        # placeholder id of -1.
        self.attributes.__updateAttr__({aName: -1}, aObjectId)
Ejemplo n.º 53
0
class Parser:
    """Table-driven recursive-descent parser for a Pascal-like language.

    Reads words from an ``InputStream``, classifies them with a
    ``Recognizer`` and matches them against the grammar stored in the
    global ``Rules`` mapping.  Rule symbols of the form ``#N`` (or
    ``#N,M``) trigger semantic actions; ``[[ ... ]]`` marks an optional
    group and ``{ ... }`` a repeatable group.
    Note: Python 2 code (``dict.has_key`` in ``isTerminal``).
    """

    # NOTE(review): the attributes below are class-level and therefore
    # shared by every Parser instance; the InputStream and Recognizer
    # are constructed once at import time.
    word = ""                             # word from input stream
    inputSymbol = ""                     # word class (inputstream)
    ruleSymbol = ""                        # symbol from rule (Rules)

    # Source code
    inputstream = InputStream.InputStream("source.pas")

    # Recognizing
    recognizer = Recognizer.Recognizer()

    RepeatableSymbols = []

    words = []

    def __init__(self):
        # Per-instance table of identifiers/constants seen so far.
        self.lexema = Dictionary()


    def Verify(self, source):
        """
        Effects: Verifies source for syntax correctness
        Requires: file "source" should exist
        """
        self.inputstream = InputStream.InputStream(source)
        self.nextToken()
        SemanticActions().initStandardTypes()
        self.checkAccordingToRule(Rules["programme"][0])
        # Drain the semantic-stack singletons and print the result.
        d2 = SS3().pop()
        d22 = SS4().pop()
        dvicpi = SS3().trace()
        dvicpi2 = SS4().trace()
        d2.printed()
#        d22.printed()
#        print "File successfully parsed"

    def checkAccordingToRule(self, inRuleItem):
        """
        Effects: Checks programme on syntax correctness (one RuleItem)
        """
        # Work on a copy so the pop/insert below never mutate the shared
        # rule stored in the global Rules table.
        RuleItem = self.subSequence(inRuleItem, 0, len(inRuleItem)-1)
        i = 0
        while i < len(RuleItem):
            self.ruleSymbol = RuleItem[i]
            self.nextRuleSymbol = self.getRuleSymbol(RuleItem, i)
            if self.isSemanticAction(self.ruleSymbol):
                i += 1
                continue
#            verify next rule symbol @here
            self.doSemanticAction(RuleItem, i)
            if self.nextRuleSymbol == "0":      # 0 is a special rule symbol after non-terminals in case to do this rule AFTER recursive search of rules
                i = i + 1
                continue
            if self.ruleSymbol == self.inputSymbol: self.addToDictionary()      # after doSemanticAction, otherwise symbol is double defined
            if self.ruleSymbol == "[[":
                # Optional group: keep it (dropping the brackets) when the
                # current input can derive its first symbol, else skip it.
                index = self.findElement(RuleItem, "]]", i)
                if self.isDerivedFrom(RuleItem[i+1]):
                    RuleItem.pop(i)
                    RuleItem.pop(index-1)
                    i -= 1
                else:
                    i= index
            elif self.ruleSymbol == "{":
                # Repeatable group: re-insert its body each time the input
                # still derives its first symbol.
                index = self.findElement(RuleItem, "}", i)
                self.RepeatableSymbols = self.subSequence(RuleItem, i+1, index-1)

                if self.isDerivedFrom(RuleItem[i+1]):
#                    Insert RepeatableSymbols into RuleItem with index i
                    _i = i
                    for reSymbol in self.RepeatableSymbols:
                        RuleItem.insert(_i, reSymbol)
                        _i = _i + 1
                    i -= 1
                else:
                    i = index
            else:
                try:
                    self.treatSymbol()
                except E.ENotFoundRule:
                    return None
#                if foundAppropriateRule == None: return None
            i += 1

    def treatSymbol(self):
        """
        Effects: if is terminal, compares ruleSymbol with inputSymbol
        and reads next word from input stream.
        otherwise applies rule with appropriate left part
        """
        if self.isTerminal(self.ruleSymbol):
            if self.inputSymbol == self.ruleSymbol:
                self.nextToken()
            else:
                E.E(self.inputstream.lineNumber, self.inputstream.code).syntactic(
                            self.inputstream.lineNumber, self.ruleSymbol, self.word, "Inconformity")
        else:
            AppropriateRule = self.chooseAppropriateRule(self.ruleSymbol)
            if AppropriateRule == None:
                raise E.ENotFoundRule
#                return None
#                E.E(self.inputstream.lineNumber, self.inputstream.code).syntactic(
#                            self.inputstream.lineNumber, self.ruleSymbol, self.inputSymbol, "Inconformity")
            self.checkAccordingToRule(AppropriateRule)

    def subSequence(self, Sequence, startIndex, endIndex):
        """
        Effects: Returns sequence [ startIndex .. endIndex ]
        (both bounds inclusive; always returns a fresh list)
        """
        i = startIndex
        Seq = []
        while i <= endIndex:
            Seq.append(Sequence[i])
            i += 1
        return Seq

    def findElement(self, Sequence, element, startindex):
        """
        Effects: Finds index of the specified element
        (scans forward from startindex; assumes the element is present)
        """
        while Sequence[startindex] != element:
            startindex += 1
        return startindex

    def isTerminal(self, symbol):
        """
        Terminals are always in right part of rules
        Non terminals are keys (left) of rules
        """
        return not Rules.has_key(symbol)

    def nextToken(self):
        """
        Reads from input stream word and defines its class
        Modifies: word, inputSymbol
        """
        if not self.inputstream.eof() :
            if self.word: 
                self.words.append(self.word)
                Words().push(self.word)
            self.word = self.inputstream.getWord()
            self.inputSymbol = self.recognizer.getClass(self.word)
            if self.inputSymbol == None:
                E.E(self.inputstream.lineNumber, self.word).e("WrongWord")

    def chooseAppropriateRule(self, ruleSymbol):
        """
        Effects: Returns rule that begins with symbol "inputSymbol"
        if more than one rule has this left part.
        """
        # NOTE(review): this initial assignment is dead — the for loop
        # below immediately rebinds RightRule.
        RightRule = Rules[ruleSymbol][0]
        for RightRule in Rules[ruleSymbol]:
            if (self.inputSymbol in self.findFirstSymbols(RightRule)):
                return RightRule
        return None

    def PossiblePrefixes(self ,Rule):
        """
        Effects: Return possible prefixes (possible input symbols)
        """
        i = 0
        First = []
        while i < len(Rule):
            if (Rule[i] == "[["):
                First.append(Rule[i+1])
                i = self.findElement(Rule, "]]", i+1)
            elif (Rule[i] == "{"):
                First.append(Rule[i+1])
                i = self.findElement(Rule, "}", i+1)
            else:
                # First mandatory symbol ends the prefix collection.
                First.append(Rule[i])
                return First
            i += 1
        return First

    def nextLevel(self, List):
        """
        Returns list with next level - non-terminals
        (expands each non-terminal into the prefixes of its rules)
        """
        out = []
        for symbol in List:
            if (self.isTerminal(symbol)):
                out.append(symbol)
            else:
                for _R in Rules[symbol]:
                    for symbol2 in self.PossiblePrefixes(_R):
                        out.append(symbol2)
        return out

    def findFirstSymbols(self, Rule):
        """
        Effects: Return list of first symbol
        (FIRST set: expands until only terminals remain)
        """
        List = self.PossiblePrefixes(Rule)
        while (self.hasnonTerminals(List)):
            List = self.nextLevel(List)
        return List

    def hasnonTerminals(self, List):
        """
        Effects: Return True if list contains at least one non-terminal
        """
        for symbol in List:
            if not self.isTerminal(symbol):
                return True
        return False

    def isDerivedFrom(self, ruleSymbol):
        # True when the current input symbol can be derived from
        # ruleSymbol (directly, or via some rule for it).
        if (self.isTerminal(ruleSymbol)):
            return ruleSymbol == self.inputSymbol
        else:
            if ruleSymbol == self.inputSymbol:
                return True
            else:
                return (not self.chooseAppropriateRule(ruleSymbol) == None)

    def isSemanticAction(self, ruleSymbol):
        """
        Effects: Returns True if semantic action occurs and needs to be done
        """
        return ruleSymbol[0] == '#'

    def extractSemanticAction(self, ruleSymbol):
        """
        Effects: Return number of semantic action
        Requires: ruleSymbol must be in format "#\d+"
        """
        return ruleSymbol[1:]

    def addToDictionary(self):
        # Record the current word (and a typed attribute object for
        # constants) in the per-instance lexeme table, once.
        if (not self.lexema.isFound(self.word)):
            # @todo extract in comfortable place
            self.lexema.addSymbol( {"name" : self.word, 'class' : self.inputSymbol} )
            if self.inputSymbol == 'intConst':
                self.lexema.setObject( self.word, AtrClasses.AttrIntConst(self.word) )
            if self.inputSymbol == 'floatConst':
                self.lexema.setObject( self.word, AtrClasses.AttrFloatConst(self.word) )
            if self.inputSymbol == 'StringConst':
                self.lexema.setObject( self.word, AtrClasses.AttrStringConst(self.word) )

    def doSemanticAction(self, RuleItem, i):
        # Fire the semantic action(s) encoded in the symbol *after*
        # position i, if any ("#N" or "#N,M" runs one or two actions).
        currentRuleSymbol = None
        if i+1 < len(RuleItem): currentRuleSymbol = RuleItem[i+1]
        if currentRuleSymbol is None: return False

        if self.isSemanticAction(currentRuleSymbol):
            semanticActionNumber = self.extractSemanticAction(currentRuleSymbol)
            if (semanticActionNumber.find(',') == -1):
                SemanticActions().switchAction(self.inputstream.lineNumber, semanticActionNumber, self.word)
            else:
                (semanticActionNumber1, semanticActionNumber2) = semanticActionNumber.split(',')
                SemanticActions().switchAction(self.inputstream.lineNumber, semanticActionNumber1, self.word)
                SemanticActions().switchAction(self.inputstream.lineNumber, semanticActionNumber2, self.word)
                

    def getRuleSymbol(self, RuleItem, i):
        # Safe indexed access: None past the end of the rule.
        if (i < len(RuleItem)):
            return RuleItem[i]
        else:
            return None
Ejemplo n.º 54
0
class SpanishTranslator:
	"""Spanish-to-English translation pipeline.

	Tags and preprocesses the input, generates candidate word-by-word
	translations from a custom dictionary, postprocesses them, then picks
	the most fluent candidate with combined bigram/trigram language
	models.  Note: Python 2 code (``print`` statements, ``xrange``).
	"""

	def __init__(self):
		self.dict = Dictionary()
		#build CCAE dictionaries:
		bigram_filename = "CAE_bigrams.txt"
		trigram_filename = "CAE_trigrams.txt"
		# self.dict.build_english_bigrams(bigram_filename, "data")
		# self.dict.build_english_trigrams(trigram_filename, "data")
		self.dict.build_english_corpus("google_translate.txt", "data")
		# self.stem_helper_inst = StemHelper()
		# self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
		self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
		# self.preProcessors = []
		#add plural processor back in
		# self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(), ConjugationPostProcessor(), PluralPostProcessor()]
		self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(), ConjugationPostProcessor(), PluralPostProcessor()]
		# self.postProcessors = []

		corpusFilename = "Project_Dev_Sentences.txt"
		googleTranslate = "Translation_Strict_Keys.txt"
		self.dict.build_custom_dictionary(corpusFilename, "data", googleTranslate)
		self.spanish_stemmer = snowballstemmer.stemmer('spanish');
		self.fluency_processor_inst = FluencyProcessing()

	def translate(self, original):
		"""Translate one Spanish sentence, returning the most fluent
		English candidate."""

		translated = ""

		#do all the tokenizing, POS-tagging, etc here
		tokens = TaggedWord.TagText(original)
		for t in tokens:
			t.lower()

		# apply preprocessing strategies
		for pre in self.preProcessors:
			tokens = pre.apply(tokens)

		#generate possible translations
		self.translations = []
		self.generateTranslations(tokens, 0)

		#post-processing
		# for i,translation in enumerate(self.translations):
		for i in xrange(0, len(self.translations)):
			for post in self.postProcessors:
				# translation = post.apply(translations)
				self.translations[i] = post.apply(self.translations[i])
				# self.translations[i] = translation

		# select best translation: flatten each token list into a
		# period-stripped sentence string for the language models
		english_sentences = []
		for translation in self.translations:
			sentence = ""
			for token in translation:
				sentence += token.word.decode('utf-8') + " "
			sentence = sentence.replace(".","")
			english_sentences.append(sentence)

			# english_sentences.append(translation)

		# NOTE(review): debug output of every candidate sentence.
		for sentence in english_sentences:
			print sentence
			print

		# ccae_flag = True
		ccae_flag = False
		bigram_prob_list = self.fluency_processor_inst.find_fluent_translation_stupidbackoff(english_sentences, self.dict.custom_bigram_dict, self.dict.custom_bigram_dict_unigram_dict, ccae_flag)
		trigram_prob_list = self.fluency_processor_inst.find_fluent_translation_trigrams(english_sentences, self.dict.custom_trigram_dict, self.dict.custom_trigram_dict_unigram_dict, ccae_flag, self.dict.custom_bigram_dict, self.dict.custom_bigram_dict_unigram_dict)

		#can modify weight of each language model
		# bigram_weight = .5
		bigram_weight = .2
		trigram_weight = .8

		fluent_sentence = self.fluency_processor_inst.find_combined_fluency(english_sentences, bigram_prob_list, trigram_prob_list, bigram_weight, trigram_weight)

		return fluent_sentence
		#to test without the fluency_processor, comment out above line and add:
		# return english_sentences[0]

	def generateTranslations(self, tokens, position):
		"""Recursively expand per-word translation options for
		tokens[position:]; completed candidates are appended to
		self.translations.  Uses random sampling to bound the number of
		branches explored, so output is non-deterministic."""
		# if (position == len(tokens)):
		if position == len(tokens):
			sentence = ""
			for token in tokens:
				sentence += token.word.decode('utf-8') + " "
			# print sentence
			print position

			self.translations.append(tokens)
		else:
			options = self.dict.custom_dict[tokens[position].word]
			# Deep copy so each branch mutates its own token list.
			newTokens = copy.deepcopy(tokens[:])
			# newTokens = tokens[:]

			# Keep only options whose part of speech matches the token.
			match_options = []
			for opt in options:
				if tokens[position].posMatch(opt[1]):
					match_options.append(opt)

			if len(match_options) > 1:
				# With probability 0.2 branch on the first two matching
				# options; otherwise take only the first.
				if random.random() <= .2:
					count = 0
					while (count < 2 and count < len(match_options)):

						newTokens[position].word = match_options[count][0]
						self.generateTranslations(newTokens, position + 1)
						count = count + 1
				else:
					newTokens[position].word = match_options[0][0]
					self.generateTranslations(newTokens, position + 1)

			elif match_options:
				newTokens[position].word = match_options[0][0]
				self.generateTranslations(newTokens, position + 1)
			else:
				# No match: leave the word untranslated and continue.
				self.generateTranslations(newTokens, position + 1)
Ejemplo n.º 55
0
from Counter import Counter

# Directory containing this script, resolved via inspect so the relative
# WEBPAGES_RAW paths work regardless of the current working directory.
fileDirectory = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))

f = File(fileDirectory,
         "/WEBPAGES_RAW/bookkeeping.json")  #locate the json file
jsonData = f.readJson()  # read the json file

myCounter = Counter(len(jsonData))

# loop through the location:url from the bookkeeping.json file
for location, urlLink in jsonData.items():

    wPost = Posting()  # create posting
    # NOTE(review): wDict is created but never used in this visible loop
    # body — confirm whether it is dead code.
    wDict = Dictionary()  # create dictionary

    fileName = "/WEBPAGES_RAW/" + location  # generate a new location
    data = File(
        fileDirectory,
        fileName).readText()  # looking to the file and return html text

    parser = DataParser(data)  # create a parser class
    parser.processData()  #process the given data

    visibleText = parser.getProcessTexts(
    )  # get all the visibletext in the document

    wordList = Tokenize(visibleText).extractToken()  #extra all the text
    wPost.addWord(
        wordList)  #add the word, word's frequency, word's position to Posting
Ejemplo n.º 56
0
for i in range(db.getTextsSize()):
    tempText = db.getTextsData('baseText', i+1)[0][0]
    tempText = parser.parsing(tempText)
    db.updateTexts('formattedText', tempText, i+1)
    pb.inc()
# <- performs a full pass over all raw texts in the database:
# raw texts are fetched and sent for cleaning,
# the filtered texts come back and are written to the database again
    
# op = None
# parser = None
# <- cleanup of objects that are no longer needed (OpenTexts and CorpusParser)

print("Сохранение локальных словарей в базе данных...")
d = Dictionary(p.featureExtraction.getMetricType(),
               p.featureExtraction.getNgrammType(),
               p.featureExtraction.getIgnoreWordOrderStatus())
pb.new(maxValue=db.getTextsSize(), suffix='cохранено')
for i in range(db.getTextsSize()):
    d.addData(db.getTextsData('formattedText', i+1)[0][0])
    tempDict = d.getLastDictionary()
    tempStr = json.dumps(tempDict)
    # Doubling the quotes — presumably escaping for SQL storage; confirm.
    tempStr = tempStr.replace('"', '""') 
    db.updateTexts('localDictionary', tempStr, i+1)
    pb.inc()
    # <- store the local dictionaries in the DB as JSON strings
# NOTE(review): print(1)/print(2) look like leftover debug output.
print(1)
d.idfGlobalCalc()
print(2)
v = Vectorizer(p.featureExtraction.getMetricType())
v.addGlobDict(d.getGlobalDictionary())
Ejemplo n.º 57
0
 def __init__(self):
     """Create the per-instance lexeme Dictionary."""
     self.lexema = Dictionary()
Ejemplo n.º 58
0
    def __init__(self,
                 language: Language,
                 width,
                 height,
                 players_number,
                 level=''):
        """Wire up a game for ``players_number`` players.

        players_number -- 2 for hot-seat play, otherwise the bot takes
                          the second seat.
        level          -- bot difficulty ('EASY'/'MEDIUM'/'HARD'/'HARDEST');
                          ignored in two-player mode.
        """
        super(GameManager, self).__init__()
        self.__bot__ = Bot(language, width, height)
        self.__width__ = width
        self.__height__ = height
        self.__players_number__ = players_number

        self.__board__ = Board()
        self.__board__.init_board(width, height)

        # Dictionary and word collector mediate between board and players.
        self.__dictionary__ = Dictionary()
        self.__dictionary__.load_dictionary()
        self.__wc__ = WordCollector()
        self.__wc__.connect_to_dictionary(self.__dictionary__)
        self.__wc__.connect_to_board(self.__board__)

        self.__dictionary__.setup_connection(self.__wc__)
        self.__board__.setup_connection(self.__wc__)

        self.__first_word__ = self.__dictionary__.get_first_word(width)

        self.__player1__ = Player()
        self.__player2__ = Player()

        if players_number == 2:
            self.__player1__.connect_to_board(self.__board__)
            self.__player1__.connect_to_manager(self)

            self.__player2__.connect_to_board(self.__board__)
            self.__player2__.connect_to_manager(self)
        else:
            # Single-player: player 1 is human, the bot fills seat two.
            self.__player1__.connect_to_board(self.__board__)
            self.__player1__.connect_to_manager(self)
            self.__dictionary__.connect_to_bot(self.__bot__)
            self.__dictionary__.used_words_to_bot(self.__bot__)
            if level == 'EASY':
                self.__bot__.set_level(EASY)
            elif level == 'MEDIUM':
                self.__bot__.set_level(MEDIUM)
            elif level == 'HARD':
                self.__bot__.set_level(HARD)
            elif level == 'HARDEST':
                self.__bot__.set_level(HARDEST)
            self.__bot__.connect_to_board(self.__board__)
            self.__bot__.connect_to_manager(self)
            self.__bot__.connect_to_dictionary(self.__dictionary__)
            self.__bot__.get_dictionary()
            self.__bot__.connect_to_used_dictionary(self.__dictionary__)

        self.__current_player__ = self.__player1__
        self.__current_id__ = FIRST_PLAYER
        # The first word fills one row, so one row's cells aren't playable.
        self.__number_of_spare_cells__ = width * (height - 1)

        if players_number == 2:
            self.__board__.connect_to_players(self.__player1__,
                                              self.__player2__)
            self.__board__.set_first_player(FIRST_PLAYER)
        else:
            self.__board__.connect_to_players(self.__player1__, self.__bot__)
            self.__board__.set_first_player(FIRST_PLAYER)

        self.ask_for_cells.connect(self.__board__.get_number_of_cells)
        self.start_move_first.connect(self.__player1__.begin_step)

        if players_number == 2:
            self.start_move_second.connect(self.__player2__.begin_step)
        else:
            self.start_move_second.connect(self.__bot__.begin_step)

        self.__board__.set_first_word(self.__first_word__)
Ejemplo n.º 59
0
 def __init__(self, name):
     """Split *name* into first/middle/last parts and build the
     instance's feature vector."""
     self.dictionary = Dictionary()
     self.first_name, self.middle_name, self.last_name = self.__splitter(name)
     self.vector = self.__to_vector()