def the_test(test):
    (key, value, expected_result) = test
    calculated_result = Dictionary(key, value)
    print("input: {0}, output: {1}, expected: {2}".format([key, value], calculated_result, expected_result))
    assert Dictionary(key, value) == expected_result
def action27(self, lineNumber, symbol):
    dictionary = Dictionary()
    ss1 = SS1()
    if not dictionary.isFound(symbol):
        E.E(lineNumber, 0).unknownIdentifier(symbol)
    else:
        ss1.push(symbol)
def get_dictionary():
    """ Build a Dictionary based on the Diceware data. """
    dicto = Dictionary()
    print 'Parsing Diceware data...'
    i = 0
    nLines = 7780
    # open file for reading
    with open(Diceware.fname, 'r') as fid:
        for line in fid:
            tokens = Diceware.parse_line(line)
            if tokens is None:
                continue
            # save data to list
            word = Word(tokens['word'], -1, -1, i)
            dicto.add_word(word)
            # increment counter and show progress
            i = i + 1
            progress = float(i) / float(nLines)
            if (progress % 0.05) < 1e-4:
                sys.stdout.write("\r%2.2f%%" % (progress * 100))
                sys.stdout.flush()
    print '\nDone.'
    return dicto
def action18(self, lineNumber, symbol):
    # symbol = symbol['name']
    dictionary = Dictionary()
    ss1 = SS1()
    if dictionary.isFound(symbol):
        E.E(lineNumber, 0).doubleDefinition(symbol, 'procedure or function')
    ss1.push(symbol)
    ss1.push('proc_params')
def AddClick():
    global operator
    DC = Dictionary()
    DC.AddToArray(self.operator, list)
    DC.PrintList(list)
    self.operator = " "
    self.text_Input.set(self.operator)
    return
def action20000(self, lineNumber, symbol):
    # symbol = symbol['name']
    dictionary = Dictionary()
    if not dictionary.isFound(symbol):
        E.E(lineNumber, 0).typeUnknown(symbol)
    type = dictionary.get(symbol)
    ss1 = SS1()
    ss1.push(type)
def action8000(self, lineNumber, symbol):
    # symbol = symbol['name']
    dictionary = Dictionary()
    if dictionary.isFound(symbol):
        E.E(lineNumber, 0).typeDoubleDefinition(symbol)
    _attrUserType = AtrClasses.AttrUserType(symbol)
    ss1 = SS1()
    ss1.push(_attrUserType)
def action4000(self, lineNumber, symbol):
    # symbol = symbol['name']
    ss1 = SS1()
    value = ss1.pop()
    name = ss1.pop()
    name.bind(value)
    dictionary = Dictionary()
    dictionary.setObject(name.name, name)
def __init__(self, aHt, aId, aCode, aLnotab, aArgs):
    self.hT = aHt
    self.locals = Dictionary(self.hT)
    self.argument = ()
    self.lnotab = aLnotab
    self.code = aCode
    self.name = aCode.co_name
    self.Id = aId
    self.__updateArgument__(aArgs)
def action17000(self, lineNumber, symbol):
    # symbol = symbol['name']
    dictionary = Dictionary()
    if not dictionary.isFound(symbol):
        E.E(lineNumber, 0).typeUnknown(symbol)
    ss1 = SS1()
    _attrArray = ss1.pop()
    _attrArray['object'].setName(symbol)
    ss1.push(_attrArray)
def action7000(self, lineNumber, symbol):
    # symbol = symbol['name']
    ss1 = SS1()
    record = ss1.pop()
    dictionary = Dictionary()
    # @todo make interface for array, record, diapason and id
    _class = dictionary.get(record.name.name)['class']
    dictionary.setObject(record.name.name, record)
    row = self.getRow(record.name.name, _class, record)
def action5000(self, lineNumber, symbol):
    # symbol = symbol['name']
    dictionary = Dictionary()
    if dictionary.isFound(symbol):
        E.E(lineNumber, 0).varDoubleDefinition(symbol)
    else:
        _attrVar = AtrClasses.AttrVar(symbol)
        ss1 = SS1()
        ss1.push(_attrVar)
def test_get(self):
    d = Dictionary()
    d['raymond'] = 'red'
    self.assertEqual(d['raymond'], 'red')
    d['rachel'] = 'blue'
    self.assertEqual(d['rachel'], 'blue')
    d['critter'] = 'yellow'
    self.assertEqual(d.get('raymond', 'not found'), 'red')
    self.assertEqual(d.get('john', 'not found'), 'not found')
def test_pop(self):
    d = Dictionary()
    d['raymond'] = 'red'
    d['rachel'] = 'blue'
    self.assertEqual(d.pop('rachel'), 'blue')
    self.assertEqual(d['raymond'], 'red')
    self.assertEqual(len(d), 1)
    with self.assertRaises(KeyError):
        d.pop('john')
def action6000(self, lineNumber, symbol):
    # symbol = symbol['name']
    ss1 = SS1()
    dictionary = Dictionary()
    type = ss1.pop()
    while ss1.top() != None:
        name = ss1.pop()
        name.bindType(type['name'])
        dictionary.setObject(name.name, name)
    ss1.pop()
def __init__(self, aHt, aClassId, aCode, aLnotab):
    self.hT = aHt
    self.staticField = Dictionary(self.hT)
    self.attributes = Dictionary(self.hT)
    self.method = Dictionary(self.hT)
    self.lnotab = aLnotab
    self.code = aCode
    self.name = aCode.co_name
    self.Id = aClassId
    self.SpecialBehaviorId = -1
class Method(object):
    def __init__(self, aHt, aId, aCode, aLnotab, aIdClass, aArgs):
        self.hT = aHt
        self.locals = Dictionary(self.hT)
        self.argument = ()
        self.lnotab = aLnotab
        self.code = aCode
        self.name = aCode.co_name
        self.idClass = aIdClass
        self.Id = aId
        self.__updateArgument__(aArgs)

    def __getId__(self):
        return self.Id

    def __getLnotab__(self):
        return self.lnotab

    def __getLocals__(self):
        return self.locals

    def __getTarget__(self):
        return self.idClass

    def __getArgs__(self):
        return self.argument

    def __getArgsValues__(self, aLocals):
        argValues = ()
        for name in self.argument:
            if aLocals.has_key(name):
                argValues = argValues + (aLocals[name],)
        # TODO: handle the case where the values are tuple, list or dict
        return argValues

    def __updateArgument__(self, aArgs):
        # 'self' is not registered as a valid argument
        for theArg in aArgs:
            if not theArg == 'self':
                self.argument += (theArg,)
        theParentId = self.Id
        if self.hT.FLAG_DEBUGG:
            for theIndex in range(len(aArgs)):
                if not aArgs[theIndex] == 'self':
                    print self.hT.itsEvents['register'],
                    print self.hT.itsObjects['local'],
                    print theIndex + 1,
                    print theParentId,
                    print aArgs[theIndex]
                    raw_input()

    def __registerLocals__(self, aLocal):
        self.locals.__update__(aLocal, self.Id, self.argument)
def __init__(self, filename):
    # game setting parameters
    self.__my_dictionary = Dictionary(filename)
    #print(self.__my_dictionary)
    self.__word_length = 0
    self.__guess_num = 0
    self.__want_remaining_num = False
    # guessing status
    self.__remaining_word_list = []
    self.__guessed_letter_list = []
    self.__current_blanked_out_version = ""
def action55000(self, lineNumber, symbol):
    # symbol = symbol['name']
    dictionary = Dictionary()
    # @todo problem: fields in a record cannot have a name equal to usual variables
    if dictionary.isFound(symbol):
        E.E(lineNumber, 0).varDoubleDefinition(symbol)
    else:
        _attrVar = AtrClasses.AttrVar(symbol)
        ss1 = SS1()
        ss1.push(None)
        ss1.push(_attrVar)
def action22(self, lineNumber, symbol):
    ' not done '
    # symbol = symbol['name']
    dictionary = Dictionary()
    ss1 = SS1()
    ss1.push(symbol)
    _restype = ss1.pop()
    _attrProc = ss1.pop()
    _attrProc.setResType(_restype)
    dictionary.setObject(_attrProc.name, _attrProc)
def getType(self, right):
    dictionary = Dictionary()
    isright = isinstance(right, (AtrClasses.AttrElemAction, AtrClasses.AttrVar,
                                 AtrClasses.AttrIntConst, AtrClasses.AttrFloatConst,
                                 AtrClasses.AttrStringConst, AtrClasses.AttrField,
                                 AtrClasses.AttrParam, AtrClasses.AttrRelation))
    if isright:
        rightType = right.type
    else:
        right = right.getValue()
        rightType = dictionary.get(right)
        if not rightType.has_key('object'):
            E.E(self.lineNumber, 0).unknownIdentifier(right)
        rightType = rightType['object'].type
    return rightType
def main():
    dictionary = Dictionary(path)
    grid = process_input()
    word = ''
    result = []
    i = 0
    j = 0
    dir = -1
    startX = 0
    startY = 0
    while startX <= 3 and startY <= 3:
        letter = grid[i][j]
        word += letter
        # print(word)
        if add_valid_word(dictionary, result, word):
            word = ''
            dir = -1
            startX, startY = calcStart(startX, startY)
            i = startY
            j = startX
            continue
        if dictionary.is_partial_match(word):
            if j == 3:
                if i == 3:
                    break
                else:
                    i += 1
                    dir = 1
            else:
                j += 1
                dir = 0
        else:
            word = word[:-1]
            if len(word) == 0 or dir == -1:
                startX, startY = calcStart(startX, startY)
                i = startY
                j = startX
                continue
            if dir == 0:
                if startY - j == 0:
                    j += 1
                    dir = 1
            else:
                if dir == 1:
                    if startX - i == 0:
                        i += 1
                    else:
                        word = ''
                        dir = -1
    print(result)
def __init__(self):
    self.alphabet = 'abcdefghijklmnopqrstuvwxyz'
    self.guten = "data/gutenburg_small.txt"
    self.guten_pickle = "data/gutenburg_small.pickle"
    #self.american = "words/american-english"
    self.gutenburg = {}
    self.learned = {}
    self.dictionary = Dictionary("usa")
    self.stopwords = Dictionary("stopwords")
    #self.Load_dictionary()
    self.Load_gutenburg()
    self.Load_learned()
def test_keys_values_items(self):
    d = Dictionary()
    d['raymond'] = 'red'
    d['rachel'] = 'blue'
    keys = d.keys()
    self.assertTrue(isinstance(keys, list))
    self.assertEqual(set(keys), {'raymond', 'rachel'})
    values = d.values()
    self.assertTrue(isinstance(values, list))
    self.assertEqual(set(values), {'red', 'blue'})
    items = d.items()
    self.assertTrue(isinstance(items, list))
    self.assertEqual(set(items), {('raymond', 'red'), ('rachel', 'blue')})
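# The three tests above (test_get, test_pop, test_keys_values_items) assume a Dictionary
# type that behaves like a plain mapping but returns real lists from keys()/values()/items().
# The class below is only a minimal illustrative sketch consistent with those tests; the
# actual Dictionary implementation under test is not part of this collection.
class Dictionary(object):
    def __init__(self):
        self._data = {}

    def __setitem__(self, key, value):
        self._data[key] = value

    def __getitem__(self, key):
        return self._data[key]

    def __len__(self):
        return len(self._data)

    def get(self, key, default=None):
        return self._data.get(key, default)

    def pop(self, key):
        # raises KeyError when the key is missing, as test_pop expects
        return self._data.pop(key)

    def keys(self):
        return list(self._data.keys())

    def values(self):
        return list(self._data.values())

    def items(self):
        return list(self._data.items())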
def writeMergeOnDisk(self, dictionary, blockNum, postingListsPointersList, termsFreqPointersList):
    """
    private method to write the result block of merging on disk
    :param dictionary: result dictionary
    :param blockNum: number of the new block
    :param postingListsPointersList: list of pointers to posting lists of the terms
    :param termsFreqPointersList: list of pointers to terms frequencies
    :return:
    """
    DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME + str(blockNum))
    FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME + str(blockNum))
    DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME + str(blockNum))
    PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME + str(blockNum))
    TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME + str(blockNum))
    terms, docsFreq = self.getDetailsFromDict(dictionary)
    FCObj = Dictionary(terms, (COMPRESSION_TYPE, COMPRESSION_BLOCKS))
    encodeObj = PostingList([], VARIANT_ENCODE_TYPE)
    dictionaryStream = FCObj.str
    docsFreqStream = encodeObj.getEncode(docsFreq)
    FCData = self.getFCDataFromDict(FCObj.dict)
    FCDataStream = encodeObj.getEncode(FCData)
    self.writeToFile(DictionaryPath, dictionaryStream, 'a+')
    self.writeToFile(FCDataPath, FCDataStream, 'ab+')
    self.writeToFile(DocsFreqPath, docsFreqStream, 'ab+')
    postingPointersEncode = PostingList(postingListsPointersList, VARIANT_ENCODE_TYPE)
    termsFreqPointersEncode = PostingList(termsFreqPointersList, VARIANT_ENCODE_TYPE)
    postingListsPointersStream = postingPointersEncode.GetList()
    termsFreqPointersStream = termsFreqPointersEncode.GetList()
    self.writeToFile(PostingListsPointersPath, postingListsPointersStream, 'ab+')
    self.writeToFile(TermsFreqPointersPath, termsFreqPointersStream, 'ab+')
def init_dictionary(train_path, min_token_count):
    """
    Constructs a dictionary from Semantic Scholar JSONs found in 'train_path'.

    :param train_path: file path
        The path to the JSON documents meant for training / validation.
    :param min_token_count:
        The minimum number of times a word has to occur to be included.
    :return: A dictionary of training and development data.
    """
    all_training_examples = os.listdir(train_path)
    tokens = []
    for file in tqdm(all_training_examples):
        file_path = os.path.join(train_path, file)
        tokens += extract_tokens_from_json(file_path)

    # Map words to the number of times they occur in the dictionary.
    word_frequencies = dict(Counter(tokens))

    # Sieve the dictionary by excluding all words that appear fewer
    # than min_token_count times.
    vocabulary = set(
        [w for w, f in word_frequencies.items() if f >= min_token_count])

    # Construct the dictionary with the given vocabulary.
    dictionary = Dictionary(vocabulary)
    return dictionary
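# A small, self-contained illustration of the frequency sieve used in init_dictionary
# above: words occurring fewer than min_token_count times are dropped from the vocabulary.
# The token values here are invented purely for the example.
from collections import Counter

tokens = ["graph", "neural", "graph", "model", "graph", "model"]
min_token_count = 2
word_frequencies = dict(Counter(tokens))
vocabulary = {w for w, f in word_frequencies.items() if f >= min_token_count}
assert vocabulary == {"graph", "model"}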
def __init__(self, corpus=None, stop_words=None, K=20, alpha=0.5, beta=0.5, iterations=50):
    self.__vocabulary = Dictionary(stop_words, excluds_stopwords=False)
    docs = [
        self.__vocabulary.doc_to_ids(doc.get_text())
        for doc in corpus.get_documents()
    ]
    self.__V = self.__vocabulary.size()  # number of different words in the vocabulary
    self.__K = K
    self.__alpha = numpy.ones(K) * alpha  # parameter of topics prior
    self.__docs = docs  # a list of documents which include the words
    self.__pers = []  # array for keeping perplexities over iterations
    self.__beta = numpy.ones(self.__vocabulary.size()) * beta  # parameter of words prior
    self.__z_m_n = {}  # topic assignments for documents
    self.__n_m_z = numpy.zeros((len(self.__docs), K))  # number of words assigned to topic z in document m
    self.__n_z_t = numpy.zeros((K, self.__vocabulary.size())) + beta  # number of times a word v is assigned to a topic z
    self.__theta = numpy.zeros((len(self.__docs), K))  # topic distribution for each document
    self.__phi = numpy.zeros((K, self.__vocabulary.size()))  # topic-words distribution for the whole corpus
    self.__n_z = numpy.zeros(K) + self.__vocabulary.size() * beta  # total number of words assigned to a topic z
    self.__iterations = iterations
    for m, doc in enumerate(docs):  # initialization
        for n, w in enumerate(doc):
            # randomly assign a topic to a word and increase the counting arrays
            z = numpy.random.randint(0, K)
            self.__n_m_z[m, z] += 1
            self.__n_z_t[z, w] += 1
            self.__z_m_n[(m, n)] = z
            self.__n_z[z] += 1
def getDictionaryFromFiles(self, blockNum): """ private method to get the dictionary from the files on the disc :param blockNum: the number of the block to get data from :return: dictionary of terms """ DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME + str(blockNum)) FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME + str(blockNum)) DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME + str(blockNum)) PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME + str(blockNum)) TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME + str(blockNum)) dict, termsStr, fcData, docsFreq, postingListsPointers, termsFreqPointers = {}, '', [], [], [], [] FCObj = Dictionary([], (COMPRESSION_TYPE, COMPRESSION_BLOCKS)) encodeObj = PostingList([], VARIANT_ENCODE_TYPE) with open(DictionaryPath, 'r') as dictFid, open(FCDataPath, 'rb') as fcDataFid: termsStr = dictFid.read() fcDataStream = fcDataFid.read() fcData = encodeObj.variantDecode(fcDataStream) with open(DocsFreqPath, 'rb') as docsFreqFid: docsFreqStream = docsFreqFid.read() docsFreq = encodeObj.variantDecode(docsFreqStream) with open(PostingListsPointersPath, 'rb') as postingListsPointersFid, open(TermsFreqPointersPath, 'rb') as termsFreqPointersFid: postingListsPointersStream = postingListsPointersFid.read() termsFreqPointersStream = termsFreqPointersFid.read() postingListsPointers = encodeObj.variantDecode(postingListsPointersStream) termsFreqPointers = encodeObj.variantDecode(termsFreqPointersStream) postingListsPointers = getListFromGaps(postingListsPointers) termsFreqPointers = getListFromGaps(termsFreqPointers) fcData = self.getFCDataFromFile(fcData) terms = [] FCObj.str = termsStr for block in fcData: terms.extend(FCObj.getListOfWords(block)) for i, term in enumerate(terms): dict[term] = [docsFreq[i], -1, postingListsPointers[i], termsFreqPointers[i]] return dict, terms
def getDictionaryFromFiles(self): DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME) FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME) DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME) PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME) TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME) if not (os.path.isfile(DictionaryPath)) or not (os.path.isfile(FCDataPath)) \ or not (os.path.isfile(DocsFreqPath)) or not (os.path.isfile(PostingListsPointersPath)) \ or not (os.path.isfile(TermsFreqPointersPath)): print('Error - Invalid File Path! Please Enter a Valid Path..') exit(0) dict, termsStr, fcData, docsFreq, postingListsPointers, termsFreqPointers = {}, '', [], [], [], [] FCObj = Dictionary([], (COMPRESSION_TYPE, COMPRESSION_BLOCKS)) encodeObj = PostingList([], VARIANT_ENCODE_TYPE) with open(DictionaryPath, 'r') as dictFid, open(FCDataPath, 'rb') as fcDataFid: termsStr = dictFid.read() fcDataStream = fcDataFid.read() fcData = encodeObj.variantDecode(fcDataStream) with open(DocsFreqPath, 'rb') as docsFreqFid: docsFreqStream = docsFreqFid.read() docsFreq = encodeObj.variantDecode(docsFreqStream) with open(PostingListsPointersPath, 'rb') as postingListsPointersFid, open(TermsFreqPointersPath, 'rb') as termsFreqPointersFid: postingListsPointersStream = postingListsPointersFid.read() termsFreqPointersStream = termsFreqPointersFid.read() postingListsPointers = encodeObj.variantDecode(postingListsPointersStream) termsFreqPointers = encodeObj.variantDecode(termsFreqPointersStream) postingListsPointers = getListFromGaps(postingListsPointers) termsFreqPointers = getListFromGaps(termsFreqPointers) fcData = self.getFCDataFromFile(fcData) terms = [] FCObj.str = termsStr for block in fcData: terms.extend(FCObj.getListOfWords(block)) self.listOfTerms = terms for i, term in enumerate(terms): dict[term] = [docsFreq[i], -1, postingListsPointers[i], termsFreqPointers[i]] return dict
def main():
    path = "/path/to/corpus"

    # load the corpus and preprocess it
    documents = []
    for filename in os.listdir(path):
        document = open(os.path.join(path, filename)).read().strip()
        document = preprocess(document)
        documents.append(document)

    # build the dictionary, removing words that are too rare or too frequent
    dictionary = Dictionary(documents)
    dictionary.filter_extremes(no_below=3, no_above=0.6)

    # save the dictionary
    dictionary.save("id2word.txt")

    # show the vocabulary
    vocabulary = dictionary.get_vocabulary()
    print vocabulary, "(%d words)" % len(vocabulary)

    # convert each document to features (bag-of-words vectors)
    for document in documents:
        bowvec = dictionary.doc2bow(document)
def erase(self, key):
    node_to_be_erased = Dictionary._find(self._root, key)[0]
    if node_to_be_erased is None:
        return
    self._size -= 1
    replacing_node = Dictionary._get_left_most(node_to_be_erased.right_son)
    if replacing_node is None:
        replacing_node = node_to_be_erased
    node_to_be_erased.key, node_to_be_erased.value = replacing_node.key, replacing_node.value
    if replacing_node != self._root:
        self._erase(replacing_node)
    else:
        # delete the root
        self._root = None
def action21(self, lineNumber, symbol):
    ' not done '
    # symbol = symbol['name']
    dictionary = Dictionary()
    _attrProc = AtrClasses.AttrProc()
    ss1 = SS1()
    ss1.push(symbol)
    while not ss1.top() == 'proc_params':
        _type = ss1.pop()
        while not ss1.top() is None:
            attr = ss1.pop()
            attr.type = dictionary.get(symbol)
            _attrProc.addParam(attr)
        ss1.pop()
    ss1.pop()
    _name = ss1.pop()
    _attrProc.setName(_name)
    dictionary.setObject(_name, _attrProc)
    ss1.push(_attrProc)
class DictonaryTests(unittest.TestCase):
    def setUp(self):
        self.testDic = Dictionary('../words')
        self.expectedTestFriends = ('lest', 'vest', 'telt', 'tests', 'teste', 'text', 'nest', 'teat', 'rest', 'testa', 'testy', 'fest', 'pest', 'tost', 'jest', 'gest', 'yest', 'hest', 'tent')
        self.expectedLeviathens = set()

    def test_size(self):
        self.assertEqual(len(self.testDic._dictonary), 380645, "The size of the dictionary is off. Check file and constructor")

    def test_levenshtein(self):
        self.assertEqual(len(self.testDic._levenshtein('test')), 238)
        #self.assertItemsEqual(self.testDic._leviathens('test').sort(), self.expectedLeviathens, "Set of leviathens don't match")

    def test_friends(self):
        self.assertEqual(len(self.testDic.friends('test')), 19)
        self.assertItemsEqual(self.testDic.friends('test'), self.expectedTestFriends)

    def test_networkSize(self):
        self.assertEqual(len(self.testDic.network('test')), 64413)
        pass
def action19000(self, lineNumber, symbol):
    # symbol = symbol['name']
    ss1 = SS1()
    _attrRecord = AtrClasses.AttrRecord()
    dictionary = Dictionary()
    while not ss1.top() == 'record':
        field_type = ss1.pop()
        while not ss1.top() is None:
            field = ss1.pop()
            _attrField = AtrClasses.AttrField(field)
            # local variables in record
            dictionary.deleteSymbol(field.name)
            _attrField.bindType(field_type['name'])
            _attrRecord.addField(_attrField)
        ss1.pop()
    ss1.pop()
    name = ss1.pop()
    _attrRecord.setName(name)
    ss1.push(_attrRecord)
def main(dir_name):
    dict_inst = Dictionary()

    bigram_filename = "CAE_bigrams.txt"
    dict_inst.build_english_bigrams(bigram_filename, sys.argv[1])

    trigram_filename = "CAE_trigrams.txt"
    dict_inst.build_english_trigrams(trigram_filename, sys.argv[1])

    # fourgram_filename = "CAE_fourgrams.txt"
    # dict_inst.build_english_fourgrams(fourgram_filename, sys.argv[1])

    # fivegram_filename = "CAE_fivegrams.txt"
    # dict_inst.build_english_fivegrams(fivegram_filename, sys.argv[1])

    # validate_dictionary(dict_inst, 2)
    # validate_dictionary(dict_inst, 3)
    # validate_dictionary(dict_inst, 4)
    # validate_dictionary(dict_inst, 5)

    # custom_doc = "Test_English_Corpus_Read.txt"
    # dict_inst.build_english_corpus(custom_doc, sys.argv[1])
    # validate_custom_bigram_dict(dict_inst)
    # validate_custom_trigram_dict(dict_inst)

    fluency_processing_inst = FluencyProcessing()
    validate_fluency_processor(dict_inst, fluency_processing_inst)
def main(preprocessed_node_path, argument_path, cfg_path, dictionary_path):
    # use the parameters passed in rather than re-reading the global `args`
    preprocessed_node_path = Path(preprocessed_node_path)
    argument_path = Path(argument_path)
    cfg_path = Path(cfg_path)
    dictionary_path = Path(dictionary_path)
    dictionary_parameters = cfg.load(cfg_path)['dictionary']

    #argument_generator_getter = lambda: utils.load(argument_path)
    #argument_nodes_ids = set((
    #    node_id
    #    for argument in argument_generator_getter()
    #    for node_id in argument[0].values()))

    # Use the set of ids to select only the relevant nodes
    # (and not train nlp models on all documents).
    #preprocessed_node_generator_getter = lambda: filter(
    #    lambda node: node['id'] in argument_nodes_ids,
    #    utils.load(preprocessed_node_path))

    corpus = ([token for token in node['lemmas'] if token.isalpha()]
              for node in utils.load(preprocessed_node_path))

    dictionary = Dictionary()
    dictionary.fit(corpus)
    dictionary.save(dictionary_path)
def data_batch(data, params, dictionary_path=None):
    cutoff = params["vocab_cutoff"]
    if dictionary_path is None:
        dictionary = Dictionary(data, cutoff=cutoff)
    else:
        dictionary = Dictionary()
        dictionary.load(dictionary_path)
    pad_id = dictionary.pad_id()
    batch_manager = BatchManager(data, params, pad_id)
    return batch_manager
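# Usage sketch for data_batch above: build the vocabulary from the data on a first run,
# then reuse a previously saved dictionary on later runs. The params keys besides
# "vocab_cutoff", the "vocab.dict" path, and the train_data/eval_data variables are
# illustrative assumptions standing in for already-loaded corpora, not part of the
# original snippet.
params = {"vocab_cutoff": 5}
train_batches = data_batch(train_data, params)                               # build Dictionary from data
eval_batches = data_batch(eval_data, params, dictionary_path="vocab.dict")   # load a saved Dictionary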
def __init__(self):
    self.dict = Dictionary()

    # build CCAE dictionaries:
    bigram_filename = "CAE_bigrams.txt"
    trigram_filename = "CAE_trigrams.txt"
    # self.dict.build_english_bigrams(bigram_filename, "data")
    # self.dict.build_english_trigrams(trigram_filename, "data")
    self.dict.build_english_corpus("google_translate.txt", "data")

    # self.stem_helper_inst = StemHelper()
    # self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
    self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
    # self.preProcessors = []

    # add plural processor back in
    # self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(), ConjugationPostProcessor(), PluralPostProcessor()]
    self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(), ConjugationPostProcessor(), PluralPostProcessor()]
    # self.postProcessors = []

    corpusFilename = "Project_Dev_Sentences.txt"
    googleTranslate = "Translation_Strict_Keys.txt"
    self.dict.build_custom_dictionary(corpusFilename, "data", googleTranslate)

    self.spanish_stemmer = snowballstemmer.stemmer('spanish')
    self.fluency_processor_inst = FluencyProcessing()
def initialize(self, depth: int):
    root = Dictionary()
    io = IOHandler()
    io.countLetter()
    lst = io.getLetter()
    while True:
        try:
            tmp = lst.popitem()
            self.__totalLetter += int(tmp[1])
            root.addChild(Dictionary(tmp[1], io.mapCharacter(tmp[0]), tmp[0]))
        except KeyError:
            break
    self.dictionary = root
    self.dictionary.setCount(self.__totalLetter)
    if depth <= 2:
        return False
    for currentDepth in range(2, depth + 1):
        io.loadWords(currentDepth)
        lst = io.getWords()
        while True:
            try:
                tmp = lst.popitem()
                #if not root.probe(tmp[0]):  # word is unknown
                self.__addWord(tmp, root, io)
            except KeyError:
                break
        self.dictionary = root
        self.depth = currentDepth
        self.save("TrainerDepth", currentDepth)
        print("DEBUG Current depth: ", currentDepth)
def preproc(self):
    """ normalize the data (clean/remove problematic characters) for processing """
    print 'Pre-processing dictionary...'
    # remove words that contain non-alphanumeric characters
    dicto_new = Dictionary()
    alpha = re.compile('[\W]')
    num = re.compile('[0-9]')
    for word in self.dicto.get_words_iter():
        word_str = word.string
        # remove numbers from words
        word_str = num.sub('', word_str)
        if len(word_str) <= 0:
            continue
        if not alpha.search(word_str):
            word.set_string(word_str)
            dicto_new.add_word(word)
    print 'Done.'
    self.dicto = dicto_new
def main(self):
    dic = Dictionary()       # initialize the dictionary
    dic.load()               # load words
    self.__load_document()   # load the input document
    for txt in self.__mtext:
        for i in range(0, len(txt)):
            if not dic.contains(txt[i]) and txt[i] not in self.__ignored:
                self.__consult_user(dic, txt, i)
            else:
                pass
    self.__save_document()   # save the corrected text to the output document
    dic.save()               # save the dictionary
def add_terms_weight(self):
    con = DB.connect()
    table = self._table
    cur = con.cursor()
    email_count = Emails.get_email_count()
    dictionary = Dictionary.fetch_all()
    tokens = Tokens.fetch_token_all()
    for token in tokens:
        tfidf = self.calc_tfidf(email_count, token[Tokens.tf], dictionary[token[Tokens.term]])
        # quote the string columns and format tfidf as a float so the INSERT is valid SQL
        cur.execute("INSERT INTO %s VALUES (%d, '%s', %f, '%s')"
                    % (table, token[Tokens.email_id], token[Tokens.term], tfidf, token[Tokens.classs]))
    DB.close(con)
class DirectTranslator:
    def __init__(self):
        self.dictionary = Dictionary()

    def translateSentence(self, foreignSentence):
        translatedTokens = []
        translatedSentence = ""
        spanishTokens = re.compile('(\W+)', re.UNICODE).split(unicode(foreignSentence, 'utf-8'))
        for token in spanishTokens:
            translatedWords = self.dictionary.englishWordsForSpanishWord(token)
            if translatedWords:
                translatedWord = translatedWords[0]
                translatedTokens.append(translatedWord)
            else:
                translatedTokens.append(token)
        for token in translatedTokens:
            translatedSentence = translatedSentence + token
        return translatedSentence
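# Illustrative use of DirectTranslator above (Python 2, since translateSentence calls
# unicode()). The Spanish input and the gloss in the comment are only a sketch: the actual
# output depends entirely on the word pairs loaded into the Dictionary. Note that the
# final accumulation loop is equivalent to "".join(translatedTokens).
translator = DirectTranslator()
print translator.translateSentence("el gato negro")   # e.g. "the cat black" with a word-for-word dictionary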
def __init__(self, processes=1): #number of threads if processes > 0: self.processes = processes else: self.processes = 1 #load word frequency and spell checker self.spelling = Spelling() #load the dictionaries self.jargon = Dictionary("slang") self.dictionary = self.spelling.dictionary self.stopwords = self.spelling.stopwords self.a = [ "a", "4", "@", "/-\\", "/\\", "/_\\", "^", "aye", "ci", "λ", "∂", "//-\\\\", "/=\\", "ae" ] self.b = [ "b", "8", "|3", "6", "13", "l3", "]3", "|o", "1o", "lo", "ß", "]]3", "|8", "l8", "18", "]8" ] self.c = ["c", "(", "<", "[", "{", "sea", "see", "k", "©", "¢", "€"] self.d = [ "d", "|]", "l]", "1]", "|)", "l)", "1)", "[)", "|}", "l]", "1}", "])", "i>", "|>", "l>", "1>", "0", "cl", "o|", "o1", "ol", "Ð", "∂", "ð" ] self.e = ["e", "3", "&", "[-", "€", "ii", "ə", "£", "iii"] self.f = ["f", "|=", "]=", "}", "ph", "(=", "[=", "ʃ", "eph", "ph"] self.g = [ "g", "6", "9", "&", "(_+", "C-", "gee", "jee", "(Y,", "cj", "[", "-", "(γ,", "(_-" ] self.h = [ "h", "|-|", "#", "[-]", "{-}", "]-[", ")-(", "(-)", ":-:", "}{", "}-{", "aych", "╫", "]]-[[", "aech" ] self.i = ["!", "1", "|", "l", "eye", "3y3", "ai", "i"] self.j = [ "j", "_|", "_/", "]", "</", "_)", "_l", "_1", "¿", "ʝ", "ul", "u1", "u|", "jay", "(/", "_]" ] self.k = [ "k", "x", "|<", "|x", "|{", "/<", "\\<", "/x", "\\x", "ɮ", "kay" ] self.l = ["l", "1", "7", "|_", "1_", "l_", "lJ", "£", "¬", "el"] self.m = [ "m", "/\/\\", "|\\/|", "em", "|v|", "[v]", "^^", "nn", "//\\\\//\\\\", "(V)", "(\/)", "/|\\", "/|/|", ".\\\\", "/^^\\", "/V\\", "|^^|", "JVL", "][\\\\//][", "[]\/[]", "[]v[]", "(t)" ] self.n = [ "n", "|\\|", "/\\/", "//\\\\//", "[\\]", "<\\>", "{\\}", "//", "[]\\[]", "]\\[", "~", "₪", "/|/", "in" ] #the ω is because Ω is mistakenly taken as that character sometimes... 
self.o = [ "o", "0", "()", "oh", "[]", "{}", "¤", "Ω", "ω", "*", "[[]]", "oh" ] self.p = [ "p", "|*", "l*", "1*", "|o", "lo", "1o", "|>", "l>", "1>", "|\"", "l\"", "1\"", "?", "9", "[]d", "|7", "l7", "17", "q", "|d", "ld", "1d", "℗", "|º", "1º", "lº", "þ", "¶", "pee" ] self.q = [ "q", "0_", "o_", "0,", "o,", "(,)", "[,]", "<|", "<l", "<1", "cue", "9", "¶", "kew" ] self.r = [ "r", "|2", "l2", "12", "2", "/2", "I2", "|^", "l^", "1^", "|~", "l~", "1~", "lz", "[z", "|`", "l`", "1`", ".-", "®", "Я", "ʁ", "|?", "l?", "1?", "arr" ] self.s = ["s", "5", "$", "z", "es", "2", "§", "š", ",,\\``"] self.t = ["t", "7", "+", "-|-", "-l-", "-1-", "1", "']['", "†"] self.u = [ "u", "|_|", "l_l", "1_1", "(_)", "[_]", "{_}", "y3w", "m", "\\_/", "\\_\\", "/_/", "µ", "yew", "yoo", "yuu" ] self.v = ["v", "\\/", "\\\\//", "√"] self.w = [ "w", "\\/\\/", "vv", "'//", "\\\\'", "\\^/", "(n)", "\\x/", "\\|/", "\\_|_/", "\\_l_/", "\\_1_/", "\\//\\//", "\\_:_/", "]i[", "uu", "Ш", "ɰ", "1/\\/", "\\/1/", "1/1/" ] self.x = [ "x", "%", "><", "><,", "}{", "ecks", "x", "*", ")(", "ex", "Ж", "×" ] self.y = [ "y", "j", "`/", "`(", "-/", "'/", "\\-/", "Ψ", "φ", "λ", "Ч", "¥", "``//", "\\j", "wai" ] self.z = ["z", "2", "~/_", "%", "7_", "ʒ", "≥", "`/_"] self.zero = ["0", "o", "zero", "cero", "()"] self.one = ["1", "won", "one", "l", "|", "]["] self.two = ["two", "to", "too", "2", "z"] self.three = ["e", "3", "three"] self.four = ["4", "four", "for", "fore", "a"] self.five = ["5", "five", "s"] self.six = ["6", "six", "g"] self.seven = ["7", "seven", "t", "l"] self.eight = ["8", "eight", "b"] self.nine = ["9", "nine", "g"] #"0":self.zero,"1":self.one,"2":self.two,"3":self.three,"4":self.four,"5":self.five,"6":self.six,"7":self.seven,"8":self.eight,"9":self.nine self.alphabet = { "a": self.a, "b": self.b, "c": self.c, "d": self.d, "e": self.e, "f": self.f, "g": self.g, "h": self.h, "i": self.i, "j": self.j, "k": self.k, "l": self.l, "m": self.m, "n": self.n, "o": self.o, "p": self.p, "q": self.q, "r": self.r, "s": self.s, "t": self.t, "u": self.u, "v": self.v, "w": self.w, "x": self.x, "y": self.y, "z": self.z }
import tkinter as tk
import os
import cv2
import numpy as np
from PIL import Image
import re
import pickle
import sys
from HashTable import HashTable
from Dictionary import Dictionary

colors = Dictionary(
    zip(['blue', 'red', 'green', 'white', 'black', 'yellow'],
        [(255, 0, 0), (0, 0, 255), (0, 255, 0), (255, 255, 255), (0, 0, 0), (0, 255, 255)]))


class Person:
    def __init__(self, lastName="", firstName="", ID="", email=""):
        self.lastName = lastName
        self.firstName = firstName
        self.ID = ID
        self.email = email

    def __str__(self):
        getframe_expr = 'sys._getframe({}).f_code.co_name'
        caller = eval(getframe_expr.format(2))
        if caller == "insert":
class LeetSpeak: def __init__(self, processes=1): #number of threads if processes > 0: self.processes = processes else: self.processes = 1 #load word frequency and spell checker self.spelling = Spelling() #load the dictionaries self.jargon = Dictionary("slang") self.dictionary = self.spelling.dictionary self.stopwords = self.spelling.stopwords self.a = [ "a", "4", "@", "/-\\", "/\\", "/_\\", "^", "aye", "ci", "λ", "∂", "//-\\\\", "/=\\", "ae" ] self.b = [ "b", "8", "|3", "6", "13", "l3", "]3", "|o", "1o", "lo", "ß", "]]3", "|8", "l8", "18", "]8" ] self.c = ["c", "(", "<", "[", "{", "sea", "see", "k", "©", "¢", "€"] self.d = [ "d", "|]", "l]", "1]", "|)", "l)", "1)", "[)", "|}", "l]", "1}", "])", "i>", "|>", "l>", "1>", "0", "cl", "o|", "o1", "ol", "Ð", "∂", "ð" ] self.e = ["e", "3", "&", "[-", "€", "ii", "ə", "£", "iii"] self.f = ["f", "|=", "]=", "}", "ph", "(=", "[=", "ʃ", "eph", "ph"] self.g = [ "g", "6", "9", "&", "(_+", "C-", "gee", "jee", "(Y,", "cj", "[", "-", "(γ,", "(_-" ] self.h = [ "h", "|-|", "#", "[-]", "{-}", "]-[", ")-(", "(-)", ":-:", "}{", "}-{", "aych", "╫", "]]-[[", "aech" ] self.i = ["!", "1", "|", "l", "eye", "3y3", "ai", "i"] self.j = [ "j", "_|", "_/", "]", "</", "_)", "_l", "_1", "¿", "ʝ", "ul", "u1", "u|", "jay", "(/", "_]" ] self.k = [ "k", "x", "|<", "|x", "|{", "/<", "\\<", "/x", "\\x", "ɮ", "kay" ] self.l = ["l", "1", "7", "|_", "1_", "l_", "lJ", "£", "¬", "el"] self.m = [ "m", "/\/\\", "|\\/|", "em", "|v|", "[v]", "^^", "nn", "//\\\\//\\\\", "(V)", "(\/)", "/|\\", "/|/|", ".\\\\", "/^^\\", "/V\\", "|^^|", "JVL", "][\\\\//][", "[]\/[]", "[]v[]", "(t)" ] self.n = [ "n", "|\\|", "/\\/", "//\\\\//", "[\\]", "<\\>", "{\\}", "//", "[]\\[]", "]\\[", "~", "₪", "/|/", "in" ] #the ω is because Ω is mistakenly taken as that character sometimes... 
self.o = [ "o", "0", "()", "oh", "[]", "{}", "¤", "Ω", "ω", "*", "[[]]", "oh" ] self.p = [ "p", "|*", "l*", "1*", "|o", "lo", "1o", "|>", "l>", "1>", "|\"", "l\"", "1\"", "?", "9", "[]d", "|7", "l7", "17", "q", "|d", "ld", "1d", "℗", "|º", "1º", "lº", "þ", "¶", "pee" ] self.q = [ "q", "0_", "o_", "0,", "o,", "(,)", "[,]", "<|", "<l", "<1", "cue", "9", "¶", "kew" ] self.r = [ "r", "|2", "l2", "12", "2", "/2", "I2", "|^", "l^", "1^", "|~", "l~", "1~", "lz", "[z", "|`", "l`", "1`", ".-", "®", "Я", "ʁ", "|?", "l?", "1?", "arr" ] self.s = ["s", "5", "$", "z", "es", "2", "§", "š", ",,\\``"] self.t = ["t", "7", "+", "-|-", "-l-", "-1-", "1", "']['", "†"] self.u = [ "u", "|_|", "l_l", "1_1", "(_)", "[_]", "{_}", "y3w", "m", "\\_/", "\\_\\", "/_/", "µ", "yew", "yoo", "yuu" ] self.v = ["v", "\\/", "\\\\//", "√"] self.w = [ "w", "\\/\\/", "vv", "'//", "\\\\'", "\\^/", "(n)", "\\x/", "\\|/", "\\_|_/", "\\_l_/", "\\_1_/", "\\//\\//", "\\_:_/", "]i[", "uu", "Ш", "ɰ", "1/\\/", "\\/1/", "1/1/" ] self.x = [ "x", "%", "><", "><,", "}{", "ecks", "x", "*", ")(", "ex", "Ж", "×" ] self.y = [ "y", "j", "`/", "`(", "-/", "'/", "\\-/", "Ψ", "φ", "λ", "Ч", "¥", "``//", "\\j", "wai" ] self.z = ["z", "2", "~/_", "%", "7_", "ʒ", "≥", "`/_"] self.zero = ["0", "o", "zero", "cero", "()"] self.one = ["1", "won", "one", "l", "|", "]["] self.two = ["two", "to", "too", "2", "z"] self.three = ["e", "3", "three"] self.four = ["4", "four", "for", "fore", "a"] self.five = ["5", "five", "s"] self.six = ["6", "six", "g"] self.seven = ["7", "seven", "t", "l"] self.eight = ["8", "eight", "b"] self.nine = ["9", "nine", "g"] #"0":self.zero,"1":self.one,"2":self.two,"3":self.three,"4":self.four,"5":self.five,"6":self.six,"7":self.seven,"8":self.eight,"9":self.nine self.alphabet = { "a": self.a, "b": self.b, "c": self.c, "d": self.d, "e": self.e, "f": self.f, "g": self.g, "h": self.h, "i": self.i, "j": self.j, "k": self.k, "l": self.l, "m": self.m, "n": self.n, "o": self.o, "p": self.p, "q": self.q, "r": self.r, "s": self.s, "t": self.t, "u": self.u, "v": self.v, "w": self.w, "x": self.x, "y": self.y, "z": self.z } def ConvertToLeet(self, text): """ This is fairly straightforward. Randomly select letters from the array of letters and output it. """ leet = "" for letter in list(text): if letter.isalpha() and self.alphabet[letter.lower()]: values = self.alphabet[letter.lower()] random.seed() number = random.randint(1, len(values)) leet += values[number - 1] else: leet += letter return leet def rec_parse(self, text, previous=[]): """ Input: Output: """ possibilities = [] text_length = len(list(text)) if text_length > 7: length = 8 else: length = text_length for q in range(1, length): if q < len(text): possibilities.append(previous + [text[0:q], text[q:text_length]]) possibilities += self.rec_parse(text[q:text_length], previous + [text[0:q]]) return possibilities def rec_scan_array(self, array, previous=[]): """ Input: [['h'], ['e'], ['i', 'l', 't'], ['i', 'l', 't'], ['d', 'o']] Output: ['h','e','i','i','d'], ['h','e','i','i','o'], ['h','e','i','1','d'], ['h','e','i','1','o'], ... """ words = [] passon = copy.copy(array) passon.pop(0) if len(array) > 0: for let in array[0]: letters = copy.copy(previous) letters.append(let) if len(passon) > 0: words += self.rec_scan_array(passon, letters) if len(array) == 1: words.append("".join(letters)) del letters del passon return words def ConvertFromLeet(self, text): """ Convert leet to readable English text. Find all possible words, check which are English, check for misspellings, etc. 
Uses self.processes, so when creating the LeetSpeak() object, you can specify the number of threads to use: l=LeetSpeak(threads=3) """ #figure out how many words each thread should work on split = text.split(" ") thread_count = {} thread_words = {} thread_num = 1 for word in split: #add word to the array for the current thread if thread_num in thread_count: thread_count[thread_num] += 1 else: thread_count[thread_num] = 1 thread_words[thread_num] = [] #up the thread_num unless it is currently at the number of threads we want, then set it to 1 to start over again if self.processes > thread_num: thread_num += 1 else: thread_num = 1 #compute what words each thread should decode for num, word in enumerate(split): for thread, words in thread_words.items(): if len(words) < thread_count[thread]: thread_words[thread].append(word) break #INFORMATION: #if self.processes = 3 and text = "cows are cool or not", thread_words={1: ['cows', 'are'], 2: ['cool', 'or'], 3: ['not']} #create the processes threads = {} num_threads = len(thread_words) result_english = "" thread_results = {} receive_pipe, send_pipe = Pipe() for i in range(self.processes): if num_threads >= i + 1: threads[i] = Process(target=self.ConvertFromLeet_thread, args=(thread_words[i + 1], i, send_pipe)) threads[i].start() #start and wait for threads for i in range(self.processes): if num_threads >= i + 1: threads[i].join() result = receive_pipe.recv() thread_results[result[0]] = result[1] #close the pipe send_pipe.close() #sort the results thread_results = sorted(thread_results.items()) #make a string out of the results for thread, string in thread_results: result_english += string + " " return result_english.strip() def ConvertFromLeet_thread(self, text, thread_id, pipe): """ The function that ConvertFromLeet() calls for each thread. """ english = [] #convert each word for word in text: #get all the character locations less than 8 (e.g. "c,ow", "co,w", and "cow" for "cow") #this uses some recursive substringing possibilities = self.rec_parse(word.lower()) #append the actual "word" if it is less than 8 characters, since it might be a single letter (e.g. 
"n" for "and") if len(word) <= 8: possibilities.append([word.lower()]) #calculate what this could be in leet (if it can be anything) validwords = [] for possibility in possibilities: letters = [] valid = 1 for char in possibility: chars = [] for let, val in self.alphabet.items(): if char in val: chars.append(let) if len(chars) == 0: valid = 0 break else: letters.append(chars) del chars if valid == 1 and len(letters) > 0: #generate possible words from given letters words = self.rec_scan_array(letters) validwords += words del words #print(validwords) #check which valid words are english if there's more than one option #go with the most frequently used english word if len(validwords) > 0: englishwords = {} for valid in validwords: score = 1 + 5 / len(valid) #computer talk if self.jargon.Contains(valid) == True: value = 2 jargon = self.jargon.Translate(valid) if self.dictionary.Contains(jargon) == True: value = 4 score += value if len(jargon) > 0: if jargon in englishwords: englishwords[jargon] += value else: englishwords[jargon] = score score = 0 #valid english if len(valid) > 1 and self.dictionary.Contains( valid) == True: score += 5 #frequency words if self.stopwords.Contains(valid): score += self.spelling.Frequency(valid) else: score += 5 * self.spelling.Frequency(valid) #same length if len(word) == len(valid): score += 0.1 #no numbers if valid.isalpha() == True: score += 1 englishwords[valid] = score #figure out what word is the most likely to be correctable check = [] skip = 0 for valid in englishwords: if valid.isalpha(): #if there is already a good word in the list, then don't bother with looking up spell corrections if self.dictionary.Contains( valid) and len(valid) >= len(word) / 2: skip = 1 check = [] break else: check.append(valid) if len(check) == 0 and skip == 0: check.append(englishwords[0]) #append the corrected version, hopefully for item in check: corrected = self.spelling.Check(item, dictionary=True, fast=True) if corrected != False and len(corrected) > 0: word = corrected[0] if word not in englishwords: frequency = self.spelling.Frequency(word) #if it is on the stop list, don't add as much weight if self.stopwords.Contains(word): value = frequency + 1 else: value = 5 * frequency + 1 #add weight if in the dictionary if self.dictionary.Contains(word) == True: value += 1 #add weight if not numbers if word.isalpha() == True: value += 1 englishwords[word] = value else: #if one of the corrected words list is in the englishwords list then up that value by 0.1 for correct in corrected: if correct in englishwords: englishwords[correct] += 0.1 #get the most likely word final = sorted(englishwords.items(), key=operator.itemgetter(1), reverse=True)[0] #add word english.append(final[0]) #send the result pipe.send([thread_id, " ".join(english)])
def handle_message(self, message: Dict[str, Any], bot_handler: Any) -> None: string = message['content'].split() content = "something went wrong" check = string[0].lower() if check == "calculate": content = Calculator.calculate(string) elif check == "coding_contest": content = Coding().getList() elif check.lower() == 'define': dictword = string[1] content = Dictionary.words(dictword) elif check.lower() == 'telljoke': content = Joke.tellJoke() elif check == "cricknews": content = Cricket().news() elif check == "proxy": if len(string) > 1: if string[1].lower() == "working": content = Proxy.getWorkingProxy() content = "Working Proxies in Your Area \n\n" + content elif string[1].lower() == "help": content = Proxy.getHelpList() else: content = WitHandler.getInfo(message['content']) else: content = Proxy.getProxyStatus() content = "Proxies Status--->\n\n" + content elif check.lower() == "play": try: pid = check_output(["pidof"], "mpg321") os.kill(int(pid), signal.SIGKILL) os.remove("hello.mp3") content = Music.main(string[1:]) except: content = Music.main(string[1:]) bot_handler.send_reply(message, "playing song ") elif check == "stop": pid = check_output(["pidof", "mpg321"]) #print(int(pid)) os.kill(int(pid), signal.SIGKILL) content = "Bye........:)" bot_handler.send_reply(message, content) elif check == "college_notice": content = Dean.getNotice() elif check == "add" and string[1] == "meeting": content = "Enter <Date> as <dd/mm/yyyy> <Time> as <hrs:min> and am/pm and purpose(one word)" elif len(string[0].split('/')) == 3: res = Meeting.AddMeeting(string) if res.lower() == "ok": content = "New Meeting successfully Added " else: content = res elif check == "show" and string[1].lower() == "meetings": content = Meeting.ShowMeeting() elif check == "pnr" and string[1].lower() == "status": content = Pnr.getpnr(string[2]) elif check == "message" or check == "find" or check == "where": content = Send_message.sendMessage(string) # elif check=="mood": # Mood.capture(); elif check == "symptom": string_1 = " " gender = string[1] dob = string[2] st = string[3:] string_1 = string_1.join(st) content = Sympton.getExactSympton(string_1) try: content = "Please Tell me clearly\n" + content except: p = int(content) content = Sympton.getIssueId(str(p), gender, dob) elif check == "search": st = " " strlist = string[1:] st = st.join(strlist) st = FriendLocation.plot(st) if "https" in st: webbrowser.open(st) content = "check out below link \n" + st else: content = "Please type exact name :)\n" + st elif check == "getjobs": content = JOBS.getjobs() elif check == "translate": stri = " " stri = stri.join(list(string[1:])) content = Translate.translate(stri) elif check == "help": Help.Message() content = "Message sent" elif check == "nearby": content = Nearby.Place(string[1]) else: #print(message['content']) content = WitHandler.getInfo(message['content']) bot_handler.send_reply(message, content)
def writeOnDisk(self, dictionary, blockNum, postingLists): """ private method to write sorted compressed dictionary and posting lists on desk, it will write 7 files for current block: first file is Dictionary[blockNum].txt which contains the compressed terms. second file is FCData[blockNum].txt which contains the compressed front coding data (block start, (term Length, Prefix length) ...). third file is DocsFreq[blockNum].txt which contains the documents frequencies. fourth file is PostingLists[blockNum].txt which contains the postings lists for all terms in dictionary. fifth file is TermsFreq[blockNum].txt which contains the terms frequency for each term in each posting list. sixth file is PostingListsPoitners[blockNum].txt which contains the pointers to the posting list for each term in the disc. seventh file is TermsFreqPointers[blockNum].txt which contains the pointers to the terms freq list for each term in the disc. :param dictionary: dictionary of words and docs frequency and pointer to posting lists :param blockNum: number of the block to write on the desk :param postingLists: list of posting lists for the dictionary :return: void """ if not (os.path.isdir(self.indexDir)): os.mkdir(self.indexDir) DictionaryPath = PATH.format(self.indexDir, DICTIONARY_FILE_NAME + str(blockNum)) PostingListsPath = PATH.format(self.indexDir, POSTING_LISTS_FILE_NAME + str(blockNum)) FCDataPath = PATH.format(self.indexDir, FC_DATA_FILE_NAME + str(blockNum)) DocsFreqPath = PATH.format(self.indexDir, DOCS_FREQ_FiLE_NAME + str(blockNum)) TermsFreqPath = PATH.format(self.indexDir, TERMS_FREQ_FILE_NAME + str(blockNum)) PostingListsPointersPath = PATH.format(self.indexDir, POSTING_LISTS_POINTERS_FILE_NAME + str(blockNum)) TermsFreqPointersPath = PATH.format(self.indexDir, TERMS_FREQ_POINTERS_FILE_NAME + str(blockNum)) terms, docsFreq = self.getDetailsFromDict(dictionary) FCObj = Dictionary(terms, (COMPRESSION_TYPE, COMPRESSION_BLOCKS)) encodeObj = PostingList([], VARIANT_ENCODE_TYPE) dictionaryStream = FCObj.str docsFreqStream = encodeObj.getEncode(docsFreq) FCData = self.getFCDataFromDict(FCObj.dict) FCDataStream = encodeObj.getEncode(FCData) postingListsStream = bytearray() termsFreqStream = bytearray() self.writeToFile(DictionaryPath, dictionaryStream, 'a+') self.writeToFile(FCDataPath, FCDataStream, 'ab+') self.writeToFile(DocsFreqPath, docsFreqStream, 'ab+') postingListsPointer, termsFreqPointer = 0, 0 postingListsPointersList, termsFreqPointersList = [], [] for term in terms: currPostingList, currTermFreqList = [], [] for pair in postingLists[dictionary[term][1]]: currPostingList.append(pair[0]) currTermFreqList.append(pair[1]) currEncode = PostingList(currPostingList, VARIANT_ENCODE_TYPE) currPostingListStream = currEncode.GetList() postingListsStream.extend(currPostingListStream) currTermFreqStream = currEncode.getEncode(currTermFreqList) termsFreqStream.extend(currTermFreqStream) postingListsPointersList.append(postingListsPointer) termsFreqPointersList.append(termsFreqPointer) postingListsPointer += len(currPostingListStream) termsFreqPointer += len(currTermFreqStream) postingPointersEncode = PostingList(postingListsPointersList, VARIANT_ENCODE_TYPE) termsFreqPointersEncode = PostingList(termsFreqPointersList, VARIANT_ENCODE_TYPE) postingListsPointersStream = postingPointersEncode.GetList() termsFreqPointersStream = termsFreqPointersEncode.GetList() self.writeToFile(PostingListsPointersPath, postingListsPointersStream, 'ab+') self.writeToFile(TermsFreqPointersPath, termsFreqPointersStream, 'ab+') 
    self.writeToFile(PostingListsPath, postingListsStream, 'ab+')
    self.writeToFile(TermsFreqPath, termsFreqStream, 'ab+')
class GameManager(QtCore.QObject): start_move_first = QtCore.Signal() start_move_second = QtCore.Signal() ask_for_cells = QtCore.Signal() game_ended = QtCore.Signal(str) show_board = QtCore.Signal() @QtCore.Slot() def step_ended(self): if self.__current_id__ == FIRST_PLAYER: self.__current_id__ = SECOND_PLAYER else: self.__current_id__ = FIRST_PLAYER self.__number_of_spare_cells__ -= 1 @QtCore.Slot(int) def get_number_of_cells(self, value): self.__number_of_spare_cells__ = value @QtCore.Slot() def game_ending(self): message = None if self.__players_number__ == 2: score1 = self.__player1__.get_score() score2 = self.__player2__.get_score() if score1 > score2: message = 'First player win' elif score1 == score2: message = 'Draw' else: message = 'Second player win' else: score1 = self.__player1__.get_score() score2 = self.__player2__.get_score() if score1 > score2: message = 'You win' elif score1 == score2: message = 'Draw' else: message = 'Computer win' self.game_ended.emit(message) def __init__(self, language: Language, width, height, players_number, level=''): super(GameManager, self).__init__() self.__bot__ = Bot(language, width, height) self.__width__ = width self.__height__ = height self.__players_number__ = players_number self.__board__ = Board() self.__board__.init_board(width, height) self.__dictionary__ = Dictionary() self.__dictionary__.load_dictionary() self.__wc__ = WordCollector() self.__wc__.connect_to_dictionary(self.__dictionary__) self.__wc__.connect_to_board(self.__board__) self.__dictionary__.setup_connection(self.__wc__) self.__board__.setup_connection(self.__wc__) self.__first_word__ = self.__dictionary__.get_first_word(width) self.__player1__ = Player() self.__player2__ = Player() if players_number == 2: self.__player1__.connect_to_board(self.__board__) self.__player1__.connect_to_manager(self) self.__player2__.connect_to_board(self.__board__) self.__player2__.connect_to_manager(self) else: self.__player1__.connect_to_board(self.__board__) self.__player1__.connect_to_manager(self) self.__dictionary__.connect_to_bot(self.__bot__) self.__dictionary__.used_words_to_bot(self.__bot__) if level == 'EASY': self.__bot__.set_level(EASY) elif level == 'MEDIUM': self.__bot__.set_level(MEDIUM) elif level == 'HARD': self.__bot__.set_level(HARD) elif level == 'HARDEST': self.__bot__.set_level(HARDEST) self.__bot__.connect_to_board(self.__board__) self.__bot__.connect_to_manager(self) self.__bot__.connect_to_dictionary(self.__dictionary__) self.__bot__.get_dictionary() self.__bot__.connect_to_used_dictionary(self.__dictionary__) self.__current_player__ = self.__player1__ self.__current_id__ = FIRST_PLAYER self.__number_of_spare_cells__ = width * (height - 1) if players_number == 2: self.__board__.connect_to_players(self.__player1__, self.__player2__) self.__board__.set_first_player(FIRST_PLAYER) else: self.__board__.connect_to_players(self.__player1__, self.__bot__) self.__board__.set_first_player(FIRST_PLAYER) self.ask_for_cells.connect(self.__board__.get_number_of_cells) self.start_move_first.connect(self.__player1__.begin_step) if players_number == 2: self.start_move_second.connect(self.__player2__.begin_step) else: self.start_move_second.connect(self.__bot__.begin_step) self.__board__.set_first_word(self.__first_word__) def run_game(self): if not self.is_game_ended(): self.show_board.emit() if self.__players_number__ == 2: if self.__current_id__ == FIRST_PLAYER: print("First player: your move") self.start_move_first.emit() else: print("Second player: your move") 
self.start_move_second.emit() if self.__players_number__ == 1: if self.__current_id__ == FIRST_PLAYER: print("First player: your move") self.start_move_first.emit() else: print("Second player: your move") self.start_move_second.emit() return self.game_ending() def get_first_player(self): return self.__player1__ def get_second_player(self): if self.__players_number__ == 2: return self.__player2__ else: return self.__bot__ def get_current_player(self): return self.__current_id__ def get_first_word(self): return self.__first_word__ def is_game_ended(self): self.ask_for_cells.emit() return self.__number_of_spare_cells__ == 0
class Class(object): def __init__(self, aHt, aClassId, aCode, aLnotab): self.hT = aHt self.staticField = Dictionary(self.hT) self.attributes = Dictionary(self.hT) self.method = Dictionary(self.hT) self.lnotab = aLnotab self.code = aCode self.name = aCode.co_name self.Id = aClassId self.SpecialBehaviorId = -1 def __getId__(self): return self.Id def __getLnotab__(self): return self.lnotab def __addMethod__(self, aCode, aLocals): for theKey, theValue in aLocals.iteritems(): if inspect.isfunction(theValue): if not (theKey == '__module__'): theId = self.hT.itsId.__get__() self.method.update({theKey: theId}) self.hT.itsId.__next__() def __addSpecialMethod__(self, aFileName): if self.method.has_key("%sStaticMethod" % self.name): return theId = self.hT.itsId.__get__() self.method.update({"%sStaticMethod" % self.name: theId}) self.hT.itsId.__next__() self.hT.__registerSpecialMethod__("%sStaticMethod" % self.name, theId, self.Id, aFileName) self.SpecialBehaviorId = theId def __setStaticField__(self, aId, aValue, aFrameLineNo, aCurrentLasti, aParentTimestamp, aDepth): theThreadId = self.hT.__getThreadId__(thread.get_ident()) theCurrentTimestamp = self.hT.__convertTimestamp__(time.time()) if not self.hT.itsProbe.has_key( (aCurrentLasti, self.SpecialBehaviorId)): theProbeId = self.hT.__registerProbe__(aCurrentLasti, self.SpecialBehaviorId, aFrameLineNo) else: theProbeId = hT.itsProbe[(aCurrentLasti, aTheSpecialBehaviorId)] self.hT.itsPacker.reset() self.hT.itsPacker.pack_int(self.hT.itsEvents['set']) self.hT.itsPacker.pack_int(self.hT.itsObjects['classAttribute']) self.hT.itsPacker.pack_int(aId) theDataType = self.hT.__getDataType__(aValue) self.hT.itsPacker.pack_int(theDataType) thePackValue = self.hT.__packValue__(theDataType, aValue) self.hT.itsPacker.pack_int(theProbeId) self.hT.itsPacker.pack_hyper(aParentTimestamp) self.hT.itsPacker.pack_int(aDepth) self.hT.itsPacker.pack_hyper(theCurrentTimestamp) self.hT.itsPacker.pack_int(theThreadId) if self.hT.FLAG_DEBUGG: print self.hT.itsEvents['set'], print self.hT.itsObjects['classAttribute'], print Id, print theDataType, print thePackValue, print theProbeId, print aParentTimestamp, print aCurrentDepth, print theCurrentTimestamp, print theThreadId raw_input() try: self.hT.itsSocket.sendall(self.hT.itsPacker.get_buffer()) pass except: print 'TOD está durmiendo :-(', 'set static field' def __register_set_StaticField__(self, aLocals, aFrameLineNo, aParentTimestamp, aDepth, aFileName): theLower = 0 theUpper = len(self.code.co_code) theCode = self.code.co_code while theLower < theUpper: theOp = ord(theCode[theLower]) theNameOp = dis.opname[theOp] theLower = theLower + 1 if theOp >= dis.HAVE_ARGUMENT: theValue = ord(theCode[theLower]) theValue += ord(theCode[theLower + 1]) * 256 if theNameOp == 'STORE_NAME': #print self.code.co_names[theValue] #registro el atributo estático theStaticFieldName = self.code.co_names[theValue] self.staticField.__updateStaticField__( {theStaticFieldName: aLocals[theStaticFieldName]}, self.Id) #creamos un metodo artificial para almacenar #la definición de los atributos de clase self.__addSpecialMethod__(aFileName) #set para el atributo estático if not re.search(self.hT.itsMethodPattern, theStaticFieldName): if not inspect.isfunction(aLocals[theStaticFieldName]): self.__setStaticField__( self.staticField[theStaticFieldName], aLocals[theStaticFieldName], aFrameLineNo, theLower, aParentTimestamp, aDepth) theLower = theLower + 2 def __addStaticField__(self, aLocals): self.staticField.__updateStaticField__(aLocals, self.Id) def 
__addAttribute__(self, aName, aObjectId): self.attributes.__updateAttr__({aName: -1}, aObjectId)
class Parser:
    word = ""          # word from input stream
    inputSymbol = ""   # word class (input stream)
    ruleSymbol = ""    # symbol from rule (Rules)
    # Source code
    inputstream = InputStream.InputStream("source.pas")
    # Recognizing
    recognizer = Recognizer.Recognizer()
    RepeatableSymbols = []
    words = []

    def __init__(self):
        self.lexema = Dictionary()

    def Verify(self, source):
        """
        Effects: Verifies the source for syntax correctness
        Requires: file "source" must exist
        """
        self.inputstream = InputStream.InputStream(source)
        self.nextToken()
        SemanticActions().initStandardTypes()
        self.checkAccordingToRule(Rules["programme"][0])
        d2 = SS3().pop()
        d22 = SS4().pop()
        dvicpi = SS3().trace()
        dvicpi2 = SS4().trace()
        d2.printed()
        # d22.printed()
        # print "File successfully parsed"

    def checkAccordingToRule(self, inRuleItem):
        """ Effects: Checks the programme for syntax correctness (one RuleItem) """
        RuleItem = self.subSequence(inRuleItem, 0, len(inRuleItem)-1)
        i = 0
        while i < len(RuleItem):
            self.ruleSymbol = RuleItem[i]
            self.nextRuleSymbol = self.getRuleSymbol(RuleItem, i)
            if self.isSemanticAction(self.ruleSymbol):
                i += 1
                continue
            # verify next rule symbol @here
            self.doSemanticAction(RuleItem, i)
            if self.nextRuleSymbol == "0":
                # "0" is a special rule symbol placed after non-terminals so that
                # the rule is applied AFTER the recursive search of rules
                i = i + 1
                continue
            if self.ruleSymbol == self.inputSymbol:
                self.addToDictionary()  # after doSemanticAction, otherwise the symbol is double defined
            if self.ruleSymbol == "[[":
                index = self.findElement(RuleItem, "]]", i)
                if self.isDerivedFrom(RuleItem[i+1]):
                    RuleItem.pop(i)
                    RuleItem.pop(index-1)
                    i -= 1
                else:
                    i = index
            elif self.ruleSymbol == "{":
                index = self.findElement(RuleItem, "}", i)
                self.RepeatableSymbols = self.subSequence(RuleItem, i+1, index-1)
                if self.isDerivedFrom(RuleItem[i+1]):
                    # Insert RepeatableSymbols into RuleItem at index i
                    _i = i
                    for reSymbol in self.RepeatableSymbols:
                        RuleItem.insert(_i, reSymbol)
                        _i = _i + 1
                    i -= 1
                else:
                    i = index
            else:
                try:
                    self.treatSymbol()
                except E.ENotFoundRule:
                    return None
                # if foundAppropriateRule == None: return None
            i += 1

    def treatSymbol(self):
        """
        Effects: if the rule symbol is a terminal, compares it with inputSymbol
        and reads the next word from the input stream; otherwise applies the
        rule with the appropriate left part.
        """
        if self.isTerminal(self.ruleSymbol):
            if self.inputSymbol == self.ruleSymbol:
                self.nextToken()
            else:
                E.E(self.inputstream.lineNumber, self.inputstream.code).syntactic(
                    self.inputstream.lineNumber, self.ruleSymbol, self.word,
                    "Inconformity")
        else:
            AppropriateRule = self.chooseAppropriateRule(self.ruleSymbol)
            if AppropriateRule == None:
                raise E.ENotFoundRule
                # return None
                # E.E(self.inputstream.lineNumber, self.inputstream.code).syntactic(
                #     self.inputstream.lineNumber, self.ruleSymbol, self.inputSymbol, "Inconformity")
            self.checkAccordingToRule(AppropriateRule)

    def subSequence(self, Sequence, startIndex, endIndex):
        """ Effects: Returns the sequence [ startIndex .. endIndex ] """
        i = startIndex
        Seq = []
        while i <= endIndex:
            Seq.append(Sequence[i])
            i += 1
        return Seq

    def findElement(self, Sequence, element, startindex):
        """ Effects: Finds the index of the specified element """
        while Sequence[startindex] != element:
            startindex += 1
        return startindex

    def isTerminal(self, symbol):
        """
        Terminals appear only in right parts of rules;
        non-terminals are keys (left parts) of rules.
        """
        return not Rules.has_key(symbol)

    def nextToken(self):
        """
        Reads the next word from the input stream and determines its class.
        Modifies: word, inputSymbol
        """
        if not self.inputstream.eof():
            if self.word:
                self.words.append(self.word)
                Words().push(self.word)
            self.word = self.inputstream.getWord()
            self.inputSymbol = self.recognizer.getClass(self.word)
            if self.inputSymbol == None:
                E.E(self.inputstream.lineNumber, self.word).e("WrongWord")

    def chooseAppropriateRule(self, ruleSymbol):
        """
        Effects: Returns the rule alternative for ruleSymbol whose first symbols
        contain inputSymbol (several alternatives may share this left part).
        """
        RightRule = Rules[ruleSymbol][0]
        for RightRule in Rules[ruleSymbol]:
            if (self.inputSymbol in self.findFirstSymbols(RightRule)):
                return RightRule
        return None

    def PossiblePrefixes(self, Rule):
        """ Effects: Returns possible prefixes (possible input symbols) """
        i = 0
        First = []
        while i < len(Rule):
            if (Rule[i] == "[["):
                First.append(Rule[i+1])
                i = self.findElement(Rule, "]]", i+1)
            elif (Rule[i] == "{"):
                First.append(Rule[i+1])
                i = self.findElement(Rule, "}", i+1)
            else:
                First.append(Rule[i])
                return First
            i += 1
        return First

    def nextLevel(self, List):
        """ Returns the next expansion level: non-terminals replaced by their possible prefixes """
        out = []
        for symbol in List:
            if (self.isTerminal(symbol)):
                out.append(symbol)
            else:
                for _R in Rules[symbol]:
                    for symbol2 in self.PossiblePrefixes(_R):
                        out.append(symbol2)
        return out

    def findFirstSymbols(self, Rule):
        """ Effects: Returns the list of possible first symbols """
        List = self.PossiblePrefixes(Rule)
        while (self.hasnonTerminals(List)):
            List = self.nextLevel(List)
        return List

    def hasnonTerminals(self, List):
        """ Effects: Returns True if the list contains at least one non-terminal """
        for symbol in List:
            if not self.isTerminal(symbol):
                return True
        return False

    def isDerivedFrom(self, ruleSymbol):
        if (self.isTerminal(ruleSymbol)):
            return ruleSymbol == self.inputSymbol
        else:
            if ruleSymbol == self.inputSymbol:
                return True
            else:
                return (not self.chooseAppropriateRule(ruleSymbol) == None)

    def isSemanticAction(self, ruleSymbol):
        """ Effects: Returns True if a semantic action occurs and needs to be done """
        return ruleSymbol[0] == '#'

    def extractSemanticAction(self, ruleSymbol):
        """
        Effects: Returns the number of the semantic action
        Requires: ruleSymbol must be in the format "#\d+"
        """
        return ruleSymbol[1:]

    def addToDictionary(self):
        if (not self.lexema.isFound(self.word)):
            # @todo move to a more suitable place
            self.lexema.addSymbol({"name": self.word, 'class': self.inputSymbol})
            if self.inputSymbol == 'intConst':
                self.lexema.setObject(self.word, AtrClasses.AttrIntConst(self.word))
            if self.inputSymbol == 'floatConst':
                self.lexema.setObject(self.word, AtrClasses.AttrFloatConst(self.word))
            if self.inputSymbol == 'StringConst':
                self.lexema.setObject(self.word, AtrClasses.AttrStringConst(self.word))

    def doSemanticAction(self, RuleItem, i):
        currentRuleSymbol = None
        if i+1 < len(RuleItem):
            currentRuleSymbol = RuleItem[i+1]
        if currentRuleSymbol is None:
            return False
        if self.isSemanticAction(currentRuleSymbol):
            semanticActionNumber = self.extractSemanticAction(currentRuleSymbol)
            if (semanticActionNumber.find(',') == -1):
                SemanticActions().switchAction(self.inputstream.lineNumber,
                                               semanticActionNumber, self.word)
            else:
                (semanticActionNumber1, semanticActionNumber2) = semanticActionNumber.split(',')
                SemanticActions().switchAction(self.inputstream.lineNumber,
                                               semanticActionNumber1, self.word)
                SemanticActions().switchAction(self.inputstream.lineNumber,
                                               semanticActionNumber2, self.word)

    def getRuleSymbol(self, RuleItem, i):
        if (i < len(RuleItem)):
            return RuleItem[i]
        else:
            return None
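# Hedged sketch (separate from the Parser above) of what findFirstSymbols
# computes: expand a rule's possible prefixes level by level until only
# terminals remain. toy_rules and first_symbols are hypothetical names used
# only for illustration; non-terminals are the grammar's keys, everything
# else is treated as a terminal.
toy_rules = {
    "expr": [["term", "+", "term"], ["term"]],
    "term": [["number"], ["(", "expr", ")"]],
}

def first_symbols(rule):
    symbols = [rule[0]]                            # prefix of this alternative
    while any(s in toy_rules for s in symbols):    # still contains non-terminals
        expanded = []
        for s in symbols:
            if s in toy_rules:
                expanded.extend(alt[0] for alt in toy_rules[s])
            else:
                expanded.append(s)
        symbols = expanded
    return symbols

# first_symbols(toy_rules["expr"][0]) -> ['number', '(']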
class SpanishTranslator:
    def __init__(self):
        self.dict = Dictionary()
        # build CCAE dictionaries:
        bigram_filename = "CAE_bigrams.txt"
        trigram_filename = "CAE_trigrams.txt"
        # self.dict.build_english_bigrams(bigram_filename, "data")
        # self.dict.build_english_trigrams(trigram_filename, "data")
        self.dict.build_english_corpus("google_translate.txt", "data")
        # self.stem_helper_inst = StemHelper()
        self.preProcessors = [ConjugationPreProcessor(), PluralPreProcessor(), QuePreProcessor()]
        # self.preProcessors = []  # add plural processor back in
        self.postProcessors = [AdjectivePostProcessor(), ArticlePostProcessor(),
                               ConjugationPostProcessor(), PluralPostProcessor()]
        # self.postProcessors = []
        corpusFilename = "Project_Dev_Sentences.txt"
        googleTranslate = "Translation_Strict_Keys.txt"
        self.dict.build_custom_dictionary(corpusFilename, "data", googleTranslate)
        self.spanish_stemmer = snowballstemmer.stemmer('spanish')
        self.fluency_processor_inst = FluencyProcessing()

    def translate(self, original):
        translated = ""
        # do all the tokenizing, POS-tagging, etc. here
        tokens = TaggedWord.TagText(original)
        for t in tokens:
            t.lower()
        # apply preprocessing strategies
        for pre in self.preProcessors:
            tokens = pre.apply(tokens)
        # generate possible translations
        self.translations = []
        self.generateTranslations(tokens, 0)
        # post-processing
        # for i, translation in enumerate(self.translations):
        for i in xrange(0, len(self.translations)):
            for post in self.postProcessors:
                # translation = post.apply(translations)
                self.translations[i] = post.apply(self.translations[i])
                # self.translations[i] = translation
        # select the best translation
        english_sentences = []
        for translation in self.translations:
            sentence = ""
            for token in translation:
                sentence += token.word.decode('utf-8') + " "
            sentence = sentence.replace(".", "")
            english_sentences.append(sentence)
            # english_sentences.append(translation)
        for sentence in english_sentences:
            print sentence
            print
        # ccae_flag = True
        ccae_flag = False
        bigram_prob_list = self.fluency_processor_inst.find_fluent_translation_stupidbackoff(
            english_sentences, self.dict.custom_bigram_dict,
            self.dict.custom_bigram_dict_unigram_dict, ccae_flag)
        trigram_prob_list = self.fluency_processor_inst.find_fluent_translation_trigrams(
            english_sentences, self.dict.custom_trigram_dict,
            self.dict.custom_trigram_dict_unigram_dict, ccae_flag,
            self.dict.custom_bigram_dict, self.dict.custom_bigram_dict_unigram_dict)
        # the weight of each language model can be tuned here
        # bigram_weight = .5
        bigram_weight = .2
        trigram_weight = .8
        fluent_sentence = self.fluency_processor_inst.find_combined_fluency(
            english_sentences, bigram_prob_list, trigram_prob_list,
            bigram_weight, trigram_weight)
        return fluent_sentence
        # to test without the fluency processor, comment out the line above and use:
        # return english_sentences[0]

    def generateTranslations(self, tokens, position):
        if position == len(tokens):
            sentence = ""
            for token in tokens:
                sentence += token.word.decode('utf-8') + " "
            # print sentence
            print position
            self.translations.append(tokens)
        else:
            options = self.dict.custom_dict[tokens[position].word]
            newTokens = copy.deepcopy(tokens[:])
            # newTokens = tokens[:]
            match_options = []
            for opt in options:
                if tokens[position].posMatch(opt[1]):
                    match_options.append(opt)
            if len(match_options) > 1:
                if random.random() <= .2:
                    count = 0
                    while count < 2 and count < len(match_options):
                        newTokens[position].word = match_options[count][0]
                        self.generateTranslations(newTokens, position + 1)
                        count = count + 1
                else:
                    newTokens[position].word = match_options[0][0]
                    self.generateTranslations(newTokens, position + 1)
            elif match_options:
                newTokens[position].word = match_options[0][0]
                self.generateTranslations(newTokens, position + 1)
            else:
                self.generateTranslations(newTokens, position + 1)
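# Hedged sketch of the stupid-backoff idea that
# find_fluent_translation_stupidbackoff above presumably applies: score a
# candidate sentence with bigram relative frequencies and fall back to a
# discounted unigram probability for unseen bigrams. The counts, the helper
# name score_stupid_backoff, and alpha = 0.4 are illustrative assumptions,
# not the project's actual model.
import math

def score_stupid_backoff(tokens, bigram_counts, unigram_counts, alpha=0.4):
    total = float(sum(unigram_counts.values()))
    log_score = 0.0
    for prev, word in zip(tokens, tokens[1:]):
        if (prev, word) in bigram_counts and unigram_counts.get(prev):
            p = bigram_counts[(prev, word)] / float(unigram_counts[prev])
        else:
            p = alpha * unigram_counts.get(word, 1) / total
        log_score += math.log(p)
    return log_score

# higher is more fluent, e.g.:
# score_stupid_backoff(["the", "red", "house"],
#                      {("the", "red"): 2}, {"the": 10, "red": 3, "house": 1})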
from Counter import Counter
import inspect
import os

fileDirectory = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe())))
f = File(fileDirectory, "/WEBPAGES_RAW/bookkeeping.json")  # locate the json file
jsonData = f.readJson()  # read the json file
myCounter = Counter(len(jsonData))

# loop through the location:url pairs from the bookkeeping.json file
for location, urlLink in jsonData.items():
    wPost = Posting()      # create posting
    wDict = Dictionary()   # create dictionary
    fileName = "/WEBPAGES_RAW/" + location  # generate a new location
    data = File(fileDirectory, fileName).readText()  # open the file and return its html text
    parser = DataParser(data)  # create a parser
    parser.processData()       # process the given data
    visibleText = parser.getProcessTexts()  # get all the visible text in the document
    wordList = Tokenize(visibleText).extractToken()  # extract all the tokens
    wPost.addWord(wordList)  # add the word, its frequency and positions to the Posting
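# Hedged sketch of the posting/dictionary step above: map every token to the
# documents and positions it occurs in. build_index and the whitespace
# tokenizer are simplified stand-ins for the project's Posting, Dictionary,
# and Tokenize classes, not their actual API.
from collections import defaultdict

def build_index(docs):
    index = defaultdict(list)  # token -> [(doc_id, position), ...]
    for doc_id, text in docs.items():
        for position, token in enumerate(text.lower().split()):
            index[token].append((doc_id, position))
    return index

# build_index({"0/1": "the quick fox", "0/2": "the lazy dog"})["the"]
# contains ('0/1', 0) and ('0/2', 0)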
for i in range(db.getTextsSize()):
    tempText = db.getTextsData('baseText', i+1)[0][0]
    tempText = parser.parsing(tempText)
    db.updateTexts('formattedText', tempText, i+1)
    pb.inc()
# <- full pass over all raw texts in the DB:
#    raw texts are fetched and sent for cleaning;
#    the filtered texts are written back to the DB

# op = None
# parser = None
# <- release objects that are no longer needed (OpenTexts and CorpusParser)

print("Saving local dictionaries to the database...")
d = Dictionary(p.featureExtraction.getMetricType(),
               p.featureExtraction.getNgrammType(),
               p.featureExtraction.getIgnoreWordOrderStatus())
pb.new(maxValue=db.getTextsSize(), suffix='saved')
for i in range(db.getTextsSize()):
    d.addData(db.getTextsData('formattedText', i+1)[0][0])
    tempDict = d.getLastDictionary()
    tempStr = json.dumps(tempDict)
    tempStr = tempStr.replace('"', '""')
    db.updateTexts('localDictionary', tempStr, i+1)
    pb.inc()
# <- local dictionaries are added to the DB as JSON strings

print(1)
d.idfGlobalCalc()
print(2)
v = Vectorizer(p.featureExtraction.getMetricType())
v.addGlobDict(d.getGlobalDictionary())
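# Hedged sketch of the global IDF step that d.idfGlobalCalc() presumably
# performs: idf(t) = log(N / df(t)) over the per-text local dictionaries.
# idf_global and the list-of-dicts layout are illustrative assumptions, not
# the project's Dictionary internals.
import math

def idf_global(local_dicts):
    n_texts = len(local_dicts)
    document_frequency = {}
    for local in local_dicts:
        for term in local:
            document_frequency[term] = document_frequency.get(term, 0) + 1
    return dict((term, math.log(float(n_texts) / count))
                for term, count in document_frequency.items())

# idf_global([{"cat": 1, "dog": 2}, {"cat": 3}])
# -> {"cat": 0.0, "dog": log(2)}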
def __init__(self):
    self.lexema = Dictionary()
def __init__(self, language: Language, width, height, players_number, level=''):
    super(GameManager, self).__init__()
    self.__bot__ = Bot(language, width, height)
    self.__width__ = width
    self.__height__ = height
    self.__players_number__ = players_number
    self.__board__ = Board()
    self.__board__.init_board(width, height)
    self.__dictionary__ = Dictionary()
    self.__dictionary__.load_dictionary()
    self.__wc__ = WordCollector()
    self.__wc__.connect_to_dictionary(self.__dictionary__)
    self.__wc__.connect_to_board(self.__board__)
    self.__dictionary__.setup_connection(self.__wc__)
    self.__board__.setup_connection(self.__wc__)
    self.__first_word__ = self.__dictionary__.get_first_word(width)
    self.__player1__ = Player()
    self.__player2__ = Player()
    if players_number == 2:
        self.__player1__.connect_to_board(self.__board__)
        self.__player1__.connect_to_manager(self)
        self.__player2__.connect_to_board(self.__board__)
        self.__player2__.connect_to_manager(self)
    else:
        self.__player1__.connect_to_board(self.__board__)
        self.__player1__.connect_to_manager(self)
    self.__dictionary__.connect_to_bot(self.__bot__)
    self.__dictionary__.used_words_to_bot(self.__bot__)
    if level == 'EASY':
        self.__bot__.set_level(EASY)
    elif level == 'MEDIUM':
        self.__bot__.set_level(MEDIUM)
    elif level == 'HARD':
        self.__bot__.set_level(HARD)
    elif level == 'HARDEST':
        self.__bot__.set_level(HARDEST)
    self.__bot__.connect_to_board(self.__board__)
    self.__bot__.connect_to_manager(self)
    self.__bot__.connect_to_dictionary(self.__dictionary__)
    self.__bot__.get_dictionary()
    self.__bot__.connect_to_used_dictionary(self.__dictionary__)
    self.__current_player__ = self.__player1__
    self.__current_id__ = FIRST_PLAYER
    self.__number_of_spare_cells__ = width * (height - 1)
    if players_number == 2:
        self.__board__.connect_to_players(self.__player1__, self.__player2__)
        self.__board__.set_first_player(FIRST_PLAYER)
    else:
        self.__board__.connect_to_players(self.__player1__, self.__bot__)
        self.__board__.set_first_player(FIRST_PLAYER)
    self.ask_for_cells.connect(self.__board__.get_number_of_cells)
    self.start_move_first.connect(self.__player1__.begin_step)
    if players_number == 2:
        self.start_move_second.connect(self.__player2__.begin_step)
    else:
        self.start_move_second.connect(self.__bot__.begin_step)
    self.__board__.set_first_word(self.__first_word__)
def __init__(self, name):
    self.dictionary = Dictionary()
    self.first_name, self.middle_name, self.last_name = self.__splitter(name)
    self.vector = self.__to_vector()