def generateResults(input, message):
    """Score each sentence of the text widget *input* and display ratings in *message*.

    ``input`` is read via ``.get("1.0", 'end-1c')`` (a Tk Text-like widget);
    ``message`` is updated via ``.configure(text=...)`` (a Tk Label-like widget).
    Relies on module-level ``tokenizer``, ``wordDataset``, ``specialCharacter``
    and ``generateTree``.
    """
    # Guard: nothing to do without input. Use identity comparison for None
    # (the original used `== None`).
    if input == '' or input is None:
        return
    sentences = tokenizer.tokenize(input.get("1.0", 'end-1c'))
    # Collect the lines and join once instead of quadratic `+=` concatenation.
    lines = ["Rating : "]
    for count, sentence in enumerate(sentences, start=1):
        # Split the sentence into POS-tagged words.
        wordList = nltk.pos_tag(specialCharacter(sentence), tagset='universal')
        words = []
        for token, tag in wordList:
            if token in wordDataset:
                words.append(word.Word(token, wordDataset[token], tag))
            else:
                # Unknown word: zero weight, "X" (other) POS tag.
                words.append(word.Word(token, 0, "X"))
        ans = generateTree(words)
        # Questions are rated neutral; everything else is clamped to [-10, 10].
        if words[-1].word == "?":
            ans = 0.00
        if ans > 10:
            ans = 10.00
        elif ans < -10:
            ans = -10.00
        lines.append("\nSentence {} = {}".format(count, round(ans, 2)))
    message.configure(text="".join(lines))
def test___init__(self):
    """Word() accepts only non-empty strings."""
    # A valid string becomes the first descriptor key.
    created = word.Word("man")
    self.assertEqual(list(created.descriptor.keys())[0], "man")
    # Every non-string argument is rejected with TypeError.
    for bad in (1, 1.0, object, {"test": 123}, [1], (1, 2)):
        with self.assertRaises(TypeError):
            word.Word(bad)
    # The empty string is rejected with ValueError.
    with self.assertRaises(ValueError):
        word.Word("")
def test_magnitude(self):
    """magnitude is the Euclidean norm of the component counts."""
    subject = word.Word("man")
    # No components yet: accessing magnitude raises.
    with self.assertRaises(IndexError):
        subject.magnitude
    # One component "i" with count 3 -> magnitude 3.
    for component in ("i", "i", "i"):
        subject.append(component)
    self.assertAlmostEqual(subject.magnitude, 3.0)
    # Two components, counts 3 and 3 -> sqrt(9 + 9).
    for component in ("am", "am", "am"):
        subject.append(component)
    self.assertAlmostEqual(subject.magnitude, math.sqrt(18.0))
    # Third component "a" with count 2 -> sqrt(9 + 9 + 4) = sqrt(22).
    for component in ("a", "a"):
        subject.append(component)
    self.assertAlmostEqual(subject.magnitude, math.sqrt(22.0))
def loadWords(files):
    """Build a {word: wd.Word} index over *files*.

    Prompts the user for the minimum word length ``C`` (stored in the
    module-level global), tokenizes every file, and skips words shorter
    than ``C``. Also fills the module-level ``dTerms`` map with the count
    of distinct long-enough terms per file.

    :param files: iterable of file paths to read.
    :returns: dict mapping normalized (unidecoded, lower-cased) word to a
        ``wd.Word`` tracking per-file occurrences.
    """
    global C
    global dTerms
    C = int(input("Insert the value of C: "))
    print(files)
    words = {}
    # Hoist the token-separator regex out of the per-line loop.
    splitter = re.compile(r'; |, |\*|\n|;|!|\?|\.|\t| ')
    # i is the 1-based file index used by incrementOccurs (replaces the
    # manual `i = i + 1` counter of the original).
    for i, fil in enumerate(files, start=1):
        dTerms[fil] = 0
        fileWords = {}
        # BUG FIX: use a context manager so the handle is closed even if
        # parsing raises (the original leaked it on exceptions).
        with open(fil, "r") as f:
            for line in f:
                # Eliminating non valid characters
                for token in splitter.split(line):
                    # Checking if the word's smaller than the parameter C
                    if len(token) < C:
                        continue
                    theWord = un.unidecode(token.lower())
                    if theWord not in words:
                        words[theWord] = wd.Word(theWord, len(files))
                    # First appearance of this term in this file: count it.
                    if theWord not in fileWords:
                        fileWords[theWord] = theWord
                        dTerms[fil] += 1
                    words[theWord].incrementOccurs(i)
    return words
def __init__(self, wordname):
    """Initialize parser state for looking up a single word.

    :param wordname: the word whose dictionary page will be parsed.
    """
    # Result object filled in as SGML events arrive.
    self._word = word.Word()
    self._word.set_name(wordname)
    SGMLParser.__init__(self)
    # 0/1 state flags tracking which HTML section the parser is currently
    # inside; the tag handlers flip them on open/close.
    self._phonetic_found = 0
    self._etcTrans_found = 0
    self._etcTrans_li_found = 0
    self._p_additional_found = 0
    self._synonyms_found = 0
    self._synonyms_a_found = 0
    self._tPowerTrans_found = 0
    self._tPowerTrans_ul_sense_ex_found = 0
    self._tPowerTrans_ul_sense_ex_p_found = 0
    self._examplesToggle_found = 0
    self._examplesToggle_bilingual_found = 0
    self._examplesToggle_bilingual_p_found = 0
    self._examplesToggle_bilingual_p_noattrs_found = 0  # works around the "error report" <p> tag
    # Accumulators for the example sentence currently being collected.
    self._tmp_sentence_list = []
    self._tmp_sentence = ''
def createWords(self):
    """Prompt for words and wrap each answer in a word.Word.

    NOTE(review): like the original, this loops numberOfWords - 1 times,
    so one fewer word than the name suggests is created — confirm the
    off-by-one is intended.
    """
    for _ in range(1, self.numberOfWords):
        entry = self.solicitWord()
        self.gameWords.append(word.Word(entry))
def groupWords(dir):
    """Collect the words of the crossword grid along direction *dir*.

    Reads the module-level ``gridIndexs`` and walks outward from every
    crossing cell with ``isOneBeside`` to reconstruct each word's cells,
    then wraps each distinct word in a ``crosswordAnswer.Word``.

    :param dir: direction passed through to ``isOneBeside``
        (presumably horizontal/vertical — confirm against its definition).
    :returns: list of crosswordAnswer.Word objects, numbered from 1.
    """
    group = []
    index = 1
    crossover = []
    # A grid index appearing exactly twice marks a crossing of two words.
    for i in set(gridIndexs):
        if gridIndexs.count(i) == 2 and i not in crossover:
            crossover.append(i)
    words = []
    for i in set(crossover):
        plane = []
        num = -1
        pos = i
        # Walk backwards from the crossing, prepending cells until the
        # word's start ('' terminates the walk).
        while pos != '':
            plane.insert(0, pos)
            pos = isOneBeside(i, dir, num)
            num -= 1
        pos = '_'
        num = 1
        # Walk forwards from the crossing, appending cells to the end.
        # '_' is a sentinel for "not fetched yet".
        while pos != '':
            if pos == '_':
                pos = isOneBeside(i, dir, num)
                num += 1
                plane.append(pos)
                pos = isOneBeside(i, dir, num)
                num += 1
            else:
                plane.append(pos)
                pos = isOneBeside(i, dir, num)
                num += 1
        word = getWord(plane)
        # Deduplicate: each distinct word is emitted once, tagged with its
        # running number and the index of its first cell.
        if word not in words:
            words.append(word)
            group.append(crosswordAnswer.Word(word, index, plane[0]))
            index += 1
    return group
def single_run_partA(pid, articles):
    '''
    First part of a single run of a single process:
    Stops after interestingness was computed.
    '''
    logging.info("Starting PID {}".format(pid))
    # PID 0 keeps the real data; every other worker operates on a shuffled
    # copy so correlations can be compared against chance.
    if pid > 0:
        articles = mng.shuffle_publicationdates(articles)
    logging.debug("PID {}: A".format(pid))
    # Per-calendar-week word frequency dicts.
    wordCounts = mng.getWordCounts(articles)
    distinctWords = mng.getDistinctWords(wordCounts)
    logging.debug("PID {}: B".format(pid))
    # (word, weekly frequency) tuples for every distinct word.
    countsPerWeek = [(w, mng.getCountPerWeek(wordCounts, w))
                     for w in distinctWords]
    logging.debug("PID {}: C".format(pid))
    # One Word object per keyword, carrying its article time series.
    words = [wpy.Word(token, ts_articles=timeseries.Timeseries(counts))
             for token, counts in countsPerWeek]
    logging.debug("PID {}: D".format(pid))
    interestingWords = mng.filter_interestingness(articles, 10, 5)
    keywords = list(interestingWords)
    logging.debug("PID {}: E".format(pid))
    output.put([pid, keywords, words])
def train():
    """Train the encoder/decoder dialogue model on the dgk corpus.

    NOTE(review): only the first (q, a) vector batch from the generator is
    consumed, and the DataLoader loop below is a single pass — confirm
    whether multiple generator chunks / epochs were intended.
    """
    w = word.Word('data/dgk_shooter.conv', 'data/dgk_segment.conv',
                  'data/dgk_segment.conv', 'model/dgk_gensim_model')
    q, a = w.QA()
    generate = w.generate_vector(q, a)
    # Generate the data; i is used to detect the end of the stream.
    i, q_v, a_v = next(generate)
    ds = dataset.VecDataSet(q_v, a_v)
    train_loader = data.DataLoader(ds, batch_size=64, shuffle=True)
    # Input and output sizes both come from the embedding dim q_v.shape[2].
    encoder = model.EncoderRNN(q_v.shape[2], args.hidden_size, q_v.shape[2],
                               args.num_layers, args.batch_size).to(device)
    decoder = model.DecoderRNN(q_v.shape[2], args.hidden_size, q_v.shape[2],
                               args.num_layers, args.batch_size).to(device)
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=args.learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=args.learning_rate)
    # NOTE(review): CrossEntropyLoss is applied to (output, y) where y is a
    # float vector sequence rather than class indices — verify this is the
    # intended loss formulation.
    loss_func = nn.CrossEntropyLoss()
    for i, (x, y) in enumerate(train_loader):
        x = x.to(device)
        y = y.to(device)
        h0 = encoder.initHidden()
        output, hidden = encoder(x, (h0, h0))
        # Use the encoder's output as the decoder's first input.
        # Drop y's last word; alternatively pad y one item shorter when
        # generating the data.
        y = y[:, :-1, :]
        y = torch.cat((output, y), 1)
        h0 = decoder.initHidden()
        output, hidden = decoder(y, (h0, h0))
        loss = loss_func(output, y)
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        loss.backward()
        encoder_optimizer.step()
        decoder_optimizer.step()
def startPhase1(self):
    """Enter phase 1: install the swim background, status bar, word and shower."""
    self.phase = 1
    main = self.main
    main.get(graphics.SwimBackground(main))
    main.status = status.Status(main)
    main.get(main.status)
    main.get(word.Word(main))
    main.get(graphics.Shower(main))
def new_game(self):
    '''Start a new game by spawning the initial words at the top of the screen.'''
    for _ in range(wordsOnScreen):
        text = wordDictionary.pick_word()
        # BUG FIX: random.randint requires two bounds, so the original
        # random.randint(scrn.width) raised TypeError at runtime.
        # randrange(width) yields 0 <= x < width as apparently intended.
        x = random.randrange(scrn.width)
        y = 0
        # BUG FIX: accumulate the words instead of overwriting self.words
        # with a single Word on every iteration (the sibling new_game
        # implementation appends as well).
        self.words.append(word.Word(text, x, y))
def find_random_word():
    """
    Return type Word object which is random word from database
    """
    number = random.randrange(1, length_of_collection() + 1)
    record = find_word_by_number(number)
    # Wrap the raw database record in the game's Word type.
    return word.Word(record['number'],
                     record['english_word'],
                     record['description'],
                     record['polish_word'])
def generate_word(cls):
    """Pick a random word from the assets and set it up as the new puzzle word."""
    import word
    from random import choice

    random_word = choice(assets.words)
    print('YOU GOT A NEW WORD!')
    cls.new_word = word.Word(random_word)
    # print(cls.new_word.word)  # for testing only
    # BUG FIX: the original chained
    # split_letters().generate_attempts().display_word(), which raises
    # AttributeError unless every method returns the Word instance.
    # Separate statements have the same effect whether or not the API is
    # fluent.
    cls.new_word.split_letters()
    cls.new_word.generate_attempts()
    cls.new_word.display_word()
def get_word(word_list, lookahead=1, start=None, max_length=math.inf):
    """Return a generated wordlike string.

    Arguments:
    word_list -- list of '\\n' terminated strings
    lookahead -- index used to match subsequent characters in the string
    start -- string of characters to start generation
    max_length -- upper bound on the generated word's length
    """
    # Delegate generation to word.Word and render it as a plain string.
    return str(word.Word(word_list, lookahead, start, max_length))
def add_words(self):
    '''Top the screen up with one fresh word when fewer than the cap are visible.'''
    if len(self.words) >= self.wordsOnScreen:
        return
    text = self.wordDictionary.pick_word()
    # Claim a free row so two words never share a line.
    row = random.choice(self.poss_vals)
    self.poss_vals.remove(row)
    self.words.append(word.Word(text, 1, row))
def __init__(self, para_file):
    """Load parameters and set up parsing state for check-in processing.

    :param para_file: path to the parameter file consumed by the YAML loader.
    """
    self.para = parameter.yaml_loader().load(para_file)
    self.io = IO(para_file)
    # Match comma-delimited coordinates of the form ",4x.xxx," and
    # ",-7x.xxx," — presumably latitude/longitude for a fixed region
    # (roughly the US north-east); confirm against the input data.
    self.lat_pattern = re.compile(r',4\d\.\d+,')
    self.lng_pattern = re.compile(r',-7\d\.\d+,')
    # Text parser: stemming enabled, tokens shorter than 3 chars dropped.
    self.parser = word.Word(stem=True, min_length=3)
    self.parser.load_stopwords(self.io.stopword_file)
    # Accumulators filled later by the processing methods.
    self.checkins = {}
    self.word_dict = {}
    self.sequences = []
def add_to_vocabulary(words, post_type, vocab):
    """Count every word of *words* under *post_type*, extending *vocab* as needed."""
    for token in words:
        position = get_word_index(token, vocab)
        if position == -1:
            # Unseen word: create an entry and record this first occurrence.
            entry = word.Word(token)
            entry.increment_count(post_type)
            vocab.append(entry)
        else:
            # Known word: just bump its per-type frequency.
            vocab[position].increment_count(post_type)
def score_words(words):
    """Score each word for Scrabble and return them, best first, as one string.

    :param words: iterable of word strings.
    :returns: the concatenation of ``str(Word)`` for every word, sorted by
        descending Scrabble score.
    """
    scrabble_words = scrabble.Scrabble("./words.txt")
    # Comprehension replaces the original append loop.
    scored = [word.Word(w, scrabble_words.score_word(w)) for w in words]
    scored.sort(key=lambda x: x.score, reverse=True)
    # str.join avoids the quadratic `+=` concatenation of the original.
    return "".join(str(each) for each in scored)
def load_dictionary(self, filename):
    """Read a comma-separated word file and add every entry to the dictionary.

    Each line carries: native, alternate, latin, translation.
    """
    with open(filename, encoding='utf8') as file:
        # Iterate the file directly instead of materializing readlines().
        for line in file:
            fields = line.split(',')
            entry = word.Word(fields[0], fields[1], fields[2],
                              fields[3].strip())
            self.add_word(entry)
def remove_words_out_of_bounds(self):
    """Drop words that scrolled past the right edge; charge the player for each.

    For every removed word: increments the missed counter, decrements HP,
    returns its row to the free pool, and blanks its last on-screen cells.
    """
    # BUG FIX: the original removed items from self.words while iterating
    # over that same list, which silently skips the element following
    # every removal. Iterating over a snapshot visits every word once.
    for wor in list(self.words):
        if wor.x + len(wor.word) > self.scrn.width - 3:
            self.stat.missedWords += 1
            self.hp -= 1
            # Give the row back to the pool of free vertical positions.
            self.poss_vals.append(wor.y)
            self.words.remove(wor)
            if wor in self.probableWords:
                self.probableWords.remove(wor)
            # Overwrite the word's old cells with spaces to erase it.
            empty = word.Word(' ' * len(wor.word), wor.x, wor.y)
            self.scrn.render_word(empty)
def parse_lines_AccentedTischendorf(self, lines):
    """Parse accented Tischendorf text into Verse/Word objects (Python 2 code).

    Tokens are "chapter:verse" references, break markers ({P}, {C}),
    stray hyphens (silently dropped), or actual words. Monad numbering
    continues across calls via self.end_monad.

    NOTE(review): chapter_end is only assigned while handling a reference
    token; input without one would leave it unbound at the final
    parseChapter call — confirm inputs always start with a reference.
    """
    mystring = "\n".join(lines)
    words = mystring.split()
    self.end_monad += 1
    index = -1
    break_kind = "."
    for w in words:
        index += 1
        if w[0] in "0123456789":
            # "chapter:verse" reference token.
            myarr = w.split(":")
            mychapter = int(myarr[0])
            try:
                myverse = int(myarr[1])
            except:
                print "UP100: w = '%s' words[index:+30] = '%s'" % (
                    w, words[index:index + 30])
            chapter_end = self.end_monad - 1
            # Crossing into a new chapter: flush the finished one first.
            if self.chapter != mychapter:
                if self.chapter <> 0:
                    self.parseChapter(self.chapter, chapter_end)
                self.chapter = mychapter
            self.verse = myverse
            # Open a fresh verse starting at the current monad.
            verse = Verse([], self.bookname, self.booknumber)
            verse.chapter = mychapter
            verse.verse = myverse
            verse.first_monad = self.end_monad
            verse.last_monad = self.end_monad
            self.verses.append(verse)
        elif w == "{P}":
            break_kind = "P"
        elif w == "{C}":
            break_kind = "C"
        elif w == "-":
            pass # Remove silently. FIXME: Do something more intelligent!
        else:
            # Ordinary word token: wrap it and attach to the open verse.
            wd = word.Word(self.end_monad, variant_none)
            wd.surface = w
            # "surface&qere" tokens carry an explicit qere reading.
            if "&" in wd.surface:
                [wd.surface, wd.qere] = wd.surface.split("&")
            else:
                wd.qere = wd.surface
            wd.qere_noaccents = word.RemoveAccents(
                word.BETAtoOLBtranslate(wd.qere))
            #wd.parsing = "CONJ"
            #wd.Strongs1 = 9999
            wd.makeSurfacesAccentedTischendorf()
            # Attach the pending break marker, then reset it to ".".
            wd.break_kind = break_kind
            break_kind = "."
            #print wd.surface, wd.accented_surface
            self.verses[-1].words.append(wd)
            self.verses[-1].last_monad = self.end_monad
            self.end_monad += 1
    # Flush the final chapter.
    self.parseChapter(self.chapter, chapter_end)
def process_linear_word(self, mybreak_kind, mysurface, myqere, mytag,
                        mystrongs, strongslemma, ANLEXlemma):
    """Append one fully-specified word at the current monad to the last verse."""
    entry = word.Word(self.end_monad, variant_none)
    entry.break_kind = mybreak_kind
    entry.surface = mysurface
    entry.qere = myqere
    # In this linear format the surface form is already accented.
    entry.accented_surface = mysurface
    entry.parsing = mytag
    entry.Strongs1 = mystrongs
    entry.strongslemma = strongslemma
    entry.ANLEXlemma = ANLEXlemma
    current_verse = self.verses[-1]
    current_verse.words.append(entry)
    current_verse.last_monad = self.end_monad
def test_descriptor_components(self):
    """descriptor_components mirrors the multiset of appended words."""
    subject = word.Word("man")
    for component in ("i", "i", "i", "am", "am", "am", "a", "a",
                      "sick", "spiteful", "an", "unattractive"):
        subject.append(component)
    # Expected counts built by hand from the appends above.
    expected = {
        "man": {
            "i": 3,
            "am": 3,
            "a": 2,
            "sick": 1,
            "spiteful": 1,
            "an": 1,
            "unattractive": 1,
        }
    }
    # Matches right after the appends above.
    self.assertEqual(subject.descriptor_components, expected["man"])
    # Diverges once the word gains another "unattractive"...
    subject.append("unattractive")
    self.assertNotEqual(subject.descriptor_components, expected["man"])
    # ...matches again after bumping the expected count...
    expected["man"]["unattractive"] += 1
    self.assertEqual(subject.descriptor_components, expected["man"])
    # ...and diverges when only the expectation is bumped.
    expected["man"]["unattractive"] += 1
    self.assertNotEqual(subject.descriptor_components, expected["man"])
def __init__(self, native_word, translated_word, word_known_level=0,
             lower_case=True):
    """Initialize the object.

    :native_word: The word in the native language.
    :translated_word: The translation of the word in the new learned
        language.
    :word_known_level: The level in which the word is already known.
        For new words this value should remain empty.
    :lower_case: True if you want to save the lower case of the word,
        False otherwise.
    """
    # Normalize both words together when lower-casing was requested.
    if lower_case:
        native_word, translated_word = (native_word.lower(),
                                        translated_word.lower())
    self.native_word = word.Word(native_word)
    self.translated_word = word.Word(translated_word)
    self.word_known_level = word_known_level
    # Progress counter within the current known-level stage.
    self._current_stage_in_learned_level = 0
def new_game(self):
    '''Start a new game: place random words on free rows, draw the title, run the loop.'''
    for _ in range(self.wordsOnScreen):
        text = self.wordDictionary.pick_word()
        # Take a free row so no two words overlap vertically.
        row = random.choice(self.poss_vals)
        self.poss_vals.remove(row)
        self.words.append(word.Word(text, 1, row))
    self.scrn.render_title()
    self.game_loop()
def test_append(self):
    """append() validates its input and maintains the component count."""
    subject = word.Word("man")
    self.assertEqual(len(subject.descriptor_components), 0)
    # Every non-string argument is rejected with TypeError.
    for bad in (1, 1.0, object, {"test": 123}, [1], (1, 2)):
        with self.assertRaises(TypeError):
            subject.append(bad)
    # The empty string is rejected with ValueError.
    with self.assertRaises(ValueError):
        subject.append("")
    # First "i" adds a component; the second only increments it.
    subject.append("i")
    self.assertEqual(len(subject.descriptor_components), 1)
    subject.append("i")
    self.assertEqual(len(subject.descriptor_components), 1)
    # A different word adds a second component...
    subject.append("am")
    self.assertEqual(len(subject.descriptor_components), 2)
    # ...and repeating it keeps the count stable.
    subject.append("am")
    self.assertEqual(len(subject.descriptor_components), 2)
def add_words(self):
    '''Create word object for each word in the chunk.'''
    tokens = self.label.split(' ')
    # Empty tokens (from repeated spaces) are skipped but keep their index.
    self.words = [
        word.Word(token, position, self.chunk_number, self.st, self.et,
                  self.filename, self.fid, self.sid, self.cid,
                  corpus=self.corpus, register=self.register, chunk=self)
        for position, token in enumerate(tokens)
        if token
    ]
def game_loop(self):
    """Main typing-game loop: scroll words, redraw, read keys; runs until hp <= 0.

    :returns: the accumulated stats object.
    """
    while self.hp > 0:
        if (not self.paused):
            #push the words along
            new_time = datetime.datetime.now()
            # NOTE(review): .microseconds is only the sub-second component
            # of the timedelta, so a gap longer than one second can read as
            # a small value and delay movement — confirm whether total
            # elapsed time (total_seconds) was intended.
            micro_seconds_passed = (new_time - self.last_time).microseconds
            #move the words if enough time has passed
            # Blank each word's current cells before any move/redraw.
            for wor in self.words:
                empty = word.Word(' ' * len(wor.word), wor.x, wor.y)
                self.scrn.render_word(empty)
            if micro_seconds_passed >= (0.1) * (1000000):
                self.last_time = new_time
                for wor in self.words:
                    wor.x += self.stat.modifier
            #render words
            for wor in self.words:
                if (wor in self.probableWords):
                    # Candidate matches are drawn with the typed prefix
                    # highlighted up to the cursor position.
                    self.scrn.render_word_typed(wor, self.pos_in_word)
                else:
                    self.scrn.render_word(wor)
            #self.scrn.render_stats()
            self.scrn.scr.refresh()
        #take in input
        key_pressed = self.process_input()
        #call whatever uses keypressed
        self.pause(key_pressed)
        self.game_mode(key_pressed)
        if (not self.should_keep_going):
            break
        #update data
        self.remove_words_out_of_bounds()
        self.add_words()
    return self.stat
def add_to_database(self, entry):
    """Parse an "itemA,itemB" line and store the pair as Word objects."""
    # Split only on the first comma so itemB may itself contain commas.
    itemA, itemB = entry.split(",", 1)
    pair = (word.Word(self.columnheads[0], itemA.rstrip()),
            word.Word(self.columnheads[1], itemB.rstrip()))
    self.languagedb.add(pair)
def compute_mostInterestingKeywordsTable(path_source, path_result,
                                         min_weektotal=10, min_changerate=5):
    """
    Creates a Markdown table at ``path_results`` with the most interesting
    keywords of the database at ``path_source``.

    Keywords must fulfill the following two conditions in at least one
    (and the same) week:
    1. Keyword was mentioned at least ``min_weektotal`` in that week.
    2. Keyword has a relative changerate of mentions of ``min_changerate``
       compared to the previous week.

    Parameters
    ----------
    path_source : str
        Path on local machine where article database is located.
    path_result : str
        Path on local machine where resulting Markdown table should be
        stored (opened in append mode).
    min_weektotal : int
        Minimum number of total mentions in at least one week (default is 10).
    min_changerate : int
        Minimum number for the relative changerate of mentions compared to
        the previous week (default is 5).

    Returns
    -------
    None
    """
    articles = mng.load_articles(path_source)
    # Get a dict of dicts for each calendar week with word frequencies.
    wordCounts = mng.getWordCounts(articles)
    # List of distinct words.
    distinctWords = mng.getDistinctWords(wordCounts)
    # (word, weekly frequency) tuples for every distinct word.
    countsPerWeek = [(w, mng.getCountPerWeek(wordCounts, w))
                     for w in distinctWords]
    # One Word object per keyword. NOTE(review): `words` is never used
    # below (matching the original) — kept in case wpy.Word construction
    # has side effects; confirm and drop if not.
    words = [wpy.Word(c[0], ts_articles=timeseries.Timeseries(c[1]))
             for c in countsPerWeek]
    # Compute interestingness.
    interestingWords = mng.filter_interestingness(
        articles, min_weektotal=min_weektotal, min_changerate=min_changerate)
    res = {}
    keywords = []
    for k in interestingWords:
        keywords.append(k)
        # Delete year but keep calendar week and round numbers:
        res[k] = {}
        for el in interestingWords[k]:
            res[k][el[1]] = [
                interestingWords[k][el][0],
                round(interestingWords[k][el][1], 2)
            ]
    m = matching.groupmatch(keywords, articles)
    # BUG FIX: use a context manager so the file is closed even when a row
    # raises (the original leaked the handle on errors). The header string
    # below was garbled in the source and has been reconstructed —
    # NOTE(review): confirm the intended column title text.
    with open(path_result, 'a', encoding="utf-8") as writer:
        writer.write(
            "|row|Keyword|week: [ total , changerate ]"
            "| computed query (advanced) | matching result (advanced) |"
        )
        writer.write("\n|---|---|---|---|---|")
        for i in range(0, len(keywords)):
            key = keywords[i]
            if len(m[key]["link"]) > 0:
                row = "\n| {}. | {} | {} | {} | {} |".format(
                    i + 1, key, res[key], m[key]["query"], m[key]["link"][1])
            else:
                row = "\n| {}. | {} | {} | {} | EMPTY MATCHING |".format(
                    i + 1, key, res[key], m[key]["query"])
            # Strip braces so dicts render cleanly inside Markdown cells.
            writer.write(row.replace("{", "").replace("}", ""))