def readAllWords(self):
    """Fill self.words with the built-in sample vocabulary; focused entries
    are additionally collected in self.focusedWords."""
    plain_entries = [
        ("word", "단어"),
        ("asd", "ㅁㄴㄹ"),
        ("qwer", "ㅂㅈㄷㄱ, ㅂㅈㄷㄱ"),
        ("zcxvzxcv", "ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ, ㅋㅌㅊㅍ"),
        ("rtyu", "ㄱ쇼ㅕ, ㄱ쇼ㅕ"),
        ("vbcbv", "ㅠㅊ풏ㅊ"),
        ("vbcbvyy", "ㅠㅊ풏ㅊㅊㅊ"),
        ("realword", "진짜 단어"),
        ("bback", "빡빡이아저씨"),
        ("aaaa", "아아아아"),
    ]
    for english, korean in plain_entries:
        self.words.append(Word(english, korean, False))
    focused_entries = [
        ("focus", "집중하다"),
        ("wow", "놀라운"),
        ("amazing", "엄청난"),
        ("awesome", "개쩌는"),
        ("verylonglonglongandlongword", "매우 길고 길고 또 길고 그리고 긴 단어를 아주 길고 길게 적는중"),
    ]
    # Focused words live in both collections.
    for english, korean in focused_entries:
        entry = Word(english, korean, True)
        self.words.append(entry)
        self.focusedWords.append(entry)
    print("Data Loaded")
def trainTweets(type, tweet, grams):
    """Accumulate n-gram counts of `tweet` into the global wordDict under
    classification `type`, counting each distinct n-gram at most once."""
    tokens = tweet.split()
    # Tracks n-grams already counted for this tweet so duplicates are skipped.
    seen = set()
    for gram in grams:
        # Only start positions that leave room for a full n-gram.
        for start in range(len(tokens) - gram + 1):
            ngram = " ".join(tokens[start:start + gram])
            if ngram in seen:
                continue
            seen.add(ngram)
            # Unigrams are counted token-exactly; longer grams fall back to
            # substring counting over the raw tweet text.
            if gram == 1:
                count = tokens.count(ngram)
            else:
                count = tweet.count(ngram)
            # EAFP: assume the dictionary entry exists, create it on KeyError.
            try:
                wordDict[ngram].addWord(count, int(type))
            except KeyError:
                entry = Word(ngram)
                entry.addWord(count, int(type))
                wordDict[ngram] = entry
def insert(self, value):
    """
    ---------------------------------------------------------
    Inserts value into the hashset, allows only one copy of value.
    Calls _rehash if the hashset _LOAD_FACTOR is exceeded.
    Use: inserted = hs.insert( value )
    -------------------------------------------------------
    Preconditions:
        value - a comparable data element (?)
    Postconditions:
        returns
        inserted - True if value is inserted, False otherwise.
    -------------------------------------------------------
    """
    hash_slot = self._find_slot(value)
    # Sentinel scan: val flips from Word("no") to Word("yes") when the slot
    # contains an element equal to Word("no").
    # NOTE(review): this compares slot entries against Word("no"), not against
    # `value` — looks suspicious; confirm Word.__eq__ semantics and the intent.
    val = Word("no")
    for i in hash_slot:
        if i == val:
            val = Word("yes")
    # Insert only when the slot is empty AND no Word("no") match was seen.
    if hash_slot.is_empty() and val.word == "no":
        hash_slot.insert(value)
        inserted = True
        self._count += 1
    else:
        inserted = False
    # Grow the table when the count exceeds the capacity threshold.
    if self._count > self._total:
        self._rehash()
    return inserted
def integrate(self, concept: Concept) -> Concept:
    """Integrate `concept` into this store and return the stored instance.

    Unknown concepts are rebuilt with integrated parents and registered;
    Word concepts also get a vocabulary entry, Implication concepts take
    their probability from their first parent. A concept already in the
    store just merges in the incoming probability.
    """
    integrated = self.get_concept(concept)
    if integrated is None:
        ip = []  # type: list[Concept]
        # Integrate parents first so the new concept links to stored ones.
        for p in concept.parents:
            ip.append(self.integrate(p))
        integrated = Concept(concept.name, concept.relation, ip,
                             concept.probability)
        integrated.store = self
        integrated.register_with_parents()
        self.add_concept(integrated)
        # NOTE(review): nesting reconstructed from a collapsed source line —
        # confirm this if/elif chain sits inside the `is None` branch.
        if integrated.relation == Relation.Word:
            # Ensure a vocabulary entry exists pointing at this concept.
            word = integrated.name
            if word not in self._words:
                w = Word(word)
                w.add_meaning(integrated)
                self._words[word] = w
        elif integrated.relation == Relation.Implication:
            integrated.propagate_probability_from_parent(
                integrated.parents[0])
    else:
        integrated.merge_probability(concept.probability)
    return integrated
def get_senses(sense_file, words, t_to_use):
    """Return (num_senses, first) for each word with an HTE entry.

    num_senses[w] — how many noun senses of w were current at time t_to_use.
    first[w] — the earliest starting time among w's noun senses.

    Senses are loaded from `sense_file` when it exists, otherwise fetched via
    Word.make_sense_list and cached there. Fixes: files are now closed via
    context managers (the originals were leaked open() calls).
    """
    if os.path.isfile(sense_file):
        with open(sense_file, 'rb') as f:
            senses = pickle.load(f)
    else:
        get_sense = Word('', '')
        senses = {w: get_sense.make_sense_list(w, 'all') for w in words}
        with open(sense_file, 'wb') as f:
            pickle.dump(senses, f)
    num_senses, first = {}, {}
    for w in words:
        if w not in senses or len(senses[w]) == 0:
            print('No HTE entry:', w)
            continue
        # Normalize the raw sense objects, then keep only noun senses.
        senses_processed = [hack_sense_objs(s) for s in senses[w]]
        senses_processed = [
            sense for pos, sense in senses_processed if pos == NOUN
        ]
        processed_times = []
        for s in senses_processed:
            for t in s['times']:
                # A missing ending time means the sense spans a single moment.
                if 'ending_time' not in t:
                    t['ending_time'] = t['starting_time']
                processed_times.append((t['starting_time'], t['ending_time']))
        if not processed_times:
            continue
        # min() replaces sorted(...)[0]: O(n) instead of O(n log n).
        first[w] = min(t[0] for t in processed_times)
        num_senses[w] = len([
            t for t in processed_times
            if t[0] <= t_to_use and t[-1] >= t_to_use
        ])
    return (num_senses, first)
def spellMed(word):
    """Return the best-known medicine spelling for `word` as utf-8 bytes,
    or -1 when neither the word nor its 1- or 2-typo edits match a medicine.

    Fixes: the original compared lengths with `is`/`is not` against int
    literals (identity, not equality — a SyntaxWarning on modern Python) and
    contained dead `x = x.encode(...)` loops whose results were discarded.
    """
    w = Word(word)

    # Try the word as-is first; only generate typo edits when needed so the
    # (potentially expensive) edit generation stays lazy.
    candidates = isMedicine([word])
    if len(candidates) != 0:
        return max(candidates, key=MED_COUNTS.get).encode('utf-8')

    candidates = isMedicine(w.typos())
    if len(candidates) != 0:
        return max(candidates, key=MED_COUNTS.get).encode('utf-8')

    candidates = isMedicine(w.double_typos())
    if len(candidates) != 0:
        return max(candidates, key=MED_COUNTS.get).encode('utf-8')

    # Nothing matched at any edit distance.
    return -1
def lookup(self, word, return_distances=False):
    """Spelling lookup (symspell-style): return suggestions for `word` within
    self.edit_distance_max, sorted by distance then frequency.

    When return_distances is False (default), returns a list of suggestion
    strings; otherwise a list of (suggestion, distance) tuples.
    """
    results = set()
    candidates = set([(word, 0)])  # a set of tuples (candidate, candidate_distance)
    # Every deletion-edit of `word` is a candidate; its distance is how many
    # characters were removed.
    for delete in Word.deletes(word, self.edit_distance_max):
        delete_distance = len(word) - len(delete)
        candidates.update([(delete, delete_distance)])
    candidates = sorted(candidates, key=lambda x: x[1])  # sort by increasing distance
    while candidates:
        # NOTE(review): list.pop() takes the LAST element, so candidates are
        # actually processed in DECREASING distance order despite the
        # ascending sort above — confirm whether pop(0) was intended.
        candidate, candidate_distance = candidates.pop()  # the distance of the candidate from `word`
        candidate_count = self._terms[candidate]  # the (possibly 0) no. of occurrences for candidate
        if candidate_count > 0:  # there is an entry for this item in the dictionary
            # candidate is an original word!
            results.update([(candidate, candidate_distance)])
        suggestions = self._suggestions[candidate]  # the (possibly not existing) suggestions for candidate
        for suggestion in suggestions:
            if not suggestion in [r[0] for r in results]:  # the sugg. exists and hasn't been found yet
                if suggestion == word:
                    # suggestion _is_ the word we are looking for
                    real_distance = 0
                elif candidate_distance == 0:
                    # candidate _is_ the word we are looking up for
                    real_distance = len(suggestion) - len(candidate)  # suggestion_distance
                else:
                    # candidate is a delete edit of the word we are looking up for
                    real_distance = Word.damerau_levenshtein_distance(word, suggestion)
                if real_distance <= self.edit_distance_max:
                    results.update([(suggestion, real_distance)])
    # sort the results first by increasing distance, then by decreasing frequency
    results = sorted(list(results), key=lambda r: (r[1], -self._terms[r[0]]))
    if self.best_suggestions_only and len(results) > 1:
        # only take the original word (if present) and the suggestions with minimum distance from `word`
        min_index = 0 if results[0][1] != 0 else 1  # possibly exclude `word` from the minimum distance
        best_dist = min(results[min_index:], key=lambda r: r[1])[1]  # results[0] may be the original word
        results = [r for r in results if r[1] <= best_dist]
    if not return_distances:
        results = [r[0] for r in results]  # pop out the distances and keep only the suggestions
    return results
def gameMain():
    """Run one interactive hangman round: prompt for single letters until the
    word is complete (Success) or the lives run out (Fail)."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    while hangman.remainingLives > 0:
        display = hangman.currentShape()
        print(display)
        display = guess.displayCurrent()
        print('Current: ' + display)
        display = guess.displayGuessed()
        print('Already Used: ' + display)
        guessedChar = input('Select a letter: ')
        # Reject anything that isn't exactly one character.
        if len(guessedChar) != 1:
            print('One character at a time!')
            continue
        # Reject repeats of already-used letters.
        if guessedChar in guess.guessedChars:
            print('You already guessed \"' + guessedChar + '\"')
            continue
        success = guess.guess(guessedChar)
        if success == False:
            hangman.decreaseLife()
        if guess.finished() == True:
            print('**** ' + guess.displayCurrent() + ' ****')
            print('Success')
            break
    else:
        # while/else: runs only when the lives hit zero without a break.
        # NOTE(review): nesting reconstructed from a collapsed source line —
        # confirm the Fail block is the while-loop's else clause.
        print(hangman.currentShape())
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.displayCurrent() + ']')
        print('Fail')
def make_optimal_paths(transducer_input):
    """Return a copy of `transducer_input` whose arcs are rebuilt so that, for
    every alphabet segment and every (state1, state2) pair, only the optimal
    (minimal-cost) paths survive.

    The input transducer is not mutated: a pickle round-trip is used as a
    fast deep copy, both for the result and for each per-state-pair scratch
    machine. Fixes: the local previously named `range` shadowed the builtin;
    stale commented-out debug prints removed.
    """
    transducer = pickle.loads(pickle.dumps(transducer_input, -1))
    alphabet = transducer.get_alphabet()
    new_arcs = list()
    for segment in alphabet:
        word = Word(segment.get_symbol())
        word_transducer = word.get_transducer()
        intersected_machine = Transducer.intersection(word_transducer, transducer)
        states = transducer.get_states()
        for state1, state2 in itertools.product(states, states):
            initial_state = word_transducer.initial_state & state1
            final_state = word_transducer.get_a_final_state() & state2
            # Private copy so each state pair starts from the full intersection.
            temp_transducer = pickle.loads(pickle.dumps(intersected_machine, -1))
            temp_transducer.initial_state = initial_state
            temp_transducer.set_final_state(final_state)
            temp_transducer.clear_dead_states()
            if final_state in temp_transducer.get_final_states():  # otherwise no path.
                try:
                    temp_transducer = remove_suboptimal_paths(temp_transducer)
                    # renamed from `range`, which shadowed the builtin
                    transducer_range = temp_transducer.get_range()
                    arc = Arc(state1, segment, transducer_range,
                              _get_path_cost(temp_transducer), state2)
                    new_arcs.append(arc)
                except KeyError:
                    # remove_suboptimal_paths signals an unreachable path via
                    # KeyError; such state pairs contribute no arc.
                    pass
    transducer.set_arcs(new_arcs)
    return transducer
def setUp(self):
    """Create a DataManager preloaded with two sample Word entries."""
    manager = DataManager()
    first = Word('software', '소프트웨어', (True, manager))
    second = Word('project', '프로젝트', (True, manager))
    manager.words = [first, second]
    # Expose the fixtures under the names the tests expect.
    self.d1, self.wd1, self.wd2 = manager, first, second
def valid_close_word(self, file_path):
    """For every row of the close-word CSV, preprocessing `word` must yield
    `replace_word` as its first result (rows that preprocess to nothing are
    skipped)."""
    frame = pd.read_csv(file_path)
    preprocessor = Word()
    for entry in frame.itertuples():
        processed = preprocessor.preprocess(entry.word)
        if len(processed) > 0:
            assert processed[0] == entry.replace_word
def gameMain():
    """Play one console hangman round with input validation."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    maxTries = hangman.getLife()
    finished = False
    while guess.numTries < maxTries:
        print(hangman.get(maxTries - guess.numTries))
        guess.display()
        guessedChar = input('Select a letter: ')
        # Guard: exactly one character per turn.
        if len(guessedChar) != 1:
            print('One character at a time!')
            continue
        # Guard: no repeated letters.
        if guessedChar in guess.guessedChars:
            print('You already guessed \"' + guessedChar + '\"')
            continue
        finished = guess.guess(guessedChar)
        if finished == True:
            break
    if finished == True:
        print('Success')
    else:
        print(hangman.get(0))
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.currentWord + ']')
        print('Fail')
def load(self, filename):
    """
    Loads input file to self._data.

    Each tab-separated row becomes a Word element whose cells are mapped
    onto Anki field names; rows with the wrong number of columns raise
    ValueError.

    :type filename: str
    """
    # todo: should be easily configurable
    logger.debug("Trying to load file '%s'." % filename)
    with open(filename, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        # must match Anki fieldnames. If you want to ignore a field, just set it to ""
        field_names = ["Expression", "Kana", "Meaning", None]
        for row in reader:
            element = Word()
            element.line = '\t'.join(row)
            fields = [c.decode('utf8').strip() for c in row]
            if len(fields) != len(field_names):
                # Fix: was `raise (ValueError, msg)` — raising a tuple is a
                # TypeError on Python 3 and deprecated on Python 2.
                raise ValueError(
                    "The number of supplied field_names (%d) doesn't match the number of "
                    "fields in the file %s (%d)." % (len(field_names), filename, len(fields)))
            for i in range(len(fields)):
                element[field_names[i]] = fields[i]
            if self.scan_for_duplicates:
                element.check_duplicate()
            self._data.append(element)
def analysis_one_segment(segment):
    """Cut `segment` into (word, pos) tokens and greedily grow phrase
    candidates out of consecutive compatible tokens.

    Returns (detected_phrases, new_cut_words): the phrases detected plus the
    merged word stream.
    """
    detected_phrases = []
    cut_words = cut(segment)
    first_seg_word, first_seg_pos = next(cut_words)
    logging.debug("{} {}".format(first_seg_word, first_seg_pos))
    # Current phrase-in-progress, seeded with the first token.
    new_phrase_segments = Segment(init=Word(first_seg_word, first_seg_pos))
    new_cut_words = []
    for word, pos in cut_words:
        logging.debug("{} {}".format(word, pos))
        _phrase, consistent = could_concatenate(new_phrase_segments, Word(word, pos))
        if _phrase:
            # Token extends the current phrase candidate.
            new_phrase_segments.append(Word(word, pos, consistent))
        else:
            # Phrase broken: flush the candidate and start a fresh one.
            new_cut_words.append(new_phrase_segments.merge())
            detected_phrases = add_detected_new_phrase(new_phrase_segments, detected_phrases)
            new_phrase_segments = Segment(init=Word(word, pos))
    # Flush the trailing candidate; only record it as a phrase when it has
    # more than one token.
    new_cut_words.append(new_phrase_segments.merge())
    if len(new_phrase_segments) > 1:
        detected_phrases = add_detected_new_phrase(new_phrase_segments, detected_phrases)
    return detected_phrases, new_cut_words
def gameMain():
    """Console hangman round; guesses are passed straight to the Guess object
    without per-input validation."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    limit = hangman.getLife()
    finished = False
    while guess.numTries < limit:
        print(hangman.get(limit - guess.numTries))
        guess.display()
        finished = guess.guess(input('Select a letter: '))
        if finished:
            break
    if finished:
        print('Success')
    else:
        print(hangman.get(0))
        print('word [' + guess.word + ']')
        print("Guess:", end=" ")
        for ch in guess.current:
            print(ch, end=" ")
        print()
        print('Fail')
class Display:
    """Ties together the hangman figure, the hidden word, and the clickable
    letter buttons for one game screen."""

    def __init__(self, word, images):
        self.hangman = Hangman(images)
        self.word = Word(word)
        self.letter_buttons = LetterButtons()

    def is_letter_not_in_word(self, ltr):
        # A wrong guess advances the hangman drawing by one stage.
        if ltr not in self.word.get_word():
            self.hangman.set_hangman_status()

    def mouse_click(self, mouse_x, mouse_y):
        # Hit-test every still-visible letter button against the click point.
        for button in self.letter_buttons.get_letters():
            if not button.get_visible():
                continue
            dx = button.get_x() - mouse_x
            dy = button.get_y() - mouse_y
            if math.sqrt(dx ** 2 + dy ** 2) < RADIUS:
                self.letter_buttons.if_inside_button(button)
                self.is_letter_not_in_word(button.get_letter())

    def draw_window(self):
        WINDOW.fill(WHITE)
        self.word.draw(self.letter_buttons.get_guessed())
        self.letter_buttons.draw()
        self.hangman.draw()
        pygame.display.update()

    def is_won(self):
        # Won once every letter of the word has been guessed.
        guessed = self.letter_buttons.get_guessed()
        for letter in self.word.get_word():
            if letter not in guessed:
                return False
        return True

    def is_lost(self):
        return self.hangman.get_hangman_status() == 6
def gameMain():
    """Interactive hangman loop driven through a TextUI.

    Fix: the original compared int results with `is`/`is not` against
    literals (1 and 0) — identity, not equality, and a SyntaxWarning on
    modern CPython; replaced with ==/!=.
    """
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    UI = TextUI(guess, hangman)
    while hangman.getLife() > 0:
        guessedChar = input("Select a letter: ")
        # Handle invalid input: exactly one fresh character is required.
        if len(guessedChar) != 1:
            UI.errorPrint("""
=================================
=====Input just one character====
=================================""")
            continue
        if guessedChar in guess.guessedList:
            UI.errorPrint("""
=================================
=====Input another character=====
=================================""")
            continue
        # Handle the guess result: 1 = word complete, 0 = wrong guess.
        result = guess.guess(guessedChar)
        if result == 1:
            break
        if result == 0:
            hangman.minusLife()
        UI.display()
    UI.display()
    UI.endOfGame(hangman.getLife())
def get_basic_latin_word(self):
    """
    >>> a = Algorithm('abc def c')
    >>> a.get_basic_latin_word()
    abc
    >>> a.get_basic_latin_word()
    def
    >>> a.get_basic_latin_word()
    c
    >>> a = Algorithm('abc 我 c')
    >>> a.get_basic_latin_word()
    abc
    >>> a.get_basic_latin_word()
    """
    # NOTE(review): the doctest above shows bare `abc` output, which only
    # passes if Word's repr prints the raw token — confirm.
    basicLatinWord = []
    # Consume characters while they are Basic Latin. Whitespace terminates
    # the current token: if something was accumulated, it is returned before
    # the space is appended.
    while (self.pos < self.length and is_basic_latin(self.text[self.pos])):
        current_char = self.text[self.pos]
        self.pos += 1
        if current_char.isspace():
            if len(basicLatinWord):
                return Word(u''.join(basicLatinWord), BASICLATIN_WORD)
        # NOTE(review): a space seen with an empty buffer falls through and
        # gets appended here — confirm that leading whitespace in a token is
        # intended.
        basicLatinWord.append(current_char)
    # End of Basic Latin run (or of the text): flush what remains, if any.
    if len(basicLatinWord):
        return Word(u''.join(basicLatinWord), BASICLATIN_WORD)
    else:
        return None
def gameMain():
    """Console hangman round.

    Fix: the original called guess.guess(guessedChar) twice per input —
    the second call re-submitted the same letter and double-counted the
    guess. The result is now stored once and reused.
    """
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    maxTries = hangman.getLife()
    succeeded = False
    while (maxTries - guess.numTries):
        print(hangman.get(maxTries - guess.numTries))
        guess.display()
        guessedChar = input("Select a letter:")
        # Guard: exactly one character per turn.
        if len(guessedChar) != 1:
            print("One character at a time!")
            continue
        # Guard: no repeats.
        if guessedChar in guess.guessedChars:
            print("You already guessed \' %c \' " % (guessedChar))
            continue
        # Submit the guess exactly once.
        if guess.guess(guessedChar) == True:
            succeeded = True
            print("Success!")
            break
    if not succeeded:
        print(hangman.get(0))
        print("word [ %s ]" % (guess.secretWord))
        print("guess [ %s ]" % (guess.currentStatus))
        print('Fail')
def test_clean_word(self):
    """preprocess() must trim known compounds down to their replacement and
    drop every excluded pattern entirely."""
    replace_cases = (
        ('慶応大学', '大学'),
        ('朝日新聞', '新聞'),
        ('JR東日本', 'JR'),
        ('高校受験', '受験'),
        ('iPhone7', 'iPhone'),
    )
    exclude_cases = (
        '1日目',
        '2日目',
        '三日目',
        '1年生',
        '2013年',
        '1984年生まれ',
        '6月',
        '4人',
        '鹿児島出身',
        '鹿児島県',
        '58キロ',
        'ごめんなさい。',
        '要注意',
        '1万円',
        '0円',
        '2期生',
    )
    cleaner = Word()
    for raw, expected in replace_cases:
        assert cleaner.preprocess(raw)[0] == expected
    for raw in exclude_cases:
        assert len(cleaner.preprocess(raw)) == 0
def count_words(self, is_canonical, list_test_pos, list_training_pos, list_test_neg, list_training_neg):
    """Tally word occurrences over the positive and negative training corpora.

    Returns a dict mapping word -> Word tally object; also increments
    self.nbr_pos / self.nbr_neg once per token seen. The test lists are
    accepted for interface compatibility but unused, as before.

    Fixes: `in dict.keys()` membership tests (O(n) list scan on Python 2)
    replaced with `in dict`; readlines() replaced by streaming the file;
    the duplicated positive/negative loops factored into one helper.
    """
    if is_canonical:
        find_words = NaifBayes.find_words_tagged
    else:
        find_words = NaifBayes.find_words_untagged
    dict_words = dict()

    def _tally(file_names, is_positive):
        # One pass over one corpus. NOTE: first sighting of a word only
        # creates its Word entry (no incr call) — preserved from the original.
        for file_name in file_names:
            with codecs.open(file_name, "r", "utf-8") as file:
                for line in file:
                    for word in find_words(line):
                        if is_positive:
                            self.nbr_pos += 1
                        else:
                            self.nbr_neg += 1
                        if word in dict_words:
                            if is_positive:
                                dict_words[word].incr_pos()
                            else:
                                dict_words[word].incr_neg()
                        else:
                            dict_words[word] = Word(word)

    _tally(list_training_pos, True)
    _tally(list_training_neg, False)
    return dict_words
def gameMain():
    """Console hangman round; on success the secret word is shown too."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())  # pick a random word
    hangman = Hangman()
    maxTries = hangman.getLife()  # starting number of lives
    finished = False
    while guess.numTries < maxTries:  # keep going while lives remain
        print(hangman.get(maxTries - guess.numTries))
        guess.display()
        guessedChar = input('Select a letter: ')
        if len(guessedChar) != 1:  # not a single character
            print('One character at a time!')
            continue
        if guessedChar in guess.guessedChars:  # letter already used
            print('You already guessed \"' + guessedChar + '\"')
            continue
        finished = guess.guess(guessedChar)
        if finished == True:
            break
    if finished == True:
        print('Success')
        print('word : ' + guess.secretWord)
    else:
        print(hangman.get(0))
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.currentStatus + ']')
        print('Fail')
def gameMain():
    """Hangman round whose input comes from getChar(); guess() status codes
    1 and 2 skip the rest of the iteration, False costs a life."""
    word = Word('words.txt')
    guess = Guess(word.randFromDB())
    hangman = Hangman()
    while hangman.remainingLives > 0:
        print(hangman.currentShape())
        print('Current: ' + guess.displayCurrent())
        print('Already Used: ' + guess.displayGuessed())
        success = guess.guess(getChar())
        if success == 1 or success == 2:
            continue
        if success == False:
            hangman.decreaseLife()
        if guess.finished():
            break
    if guess.finished() == True:
        print('**** ' + guess.displayCurrent() + ' ****')
        print('Success')
    else:
        print(hangman.currentShape())
        print('word [' + guess.secretWord + ']')
        print('guess [' + guess.displayCurrent() + ']')
        print('Fail')
def __init__(self, word, guess_limit):
    """Start a new game: `guess_limit` wrong guesses allowed on `word`."""
    self.word = Word(word=word)
    self.guesses_remaining = guess_limit
    self.settings = settings  # module-level settings snapshot
    self.previous_guesses = set()
    self.game_won = False
    self.game_in_progress = True
def load(self):
    """ Read the lesson's path, loading and creating words.

    Returns False when no path is set, True otherwise. Prefers a
    semicolon-separated word list file; falls back to scanning the
    directory for .wav files (legacy layout). Python 2 module.
    """
    if self.path is None:
        return False
    if (os.path.exists(os.path.join(self.path, self.word_list_file))):
        #new loading method with a words.list
        # Each line: "<wav file>;<word name>", both fields utf-8 encoded.
        # NOTE(review): the file handle is never closed — consider a
        # with-statement when touching this code.
        ifile = open(os.path.join(self.path, self.word_list_file), "r")
        for line in ifile:
            s = line.split(";")
            word_name = s[1].strip().decode('utf-8')
            full_file_name = os.path.join(self.path, s[0].decode('utf-8'))
            print "Filename: " + full_file_name
            print "Word: " + word_name
            self.words.append(Word(word_name, full_file_name))
    else:
        #legacy loading method, kept for backward compatibility
        for file in os.listdir(self.path):
            full_file_name = os.path.join(self.path, file)
            if (os.path.isfile(full_file_name) and os.path.splitext(file)[1] == ".wav"):
                word_name = os.path.splitext(file)[0]
                self.words.append(Word(word_name, full_file_name))
    return True
def __init__(self, data):
    """Hold the game data plus fresh score/word state; category starts unset."""
    self._data = data
    self._score, self._word = Score(), Word()
    self._category = self._iter_category = None
def __init__(self):
    """Pick a secret word from the database and reset per-game state."""
    self.word = Word('words.txt')
    self.secretWord = self.word.randFromDB()
    self.numTries = 0  # game over at 7
    self.hangmanList = hangmanList
    self.currentStatus = '_' * len(self.secretWord)  # masked word display
    self.guessedChars = ''
def main(argv):
    """Entry point: parse CLI flags and demo Word.reverse(). Python 2 module
    (print statements). `argv` is unused — argparse reads sys.argv itself."""
    #Parse out the commandline arguments
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=DESCRIPTION,
        epilog=EPILOG
    )
    parser.add_argument("-t", "--test", nargs=1, default=["something"])
    parser.add_argument("-d", "--debug", action="store_true", help="Enable Debug Messages")
    args = parser.parse_args()
    print "Running Script: %s" % NAME
    if args.debug:
        print "test: %s" % str(args.test[0])
    my_string = "hello"
    print "Orginal String: %s" % my_string
    w = Word(my_string)
    print "Output: %s" % w.reverse()
def addWordEncounter(self, word):
    """Register `word` in the stochastic grammar if it is new.

    Returns True when the word was added, False when it was already known.
    """
    grammar_words = self.stochasticGrammar.wordData
    if word in grammar_words:
        return False
    fresh = Word(word, self.dataGranularity)
    fresh.initializeValues(self.initialValue)
    grammar_words[word] = fresh
    return True
def test_filter_rules():
    """Only rule2 should survive filtering for these two words."""
    rule1 = {
        'applies': {
            'positive': ['nasal']
        },
        'conditions': {
            'negative': ['nasal'],
            'positive': ['syllabic']
        },
        'name': 'nasalization'
    }
    rule2 = {
        'applies': {
            'positive': ['tonal']
        },
        'conditions': {
            'positive': ['syllabic']
        },
        'name': 'valid'
    }
    word1 = Word(
        [Segment(['consonantal'], ['tonal']), Segment(['sonorant'], ['high'])])
    word2 = Word(
        [Segment(['syllabic', 'low'], []), Segment(['high'], ['sonorant'])])
    assert filter_rules([word1, word2], [rule1, rule2]) == [rule2]
class WordTest(unittest.TestCase):
    """Unit tests for the Word guessing model backed by words.txt."""

    def setUp(self):
        # Load the word list that ships one directory above the tests.
        self.word = Word(
            os.path.normpath(
                os.path.join(os.path.dirname(__file__), '../words.txt')))

    def test_get_word(self):
        assert self.word is not None

    def test_words_count(self):
        # The bundled dictionary is expected to hold exactly 19184 entries.
        self.assertEqual(len(self.word.words), 19184)

    def test_get_guessed_characters(self):
        self.assertIsInstance(self.word.get_guessed_characters(), set)

    def test_guess(self):
        # Guess the whole alphabet: 1 means the word is complete, any other
        # non-None result must be -1 (wrong / duplicate guess).
        for char in string.ascii_lowercase:
            result = self.word.guess(char)
            if result == 1:
                self.assertEqual(self.word.current_status, self.word.get_word())
            elif result is not None:
                self.assertEqual(result, -1)
        self.assertEqual(len(self.word.get_guessed_characters()), 26)
def __init__(self, word, score, remove):
    """Build one quiz question around `word`, keeping the score and removal
    callbacks supplied by the caller."""
    self.word = Word(word)
    # Fluent chain: definition -> answer options -> rendered question text.
    self.question = self.word.define().option().output()
    self.symbols = ["A", "B", "C", "D", "E"]
    self.view = View()
    self.score = score
    self.remove = remove
    self.isCorrect = False
def __init__(self, sentence_block):
    """Build the token list for one sentence.

    sentence_block: iterable of per-token raw fields (one entry per word).
    Tokens whose lemma is missing or '_' inherit the previous token's lemma
    when that previous token's surface form is '_'.
    """
    self._tokens = []
    for i in range(len(sentence_block)):
        word = Word(sentence_block[i])
        if not word.get_lemma() or word.get_lemma() == '_':
            # NOTE(review): presumably CoNLL-like input where '_' marks a
            # multi-word/continuation row — confirm against the producer of
            # sentence_block.
            if i != 0 and self._tokens[i - 1].get_form() == '_':
                word.set_lemma(self._tokens[i - 1].get_lemma())
        self._tokens.append(word)
def gen_word(element):
    """Create a Word model object from a parsed token dict.

    `element` must carry 'word', 'coarse' and 'fine' keys; the POS codes are
    mapped through the CoarsePOS / FinePOS enums to their names.
    """
    word = Word(
        word=element["word"],
        coarse=CoarsePOS(element["coarse"]).name,
        fine=FinePOS(element["fine"]).name
    )
    # NOTE(review): `has_children` is not a parameter — it must be a global or
    # enclosing-scope variable; confirm it is always defined when this runs.
    if has_children:
        word.children = []
    return word
def test_apply_rule_deletion():
    """A rule whose 'applies' carries 'deletion' removes the segments that
    match its conditions — here the second segment, which is positively
    syllabic."""
    rule = {'applies': {'positive': ['deletion']},
            'conditions': {'positive': ['syllabic']},
            'name': 'deletion'}
    word = Word([Segment(['nasal'], ['syllabic']),
                 Segment(['syllabic'], ['nasal'])])
    # Expected result: only the first (non-syllabic) segment survives.
    target_word = Word([Segment(['nasal'], ['syllabic'])])
    assert word.apply_rule(rule) == target_word
def getWord(self, word):
    """Look up `word` in the Mongo collection and rehydrate it as a Word.

    Returns None when the argument is not a string or no document matches.
    Python 2 module (basestring, print statement).
    """
    if isinstance(word, basestring):
        w = self.collection.find_one({"word":word})
        if w == None:
            return w
        # Rehydrate: copy the raw document dict straight onto a fresh Word.
        # NOTE(review): replacing __dict__ wholesale also discards whatever
        # Word.__init__ set up — confirm that is intended.
        wn = Word(word)
        wn.__dict__ = w
        return wn
    else:
        print "Error Querying Database with ", word
        return None
def find_best_word_outter_function(self, position_seed, board, rotated=False):
    """Seed a word search at `position_seed` and try every rack position.

    Raises ValueError when the seed square holds no tile. For each index
    into the rack (tile_set), a fresh Word is extended from the anchor and
    handed to the inner search. Python 2 module (xrange).
    """
    from word import Word
    square = board.get_square(position_seed)
    tile = square.tile
    if tile is None:
        raise ValueError("Can't play off an empty square")
    # i is the rack index used as the anchor tile choice.
    for i in xrange(len(self.tile_set)):
        potential_word = Word(rotated=rotated)
        potential_word.extend_from_anchor(position_seed, i, board)
        self.find_best_word_inner_function(potential_word, self.tile_set, board)
    return
class Player(object):
    '''
    A hangman player: owns a secret word, knows its opponent, and tracks
    the wrong letters guessed so far.
    '''

    def __init__(self, name):
        '''
        Constructor
        '''
        self._name = name
        self._opponent = None
        self._word = None
        self._misses = []  # wrong letters this player has guessed

    def set_opponent(self,opponent):
        self._opponent = opponent

    def choose_word(self):
        # Prompt until a non-empty word is provided.
        done = False
        while (not done):
            word = input(self._name + ", enter a word : ")
            if (len(word) > 0):
                self._word = Word(word)
                done = True
            else:
                print("A word must have letters!")
        # scroll our word off the screen!
        for i in range(1, 30) :
            print("")

    def guess_letter(self, letter):
        # Apply a letter guessed by the opponent to our word.
        return self._word.guess(letter)

    def guessed_letters(self):
        return self._word.guessed_for_display()

    def all_guessed(self):
        return self._word.all_guessed()

    def guess(self):
        # One turn: show the opponent's word state, read a single letter,
        # record a miss when it is wrong.
        guessed = self._opponent.guessed_letters()
        print("----------------------------------------------------")
        print("Word : " + guessed)
        print("Incorrect guesses : ", end='')
        print(self._misses)
        done = False
        while (not done):
            letter = input(self._name + ", enter a letter : ")
            done = len(letter) == 1
        # NOTE(review): nesting reconstructed from a collapsed source line —
        # confirm the guess is submitted after the input loop, not inside it.
        guessed = self._opponent.guess_letter(letter)
        if not guessed :
            self._misses.append(letter)
        return guessed
def test_index_applicable():
    """A rule with a 'before' requirement applies at index 1: that segment is
    positively syllabic and the following segment is negatively syllabic."""
    syllable_rule = {'before': {'negative': ['syllabic']},
                     'conditions': {'positive': ['syllabic']}}
    word = Word([Segment(['consonantal', 'voice', 'labial', 'long'],
                         ['syllabic', 'stress']),
                 Segment(['syllabic', 'sonorant', 'continuant', 'approximant',
                          'voice', 'labial', 'round', 'dorsal', 'low', 'back'],
                         ['stress', 'long']),
                 Segment(['consonantal', 'coronal', 'anterior'],
                         ['syllabic', 'stress'])])
    assert word.index_applicable(1, syllable_rule)
def parseVerseTextLine(self, line):
    """Turn one verse text line into Word tokens and attach the current verse
    to the last book. Blank lines are ignored."""
    # Must also add it to current book
    if line == "":
        return
    tokens = []
    for surface in line.split(" "):
        token = Word(0, variant_none)
        token.surface = surface
        tokens.append(token)
    self.verse.words = tokens
    self.books[-1].verses.append(self.verse)
def load_words(self, files):
    """Load word definitions from the YAML `files`, merge them into
    self.words, then replace each raw entry with a Word instance.

    Fixes: files are now closed deterministically via with-statements
    (the originals were open/read/close triples).
    """
    for file_name in files:
        # NOTE(review): yaml.load without an explicit Loader can run
        # arbitrary constructors on untrusted input — prefer yaml.safe_load
        # if these files are not fully trusted.
        with open(file_name) as f:
            data = yaml.load(f)
        yamlhelp.merge(data, self.words)
    for word_id in self.words:
        # Wrap the raw mapping in a Word, preserving the original order of
        # operations (store first, then normalize the color).
        entry = Word(word_id, **self.words[word_id])
        self.words[word_id] = entry
        entry.color = yamlhelp.load_color(entry.color)
def create_words(the_reflection):
    """ Create a list of words from reflection

    Tokenizes the reflection's text_blob, skips punctuation, and returns the
    list of Word nodes, each connected back to the reflection.
    """
    word_list = list()
    for word in word_tokenize(the_reflection.text_blob):
        if word not in string.punctuation:
            try:
                # First sighting: persist a new node keyed by the lowercased token.
                word = Word(id=word.lower()).save()
            except UniqueProperty:
                # if a word has already been stored at word, plot a relationship
                word = Word.nodes.get(id=word.lower())
            # NOTE(review): nesting reconstructed from a collapsed source —
            # confirm connect/append run for both new and existing nodes.
            word.reflections.connect(the_reflection)
            word_list.append(word)
    return word_list
def test_index_applicable_boundaries():
    """Rules marked 'first'/'last' must only apply at the word's first/last
    segment respectively; both segments here satisfy the condition, so only
    the position decides applicability."""
    start_boundary = {'conditions': {'positive': ['syllabic']}, 'first': True}
    word = Word([Segment(['syllabic'], ['nasal']),
                 Segment(['syllabic'], ['nasal'])])
    assert word.index_applicable(0, start_boundary)
    assert not word.index_applicable(1, start_boundary)
    end_boundary = {'conditions': {'positive': ['syllabic']}, 'last': True}
    assert not word.index_applicable(0, end_boundary)
    assert word.index_applicable(1, end_boundary)
def create_entity(self, word, dictionary, transcription, translation):
    """Persist a brand-new Word entity with an initial usage count of 1."""
    record = Word()
    record.word = word
    record.transcription = transcription
    record.translation = translation
    record.dictionary = dictionary
    record.usages = 1
    record.put()
def loop(self):
    #Main loop
    # Run rounds until the players decide to stop. Each round: the current
    # leader picks a word, the non-leaders take guessing turns until the word
    # is guessed or the guess budget runs out.
    while (True):
        print(str(self.current_leader) + ", pick a word!")
        self.current_word = Word(input())
        self.guesses = MAX_GUESSES
        # Push the typed word off the visible terminal.
        print("\n"*20)
        while (True):
            #Loop through players which are non-leaders
            for player in [player for player in self.players if player != self.current_leader]:
                if (self.guesses > 0):
                    self.current_word.print_word()
                    player.turn(self.current_word)
                else:
                    print("No guesses left, " + str(self.current_leader) + " +3 points!")
                    self.current_leader.score += 3
                    break
            # NOTE(review): `self.guessed` is read here but never assigned in
            # this method — confirm it is maintained by player.turn() or
            # elsewhere. Nesting reconstructed from a collapsed source line.
            if (self.guessed):
                break
            else:
                continue;
            #Executed if the loop finished without breaking
            break
        #This never gets called until the above "continue" isn't called (which happens when break is called within the inner loop)
        print("Keep playing? y/n")
        choice = get_choice(("y","n"))
        if (choice == "n"):
            self.end_game()
            break
        elif (choice == "y"):
            self.clear_vars()
            continue
def test_damerau_levenshtein_distance(self):
    """Distance counts deletions one per character and a transposition as a
    single edit; identical strings are distance 0."""
    cases = [
        ('ciao', 'ciao', 0),
        ('ciao', 'cia', 1),
        ('cia', 'ciao', 1),
        ('ciao', 'co', 2),
        ('ciao', 'c', 3),
        ('ciao', '', 4),
        ('simone', 'siomne', 1),
        ('simone', 'siomen', 2),
    ]
    for left, right, expected in cases:
        self.assertEqual(Word.damerau_levenshtein_distance(left, right), expected)
def wordTest(inputPath, distributionGranularity, wordGranularity, tempFile):
    """Write one Word's configuration to tempFile, read it back, and print
    the distribution before/after an increment. Python 2 module (print
    statements)."""
    salaryDictionary = SalaryDistribution(inputPath, distributionGranularity)
    salaryDictionary.parse()
    test = Word("Potato", wordGranularity)
    test.initializeValues(salaryDictionary)
    with open(tempFile, 'w') as dataDump:
        dataDump.write(test.configure())
    with open(tempFile, 'r') as reReading:
        for line in reReading:
            try:
                check = Word.fromFileString(line)
                print check.configure()
                check.increment(8200, 3, 0.75)
                print ("~~~~~~~")
                print check.configure()
            except NameError:
                # Lines that don't parse as Word configs are skipped.
                continue
def get_word(self):
    """Assemble a Word from this page's sections: pronunciation (IPA/audio)
    and per-part-of-speech meanings, restricted to the headings nested under
    the original language section. Python 2 module (print statement)."""
    sections = self.get_sections()
    false = False  # NOTE(review): unused local — looks like leftover code.
    # Level of the language section we are currently inside; -1 = outside.
    from_section_level = -1
    word = Word(self.title)
    for section in sections:
        if section[1] == self.orig_section_name:
            from_section_level = section[0]
            continue
        elif from_section_level >= 0 and section[0] <= from_section_level:
            # Left the language section (sibling or higher-level heading).
            from_section_level = -1
        if from_section_level >= 0:
            if self.is_pronunce_section(section[1]):
                ipa, audio = self._parse_pronunciation(section[2])
                word.set_pronunciation(ipa)
                word.set_audio(audio)
            elif self.is_partos_section(section[1]):
                try:
                    meanings = self._parse_part_of_speech(section[2])
                except:
                    # Debug print before re-raising the parse error.
                    print self.title, "!!!!!!", section[2]
                    raise
                en_part_of_speach = self.get_en_part_of_speech(section[1])
                word.set_part_of_speech(en_part_of_speach, meanings)
    return word
def write_sonnet(cls):
    """Generate cls.desired_lines lines of verse into cls.my_sonnet.

    Lines are built right-to-left from a Markov chain of "leaders"; when a
    rhyme target exists for line i (cls.rhyme_lines[i] >= 0), the line is
    seeded with a word rhyming with that earlier line's last word.
    """
    cls.my_sonnet = []
    for i in range(cls.desired_lines):
        line = []
        following_word = Word("@")  # '@' acts as the chain's boundary marker
        syllables = 0
        if (cls.rhyme_lines[i] >= 0):
            # Seed the line with a rhyme for the mapped earlier line's last word.
            following_word = cls.word_chain.get_rhyming_word(cls.my_sonnet[cls.rhyme_lines[i]][-1], cls.rhyme_level)
            line.insert(0, following_word.get_word())
            syllables += following_word.count_syllables()
        while syllables < cls.desired_length:
            next_word = cls.word_chain.get_random_leader(following_word.get_word())
            # Retry up to 4 times to avoid landing on the boundary marker.
            for k in range(1, 5):
                if next_word.get_word() != "@":
                    break
                next_word = cls.word_chain.get_random_leader(following_word.get_word())
            if next_word.get_word() == "@":
                break
            line.insert(0, next_word.get_word())
            following_word = next_word
            # NOTE(review): the result of get_word_stress() is discarded —
            # confirm whether it mutates the Word or is dead code.
            following_word.get_word_stress()
            syllables += next_word.count_syllables()
        cls.print_progress(i+1, syllables)
        cls.my_sonnet.append(line)
def startElement(self, name, attrs):
    """SAX handler: build Word/Character objects from the element attributes.

    Characters missing top/bottom/shear inherit them from the enclosing word
    (the last word appended to the current line). Python 2 module (unicode()).
    """
    if name == "Image":
        self.image_name = str(attrs['name'])
    if name == "TextLine":
        self.cur_line = []
    if name == "Word":
        word = Word()
        word.top = int(attrs['top'])
        word.bottom = int(attrs['bottom'])
        word.left = int(attrs['left'])
        word.right = int(attrs['right'])
        word.text = unicode(attrs['text'])
        word.shear = int(attrs['shear'])
        self.cur_line.append(word)
    if name == "Character":
        char = Character()
        if 'top' in attrs:
            char.top = int(attrs['top'])
        else:
            # Inherit geometry from the parent word when absent.
            char.top = self.cur_line[-1].top
        if 'bottom' in attrs:
            char.bottom = int(attrs['bottom'])
        else:
            char.bottom = self.cur_line[-1].bottom
        char.left = int(attrs['left'])
        char.right = int(attrs['right'])
        char.text = unicode(attrs['text'])
        if 'shear' in attrs:
            char.shear = int(attrs['shear'])
        else:
            char.shear = self.cur_line[-1].shear
        self.cur_line[-1].characters.append(char)
def parse(self):
    """Load word and word-pair statistics from their backing files into
    self.wordData / self.wordPairData, keyed by each entry's text."""
    with open(self.wordPath, 'r') as word_source:
        for raw_line in word_source:
            entry = Word.fromFileString(raw_line)
            self.wordData[entry.text] = entry
    with open(self.wordPairPath, 'r') as pair_source:
        for raw_line in pair_source:
            pair = WordPair.fromFileString(raw_line)
            self.wordPairData[pair.text] = pair
def choose_word(self):
    """Prompt until a non-empty word is entered, then hide it by scrolling
    the screen."""
    while True:
        entered = input(self._name + ", enter a word : ")
        if len(entered) > 0:
            self._word = Word(entered)
            break
        print("A word must have letters!")
    # push the typed word off the visible terminal
    for _ in range(1, 30):
        print("")
def convert_spacy_format(text): parsed_text = [] # instantiate Spacy's parser parser = English() # parse text via Spacy's parser doc = parser(unicode(text, "utf-8")) for sent in doc.sents: s = Sentence() s.string = str(sent) word_index = 0 for token in sent: # problem: sometimes, spacy interprets a new line in a text-file wrongly and provides an empty token. # solved: by the following condition if len(token.orth_) > 1: # Spacy's tags for each word in the sentence are stored in a new Word-object w = Word() w.string = token.orth_ w.lemma = token.lemma_ w.index = word_index # less verbose tags are provided by "token.pos_" w.tag = token.tag_ w.entity = token.ent_type_ word_index += 1 # each word is appended to a Sentence-object s.words.append(w) # each Sentence-object is appended to an array parsed_text.append(s) return parsed_text
def post(self, *args):
    """Create or update a dictionary Word entity from a JSON POST body.

    The body carries percent-encoded UTF-8 fields ("word", "translation",
    "transcription", "dictionary"); each is unquoted and decoded before
    use.  If no entity exists for the word, one is created inside a
    datastore transaction; otherwise only its usages are updated.
    NOTE(review): `transcription` and `dictionary` are ignored on the
    update path — confirm that is intentional.
    """
    body = json.loads(self.request.body)
    # fields arrive percent-encoded; unquote the bytes, then decode as UTF-8
    word = urllib.unquote(body["word"].encode("ascii")).decode("utf8")
    translation = urllib.unquote(body["translation"].encode("ascii")).decode("utf8")
    transcription = urllib.unquote(body["transcription"].encode("ascii")).decode("utf8")
    dictionary = urllib.unquote(body["dictionary"].encode("ascii")).decode("utf8")
    # look up an existing entity for this word (GQL bound parameter :1)
    entity = Word.gql("where word=:1", word).get()
    if entity is None:
        # first sighting of this word: create the entity atomically
        db.run_in_transaction(self.create_entity, word, dictionary, transcription, translation)
    else:
        # known word: record the new usage/translation atomically
        db.run_in_transaction(self.update_usages, entity, translation)
def __init__(self, trainingDataPath, salaryProbabilityPath, distributionGranularity, dataGranularity, grammar, dropOffWidth = 0, stepDown = 0, increment = 5.0):
    """Set up the model from a salary distribution and a stochastic grammar.

    Parameters:
        trainingDataPath: path to the training data file (stored, not read here).
        salaryProbabilityPath: path to the salary probability data, parsed
            immediately into a SalaryDistribution.
        distributionGranularity: bucket granularity for the salary distribution.
        dataGranularity: granularity passed to Word value tables.
        grammar: the stochastic grammar object used for word lookups.
        dropOffWidth, stepDown, increment: tuning parameters, stored as-is.
    """
    # parse the salary probability distribution up front
    self.salaryData = SalaryDistribution(salaryProbabilityPath, distributionGranularity)
    self.salaryData.parse()
    self.trainingDataPath = trainingDataPath
    self.dataGranularity = dataGranularity
    self.stochasticGrammar = grammar
    self.dropOffWidth = dropOffWidth
    self.stepDown = stepDown
    self.increment = increment
    # seed the initial value table by initializing a throwaway Word
    # against the salary distribution (removed old commented-out debug
    # prints/increments that cluttered this section)
    dummy = Word("", self.dataGranularity)
    dummy.initializeNewValues(self.salaryData)
    self.initialValue = dummy.values
def wordTest(inputPath, distributionGranularity, wordGranularity, tempFile): salaryDictionary = SalaryDistribution(inputPath, distributionGranularity) salaryDictionary.parse() test1 = Word("Potato", wordGranularity) test1.initializeNewValues(salaryDictionary) test2 = Word("Celery", wordGranularity) test2.initializeNewValues(salaryDictionary) with open(tempFile, 'w') as dataDump: dataDump.write(test1.configure()) dataDump.write(test2.configure()) with open(tempFile, 'r') as reReading: wordList = [] for line in reReading: try: wordList.append(Word.fromFileString(line)) except NameError: continue print sum(wordList[0].values.values()) print sum(wordList[1].values.values()) wordList[0].increment(8250,increment = 147.25) print sum(wordList[0].values.values()) print sum(wordList[1].values.values())
def test_deletes(self):
    """Word.deletes(word, n) yields every distinct string reachable by
    1..n single-character deletions (empty set for n == 0)."""
    cases = [
        ('ciao', 3, {'iao', 'cio', 'cao', 'cia', 'co', 'ca', 'ci', 'ao', 'io', 'ia', 'c', 'i', 'a', 'o'}),
        ('ciao', 2, {'iao', 'cio', 'cao', 'cia', 'co', 'ca', 'ci', 'ao', 'io', 'ia'}),
        ('ciao', 1, {'iao', 'cio', 'cao', 'cia'}),
        ('ciao', 0, set()),
        # repeated letters collapse to one entry per distinct result,
        # even when the deletion budget exceeds the word length
        ('aaa', 30, {'aa', 'a'}),
        ('bbb', 3, {'bb', 'b'}),
        ('woho', 1, {'woh', 'oho', 'who', 'woo'}),
    ]
    for source, budget, expected in cases:
        self.assertSetEqual(Word.deletes(source, budget), expected)
def populate_db():
    """Copy word/reading pairs from the JMdict SQLite database into the
    module-level connection `conn`.

    Joins every reading element (r_ele) against its entry's kanji
    elements (k_ele) via an outer join, honouring re_restr restrictions,
    and saves one Word per applicable (keb, reb) pair.  Bails out early
    if the JMdict database file is missing.
    NOTE(review): relies on module-level `conn` and `JMDICT_PATH`; the
    `re_nokanji is not None` test on the unrestricted path looks like it
    may be inverted relative to its comment — confirm against the JMdict
    schema before changing.
    """
    print "Filling database with word/reading data from JMdict..."
    if not os.path.exists(JMDICT_PATH):
        print "No jmdict database found at %s" % JMDICT_PATH
        print "Cannot continue without it."
        return
    #get connection to jmdict database
    jm_conn = sqlite3.connect(JMDICT_PATH)
    jm_conn.row_factory = sqlite3.Row
    start = time.time()
    # one row per (reading, restriction, kanji) combination in an entry
    s = '''
    SELECT r_ele.reb AS r_ele_reb,
    r_ele.re_nokanji AS r_ele_re_nokanji,
    re_restr.keb AS re_restr_keb,
    k_ele.keb AS k_ele_keb
    FROM r_ele
    LEFT OUTER JOIN re_restr ON re_restr.r_ele_id=r_ele.id
    LEFT OUTER JOIN k_ele ON k_ele.entry_ent_seq=r_ele.entry_ent_seq
    '''
    results = jm_conn.execute(s)
    for r in results:
        #some words have no kanji elements
        if r['k_ele_keb'] is None:
            continue
        #if this entry has a restricted reb, only apply it to the
        #corresponding keb
        if r['re_restr_keb'] is not None:
            if r['re_restr_keb'] == r['k_ele_keb']:
                #print r['k_ele_keb'], r['r_ele_reb']
                word = Word(conn, r['k_ele_keb'], r['r_ele_reb'])
                word.save()
        else:
            #otherwise, all rebs apply to all kebs in this entry
            #but some readings don't use kanji, so no related kebs
            if r['r_ele_re_nokanji'] is not None:
                #print r['k_ele_keb'], r['r_ele_reb']
                word = Word(conn, r['k_ele_keb'], r['r_ele_reb'])
                word.save()
    conn.commit()
    print 'Filling database with word/reading data took '\
        '%s seconds' % (time.time() - start)
def run(self): print "\nGetting word from Internet..." self.word=Word() while(self.correct==False and self.failures < 6): self.printState() letter=raw_input("\nWhich letter do you choose, "+str(self.playersList[self.playerTurn]['name'])+"?\n").upper() if self.word.matchLetter(letter): print "The word contains the letter '"+letter+"'\n" else: print "Oh, the word doesn't contain the letter '"+letter+"'\n" self.failures=self.failures+1 self.usedLetters.add(letter) self.correct=self.word.matchWord() if self.correct != True: self.playerTurn=(self.playerTurn+1)%len(self.playersList) self.printState() if self.correct == True: print "Congratulations! "+str(self.playersList[self.playerTurn]['name'])+" have guessed the word! :)" else: print "Oh no! You haven't guessed the word :(\nThe word was "+self.word.getWord()+"\n"
def convert_pattern_format(text):
    """Parse *text* with Pattern's parser into Sentence/Word objects.

    Every parsed sentence becomes a Sentence; every token becomes a Word
    carrying its surface string, lemma, index and chunk type (entity is
    left empty — Pattern provides none here).
    """
    converted = []
    # run Pattern's parser with relations and lemmata enabled
    tree = Text(parse(text, relations=True, lemmata=True))
    for parsed_sentence in tree:
        sentence_obj = Sentence()
        sentence_obj.string = remove_blanks(parsed_sentence.string)
        for parsed_word in parsed_sentence:
            # Pattern's tags for each word are copied into a Word object
            token = Word()
            token.string = parsed_word.string
            token.lemma = parsed_word.lemma
            token.index = parsed_word.index
            token.tag = parsed_word.type
            token.entity = ""
            sentence_obj.words.append(token)
        converted.append(sentence_obj)
    return converted