def setUp(self):
    dict = Dictionary()
    state = Follower()
    self.node = Node(0, state, [], dict, [])
    dict2 = Dictionary()
    state2 = Candidate()
    self.node2 = Node(1, state2, [], dict2, [self.node])
    self.node2.neighbors.append(self.node2)
def get_postag_data(config, train_path, dev_path, vocab_path=None, label_path=None):
    use_se_marker = config.use_se_marker
    raw_train_sents = get_sentences(train_path, use_se_marker)
    raw_dev_sents = get_sentences(dev_path, use_se_marker)
    word_to_embeddings = get_pretrained_embeddings(WORD_EMBEDDINGS[config.word_embedding])

    # Prepare word dictionary.
    word_dict = Dictionary(unknown_token=UNKNOWN_TOKEN)
    if use_se_marker:
        word_dict.add_all([START_MARKER, END_MARKER])
    if vocab_path is not None:
        with open(vocab_path, 'r') as f_vocab:
            for line in f_vocab:
                word_dict.add(line.strip())
        word_dict.accept_new = False
        print('Loaded {} words. Dictionary frozen.'.format(word_dict.size()))

    # Prepare label dictionary.
    label_dict = Dictionary()
    if label_path is not None:
        with open(label_path, 'r') as f_labels:
            for line in f_labels:
                label_dict.add(line.strip())
        label_dict.set_unknown_token(UNKNOWN_LABEL)
        label_dict.accept_new = False
        print('Loaded {} labels. Dictionary frozen.'.format(label_dict.size()))

    train_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                    string_sequence_to_ids(sent[1], label_dict))
                   for sent in raw_train_sents]
    dev_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                  string_sequence_to_ids(sent[1], label_dict))
                 for sent in raw_dev_sents]

    print("Extracted {} words and {} tags".format(word_dict.size(), label_dict.size()))
    print("Max training sentence length: {}".format(max([len(s[0]) for s in train_sents])))
    print("Max development sentence length: {}".format(max([len(s[0]) for s in dev_sents])))

    word_embedding = [word_to_embeddings[w] for w in word_dict.idx2str]
    word_embedding_shape = [len(word_embedding), len(word_embedding[0])]
    return (train_sents, dev_sents, word_dict, label_dict,
            [word_embedding], [word_embedding_shape])
def setUp(self):
    self.empty_dictionary = Dictionary([])
    self.dictionary1 = Dictionary(['ana', 'ema', 'eganam'])
    self.dictionary2 = Dictionary(['ana', 'ema', 'ama', 'ame'])
    self.grid = create_grid(3, 3, ['e', 'm', 'a', 'g', 'a', 'n', 'g', 'g', 'g'])
    self.word = ""
    self.foundWords = set()
    self.visited = {}
    for x in range(0, 3):
        for y in range(0, 3):
            self.visited[(x, y)] = 0
def checkFile(file_name, dictionary_file="words.dat"):
    # Set up dictionary based on words.dat
    d = Dictionary(file_name=dictionary_file)
    file_in = open(file_name, 'r')
    file_out = open("{}.out".format(file_name), 'w')
    current_word = ""
    while True:
        # Read one character at a time from the input file
        next_char = file_in.read(1)
        # Exit the loop when there's nothing else to read
        if not next_char:
            break
        if next_char in d.ALLOWED_LETTERS:
            current_word += next_char
        else:
            # Verify the current_word with the dictionary
            resp, current_word = d.verify(current_word)
            if not resp:
                # Word was not found in dictionary
                resp, new_word = getUserResponse(current_word)
                d.update(resp, current_word, new_word)
                current_word = new_word
            file_out.write(current_word)
            current_word = ""
            file_out.write(next_char)
    file_in.close()
    file_out.close()
    print("Spellchecked file written to {}.out.".format(file_name))
def __init__(self, factory, fuzz_spec):
    assert factory, 'Factory not set.'
    self._factory = factory
    self._label = fuzz_spec['label']
    self._package = fuzz_spec['package']
    self._package_url = fuzz_spec['package_url']
    self._package_path = None
    if 'fuzzer' in fuzz_spec:
        self._executable = fuzz_spec['fuzzer']
        manifest = fuzz_spec['manifest']
        self._is_test = False
    elif 'fuzzer_test' in fuzz_spec:
        # Infer the associated fuzzer metadata if it is currently being built as a fuzzer test.
        self._executable = re.sub(r'_test$', '', fuzz_spec['fuzzer_test'])
        manifest = re.sub(r'_test\.cmx$', '.cmx', fuzz_spec['test_manifest'])
        self._is_test = True
    self._executable_url = '{}#meta/{}'.format(self._package_url, manifest)
    self._ns = Namespace(self)
    self._corpus = Corpus(self, fuzz_spec.get('corpus'))
    self._dictionary = Dictionary(self)
    self._options = {'artifact_prefix': self.ns.data()}
    self._libfuzzer_opts = {}
    self._libfuzzer_inputs = []
    self._subprocess_args = []
    self._debug = False
    self._foreground = False
    self._output = None
    self._logbase = None
    self._last_known_pid = 0
    self._clusterfuzz_gcs_url = \
        'gs://corpus.internal.clusterfuzz.com/libFuzzer/fuchsia_{}-{}'.format(
            self._package, self._executable)
    self._realm_label = ''
def init_module(self, configparser):
    # initialize text preparer
    # getboolean parses "true"/"false" option strings correctly; bool() on a
    # non-empty string would always be True
    hashtags = configparser.getboolean('NLP', 'hashtags')
    links = configparser.getboolean('NLP', 'links')
    emoji = configparser.getboolean('NLP', 'emoji')
    pos_eng = configparser.get('NLP', 'pos_eng')
    pos_rus = configparser.get('NLP', 'pos_rus')
    text_preparer = TextPreparer(hashtags, links, emoji, pos_eng, pos_rus)
    dictionary_dir = configparser.get('NLP', 'dictionary_dir')

    # initialize bag creators
    bag_creators = dict()
    meta_file = configparser.get('NLP', 'meta_file')
    meta_reader = csv.reader(open(meta_file, 'r'), delimiter=';')
    next(meta_reader)
    for line in meta_reader:
        filename = os.path.join(dictionary_dir, line[0])
        category = line[1]
        schema = ws.SCHEMES[line[2]]
        count = int(line[3])
        dictionary_reader = csv.reader(open(filename, 'r'), delimiter=';')
        next(dictionary_reader)
        words_count = list()
        for index, word_data in enumerate(dictionary_reader):
            if index >= count:
                break
            words_count.append(
                (word_data[0], (int(word_data[1]), int(word_data[2]))))
        dictionary = Dictionary(words_count)
        bag_creator = BagCreator(dictionary, category, schema)
        bag_creators[category] = bag_creator
    return NLPPostHandler(text_preparer, bag_creators)
def test_get_slot():
    map_buckets = Dictionary()
    bucket_object = map_buckets.get_bucket('9.0')
    key = map_buckets.set_key_to_value('9.0', 'Tesla')
    bucket_object, node = map_buckets.get_slot('9.0')
    assert node.value[1] == 'Tesla'
    assert node.value[0] == '9.0'
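# The test above implies a bucket-based hashmap API: get_bucket picks the bucket
# for a key, set_key_to_value stores a (key, value) tuple inside a node, and
# get_slot returns the bucket together with the node holding the key. The sketch
# below is one possible implementation consistent with that test; the bucket
# count, hashing scheme, and the Node/LinkedList helpers are assumptions, not the
# project's actual classes.
class Node:
    """Singly linked node whose value is a (key, value) tuple."""
    def __init__(self, value):
        self.value = value
        self.next = None


class LinkedList:
    """Minimal bucket: an append-only singly linked list."""
    def __init__(self):
        self.head = None

    def append(self, value):
        node = Node(value)
        if self.head is None:
            self.head = node
        else:
            current = self.head
            while current.next:
                current = current.next
            current.next = node
        return node

    def find(self, key):
        current = self.head
        while current:
            if current.value[0] == key:
                return current
            current = current.next
        return None


class Dictionary:
    def __init__(self, num_buckets=1024):
        self.buckets = [LinkedList() for _ in range(num_buckets)]

    def get_bucket(self, key):
        # Hash the key to pick its bucket.
        return self.buckets[hash(key) % len(self.buckets)]

    def set_key_to_value(self, key, value):
        # Store the pair as a (key, value) tuple inside a node.
        bucket = self.get_bucket(key)
        node = bucket.find(key)
        if node:
            node.value = (key, value)
            return node
        return bucket.append((key, value))

    def get_slot(self, key):
        # Return both the bucket and the node that holds the key.
        bucket = self.get_bucket(key)
        return bucket, bucket.find(key)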
def build_index(training_data_dir, dictionary_file, postings_file, is_debug):
    training_files = sorted(os.listdir(training_data_dir), key=lambda x: x)
    if is_debug:
        training_files = training_files[:DEBUG_LIMIT]
    dictionary = Dictionary(dictionary_file)
    postings = Postings(postings_file)
    for training_file in training_files:
        doc_id = training_file
        doc_path = osp.join(training_data_dir, training_file)
        add_doc_to_index(doc_id, doc_path, dictionary, postings)
    postings.save()

    # turn line nos to byte offsets
    f = open(postings_file)
    current_line = 0
    while True:
        term = dictionary.term_for_offset(current_line)
        dictionary.add_term(term, f.tell(), update_freq=False)
        line = f.readline()
        if not line:
            break
        current_line += 1
    dictionary.generate_idf(len(training_files))
    dictionary.save()
def test_dictionary_not_loaded(self):
    """
    tests if DictionaryNotLoaded exception occurs when the dictionary is not loaded.
    :return: DictionaryNotLoaded
    """
    d = Dictionary()
    self.assertRaises(DictionaryNotLoaded, d.query_definition, "hello")
def test_dictionary_add_term():
    d = Dictionary()

    first_pointer = 10
    d.add_term('asdf', 1, first_pointer)
    assert_eq(1, d.get_frequency('asdf'))
    assert_eq(first_pointer, d.get_head('asdf'))
    assert_eq(first_pointer, d.get_tail('asdf'))

    next_pointer = 20
    d.add_term('asdf', 2, next_pointer)
    assert_eq(2, d.get_frequency('asdf'))
    assert_eq(first_pointer, d.get_head('asdf'))
    assert_eq(next_pointer, d.get_tail('asdf'))

    third_pointer = 30
    d.add_term('qwer', 2, third_pointer)
    assert_eq(1, d.get_frequency('qwer'))
    assert_eq(third_pointer, d.get_head('qwer'))
    assert_eq(third_pointer, d.get_tail('qwer'))

    fourth_pointer = 40
    d.add_term('asdf', 2, fourth_pointer)
    assert_eq(2, d.get_frequency('asdf'))
    assert_eq(first_pointer, d.get_head('asdf'))
    assert_eq(next_pointer, d.get_tail('asdf'))
def test_dictionary_has_entry():
    d = Dictionary()
    assert not d.has_entry('asdf', 1)
    d.add_term('asdf', 1, 10)
    assert d.has_entry('asdf', 1)
    assert not d.has_entry('qwer', 1)
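# The two tests above pin down the behaviour of this index dictionary:
# add_term(term, doc_id, pointer) records a posting only for unseen
# (term, doc_id) pairs, get_frequency returns the number of distinct documents,
# get_head keeps the first posting pointer, and get_tail holds the pointer of
# the most recently added document (a repeated doc id changes nothing). The
# sketch below is one possible implementation consistent with those tests, not
# the project's actual class.
class Dictionary:
    def __init__(self):
        # term -> {'docs': set of doc ids, 'head': first pointer, 'tail': last pointer}
        self._terms = {}

    def add_term(self, term, doc_id, pointer):
        entry = self._terms.setdefault(term, {'docs': set(), 'head': None, 'tail': None})
        if doc_id in entry['docs']:
            # Repeated (term, doc_id) pairs leave frequency and pointers untouched.
            return
        entry['docs'].add(doc_id)
        if entry['head'] is None:
            entry['head'] = pointer
        entry['tail'] = pointer

    def has_entry(self, term, doc_id):
        return term in self._terms and doc_id in self._terms[term]['docs']

    def get_frequency(self, term):
        return len(self._terms[term]['docs'])

    def get_head(self, term):
        return self._terms[term]['head']

    def get_tail(self, term):
        return self._terms[term]['tail']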
def __init__(self, data_dir, min_occurance=None, size=None, load_from=None):
    self.size = size
    data_file = os.path.join(data_dir, 'dataset/review.json')
    dictionary_file = os.path.join(data_dir, 'dict.json')
    if not os.path.exists(dictionary_file):
        assert min_occurance is not None
        assert size is not None
        self.dictionary = Dictionary(data_file, min_occurance, size)
        self.dictionary.save(dictionary_file)
    else:
        self.dictionary = Dictionary.load(dictionary_file)

    if load_from is not None:
        self.data = self.load(load_from)
    else:
        dataset_file = os.path.join(data_dir, 'data.json')
        if not os.path.exists(dataset_file):
            self.data = self.create_dataset(data_file)
            self.save(dataset_file)
        self.data = self.load(dataset_file)
def main():
    print('Initializing...', end='')
    dictionary = Dictionary()
    domain_set = text_file_to_set(DOMAIN_FILE_LOCATION)
    skip_list = text_file_to_set(DOMAIN_FILE_LOCATION)
    print('\r******************************')
    print('***** Word Domain Filter *****')
    print('******************************')
    print('By Emet Behrendt')
    # Menu loop
    while True:
        # List actions for user
        print("\nAvailable Actions:")
        print("[1] Search for one word domains")
        print("[2] Search for two word domains")
        print("[3] Search for domains of a specific length")
        print("[4] Exit")
        # Gets user input for action
        action = int(input("\nPlease select an action: "))
        # Executes action as requested by user
        if action == 1:
            one_word_filter(domain_set, dictionary)
        elif action == 2:
            two_word_filter(domain_set, dictionary)
        elif action == 3:
            n = int(input('Enter a length to search for: '))
            num_letter_filter(domain_set, n)
        elif action == 4:
            break
        # Informs the user if their action was not valid
        else:
            print(f"Action '{action}' not found. Please try again.")
def main():
    # main function if we want to check something that is not in the tests
    print('Input the number of words in dictionary')
    nd = int(input())
    print('Input the words')
    inputsdict = [0] * nd
    for i in range(0, nd):
        inputsdict[i] = input()
    print('Input the number of rows and columns')
    n = int(input())
    m = int(input())
    print('Input the letters')
    inputsgrid = [0] * (n * m)
    for i in range(0, n * m):
        inputsgrid[i] = input()
    dictionary = Dictionary(inputsdict)
    grid = create_grid(n, m, inputsgrid)
    print('Input i and j start position')
    i = int(input())
    j = int(input())
    foundWords = set()
    word = ""
    visited = {}
    for x in range(0, n):
        for y in range(0, m):
            visited[(x, y)] = 0
    find_words_from_grid(grid, i, j, n, m, word, dictionary, foundWords, visited)
    print(foundWords)
def main():
    files = sys.argv[1:]
    d = Dictionary()
    for f in files:
        for word in parseWords(f):
            d.add_word(word)
    d.save("words.dat")
def main():
    filename = 'boggle-dictionary.txt'
    dictionary = Dictionary(filename)
    boggle_board = get_board()
    print("Boggle board after shuffle:")
    for row in boggle_board:
        print(row)
    print("\nWords found in board:")
    word_list = find_words(boggle_board, dictionary)
    for word in word_list:
        print(word)

    # benchmarking
    print('\nAverage time taken to find words in standard 4x4 boggle board =')
    print(benchmarking(dictionary), 'seconds')

    # Create result object
    result = dict()
    result['score'] = calculate_score(word_list)
    result['words'] = sorted(word_list)
    print('\nResult object:')
    print(result)
    return result
def radius_challenge(username, password, host, secret, port, nasip, debug):
    hostname = gethostname()
    dict_path = sys.path[0] + "/lib/dicts/dictionary"
    radius = Client(server=host, secret=secret, authport=port, dict=Dictionary(dict_path))
    request = radius.CreateAuthPacket(code=packet.AccessRequest)
    if debug:
        print "[DEBUG] assembling packet attributes"
    request["User-Name"] = username
    request["NAS-IP-Address"] = nasip
    request["NAS-Identifier"] = hostname
    if debug:
        print "[DEBUG] auth method: mschapv2"
    auth = mschap2.MSCHAP2()
    authAttrs = {}
    authAttrs = auth.getAuthAttrs(username, password)
    for key in authAttrs.keys():
        request[key] = authAttrs[key]
    if debug:
        print "[DEBUG] dumping request attributes..."
        for key in request.keys():
            print "[DEBUG]\t\t %s : %s" % (key, request[key])
    tsStart = time()
    try:
        reply = radius.SendPacket(request)
    except packet.PacketError, e:
        if debug:
            print e
        print "CRITICAL: Timeout sending Access-Request"
        return False
def test_load_dictionary(self) -> None:
    """ Reading a dictionary and ensuring the number of lines matches the number of words
    Also testing the various exceptions are raised correctly
    """
    for filename in TestDictionary.FILENAMES:
        self.dictionary = Dictionary(TestDictionary.DEFAULT_HASH_BASE, TestDictionary.DEFAULT_TABLE_SIZE)
        words = self.dictionary.load_dictionary(filename)
        lines = file_len(filename)
        self.assertEqual(words, lines, "Number of words should match number of lines")

    # TODO: Add your own test cases (consider testing exceptions being raised)
    # test case 1:
    # checking it doesn't throw an error for FileNotFoundError
    print("Testing load dictionary method......work on it")
    filename_2 = 'engli.txt'
    bucket = Dictionary(TestDictionary.DEFAULT_HASH_BASE, TestDictionary.DEFAULT_TABLE_SIZE)
    words = bucket.load_dictionary(filename_2)
    self.assertEqual(words, 0, "Number of words should be 0")
def creat_word_rel_dict(r_file, *q_files):
    word_dict = Dictionary()
    word_dict.add_unk_token()
    word_dict.add_pad_token()
    word_dict.add_start_token()

    for q_file in q_files:
        qa_data = pickle.load(open(q_file, 'rb'))
        for data in qa_data:
            q = data.question
            tokens = q.split(' ')
            for token in tokens:
                word_dict.add(token)
    print(len(word_dict))

    rels = pickle.load(open(r_file, 'rb'))
    for rel in rels:
        rel_word = []
        w = rel[3:].split('.')
        for i in w:
            rel_word.extend(i.split('_'))
        for word in rel_word:
            word_dict.add(word)
    print(len(word_dict))
    return word_dict
def test_valid_query(self):
    """Tests whether querying the definition is implemented correctly"""
    data = 'Children word for "father".'
    dictionary = Dictionary('../data.json')
    value = dictionary.query_definition("dad")
    print(value)
    self.assertEqual(value, data)
def reload(path, params):
    """ Create a sentence embedder from a pretrained model. """
    # reload model
    reloaded = torch.load(path)
    state_dict = reloaded['model']

    # handle models from multi-GPU checkpoints
    if 'checkpoint' in path:
        state_dict = {(k[7:] if k.startswith('module.') else k): v for k, v in state_dict.items()}

    # reload dictionary and model parameters
    dico = Dictionary(reloaded['dico_id2word'], reloaded['dico_word2id'], reloaded['dico_counts'])
    pretrain_params = AttrDict(reloaded['params'])
    pretrain_params.n_words = len(dico)
    pretrain_params.bos_index = dico.index(BOS_WORD)
    pretrain_params.eos_index = dico.index(EOS_WORD)
    pretrain_params.pad_index = dico.index(PAD_WORD)
    pretrain_params.unk_index = dico.index(UNK_WORD)
    pretrain_params.mask_index = dico.index(MASK_WORD)

    # build model and reload weights
    model = Trained_Model(pretrain_params, dico)
    model.load_state_dict(state_dict)
    model.eval()

    # adding missing parameters
    params.max_batch_size = 0

    return SentenceEmbedder(model, dico, pretrain_params)
class Emotion:
    mood_min = -15
    mood_max = 15
    mood_recovery = 0.5
    dictionary = Dictionary()

    def __init__(self):
        self.mood = 0

    def clear(self):
        self.mood = 0

    def adjust_mood(self, value):
        self.mood += value
        if self.mood > self.mood_max:
            self.mood = self.mood_max
        elif self.mood < self.mood_min:
            self.mood = self.mood_min

    def update(self, input_text):
        for item in self.dictionary.pattern:
            if item.match(input_text):
                self.adjust_mood(item.modify)
                break
        if self.mood < 0:
            self.mood += self.mood_recovery
        elif self.mood > 0:
            self.mood -= self.mood_recovery
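# Usage sketch for the Emotion class above. The real items yielded by
# self.dictionary.pattern are not shown in this snippet; PatternItem below is a
# hypothetical stand-in that only provides the two things update() relies on:
# a match() method and a modify value. Swapping the class-level dictionary for a
# SimpleNamespace stub keeps the example self-contained.
import re
from types import SimpleNamespace

class PatternItem:
    def __init__(self, pattern, modify):
        self._regex = re.compile(pattern)
        self.modify = modify

    def match(self, text):
        return self._regex.search(text)

Emotion.dictionary = SimpleNamespace(
    pattern=[PatternItem(r'thanks', 3), PatternItem(r'stupid', -5)])

emotion = Emotion()
emotion.update('thanks a lot')  # mood rises by 3, then recovers by 0.5 -> 2.5
emotion.update('hello')         # no pattern matches, mood recovers toward 0 -> 2.0
print(emotion.mood)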
def main():
    # Init
    configuration = Dictionary()
    environment = Environment(configuration)
    learner = QLearning(configuration)

    # Learn
    configuration._debug = True
    strategy = learner.q_learn(environment, do_plot=True)
    configuration._debug = False

    # Test
    done = False
    total_reward = 0.
    configuration._debug = True
    state = environment.reset()
    while not done:
        action = environment.decide_next_action(state, strategy)
        state, reward, done, _ = environment.step(action)
        total_reward += reward
    configuration.display.results(environment.portfolio_, do_plot=True)

    # Save the model?
    if configuration.save_model is True:
        learner.nn.save_model(learner.model)
def addWord(self, key, myuser, word):
    dictionary_key = ndb.Key('Dictionary', key)
    dictionary = dictionary_key.get()
    fail = True
    if dictionary is None:
        w_list = []
        keyList = []
        dictionary = Dictionary(wordList=w_list,
                                wordCount=len(w_list),
                                letterCount=len(key.split(":")[-1]),
                                subanagramKeys=keyList)
        dictionary.key = ndb.Key('Dictionary', key)
        dictionary.put()
    if word not in dictionary.wordList:
        dictionary.wordList.append(word)
        dictionary.wordCount = len(dictionary.wordList)
        myuser.wordCount += 1
        dictionary.put()
        myuser.put()
        fail = False
    if key not in myuser.userDictionary:
        myuser.userDictionary.append(key)
        myuser.put()
    return fail
def setup_module(module):
    global DICTIONARIES
    global cluster
    global node
    dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    for f in os.listdir(dict_configs_path):
        os.remove(os.path.join(dict_configs_path, f))

    for layout in LAYOUTS:
        for source in SOURCES:
            if source.compatible_with_layout(layout):
                structure = DictionaryStructure(layout, FIELDS[layout.layout_type])
                dict_name = source.name + "_" + layout.name
                dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
                dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name)
                dictionary.generate_config()
                DICTIONARIES.append(dictionary)
            else:
                print "Source", source.name, "incompatible with layout", layout.name

    main_configs = []
    for fname in os.listdir(dict_configs_path):
        main_configs.append(os.path.join(dict_configs_path, fname))
    cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
    node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True)
    cluster.add_instance('clickhouse1')
def test_set_get(self):
    dictionary = Dictionary()
    dictionary.set(key=1, value=2)
    value1 = dictionary.get(1)
    self.assertEqual(2, value1, "set_get value 1 did not have the right value")
def build_index(training_data_dir, dictionary_file, postings_file, is_debug):
    training_files = sorted(os.listdir(training_data_dir), key=lambda x: int(x))
    if is_debug:
        training_files = training_files[:DEBUG_LIMIT]
    dictionary = Dictionary(dictionary_file)
    postings = Postings(postings_file)
    for training_file in training_files:
        doc_id = int(training_file)
        doc_path = osp.join(training_data_dir, training_file)
        postings.not_list().add(doc_id)
        add_doc_to_index(doc_id, doc_path, dictionary, postings)
    postings.save()

    # turn line nos to byte offsets
    f = open(postings_file)
    current_line = 1
    f.readline()  # skip postings list containing all doc ids
    while True:
        term = dictionary.term_for_offset(current_line)
        dictionary.add_term(term, f.tell())
        line = f.readline()
        if not line:
            break
        current_line += 1
    dictionary.save()
def test_delete_get(self):
    dictionary = Dictionary().set(key=1, value=2)
    dictionary.delete(key=1)
    value = dictionary.get(key=1)
    self.assertEqual(None, value, "delete_get did not have the right value")
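# The two key-value tests above (test_set_get / test_delete_get) imply a small
# Dictionary API: set(key, value), get(key) returning None for missing keys,
# and delete(key). Because test_delete_get chains Dictionary().set(...) into a
# variable, set() must return the instance. The sketch below is one possible
# implementation consistent with those tests, not the project's actual class.
class Dictionary:
    def __init__(self):
        self._items = {}

    def set(self, key, value):
        self._items[key] = value
        return self  # allow Dictionary().set(...) chaining as in test_delete_get

    def get(self, key):
        # Missing or deleted keys return None rather than raising.
        return self._items.get(key)

    def delete(self, key):
        self._items.pop(key, None)
        return self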
def build_index(in_dir, out_dict, out_postings):
    """
    build index from documents stored in the input directory,
    then output the dictionary file and postings file
    """
    print('indexing...')
    indexing_doc_files = sorted(map(int, os.listdir(in_dir)))
    dictionary = Dictionary(out_dict)
    postings = PostingsFile(out_postings)
    temp_dictionary = defaultdict(lambda: defaultdict(int))

    # For each document get the terms and add it into the temporary in-memory posting lists
    for document in indexing_doc_files:
        terms = util.read_document(in_dir, document)
        tf_for_doc = defaultdict(int)
        for term in terms:
            tf_for_doc[term] += 1
            temp_dictionary[term][document] += 1
        # Maintain normalised length and count in dictionary.txt
        dictionary.add_normalised_doc_length(document, tf_for_doc)
        dictionary.add_doc_count()

    # Format posting to store in posting list
    postings.format_posting(temp_dictionary)

    # Save dictionary and posting list with offsets tracking
    postings.save(dictionary)
    dictionary.save()
def build_model(params, ar_module):
    """ Build all components of the model. """
    constraint_indices = list(
        set(list(chain(*ar_module.synonyms)) + list(chain(*ar_module.antonyms))))
    constraint_words = [ar_module.inverted_index[i] for i in constraint_indices]
    dico = Dictionary(dict(zip(constraint_indices, constraint_words)),
                      dict(zip(constraint_words, constraint_indices)))

    for emb in [ar_module.model.init_W, ar_module.model.dynamic_W]:
        emb.weight.requires_grad = False
        normalize_embeddings(emb.weight.data, params.normalize_embeddings)

    # mapping
    mapping = Generator(params)

    # discriminator
    discriminator = Discriminator(params) if params.adversarial else None

    # cuda
    if params.cuda:
        mapping.cuda()
        if params.adversarial:
            discriminator.cuda()

    return constraint_indices, dico, mapping, discriminator