def setUp(self):
    dict = Dictionary()
    state = Follower()
    self.node = Node(0, state, [], dict, [])
    dict2 = Dictionary()
    state2 = Candidate()
    self.node2 = Node(1, state2, [], dict2, [self.node])
    self.node2.neighbors.append(self.node2)
def get_postag_data(config, train_path, dev_path, vocab_path=None, label_path=None):
    use_se_marker = config.use_se_marker
    raw_train_sents = get_sentences(train_path, use_se_marker)
    raw_dev_sents = get_sentences(dev_path, use_se_marker)
    word_to_embeddings = get_pretrained_embeddings(WORD_EMBEDDINGS[config.word_embedding])

    # Prepare word dictionary.
    word_dict = Dictionary(unknown_token=UNKNOWN_TOKEN)
    if use_se_marker:
        word_dict.add_all([START_MARKER, END_MARKER])
    if vocab_path is not None:
        with open(vocab_path, 'r') as f_vocab:
            for line in f_vocab:
                word_dict.add(line.strip())
        word_dict.accept_new = False
        print('Loaded {} words. Dictionary frozen.'.format(word_dict.size()))

    # Prepare label dictionary.
    label_dict = Dictionary()
    if label_path is not None:
        with open(label_path, 'r') as f_labels:
            for line in f_labels:
                label_dict.add(line.strip())
        label_dict.set_unknown_token(UNKNOWN_LABEL)
        label_dict.accept_new = False
        print('Loaded {} labels. Dictionary frozen.'.format(label_dict.size()))

    train_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                    string_sequence_to_ids(sent[1], label_dict))
                   for sent in raw_train_sents]
    dev_sents = [(string_sequence_to_ids(sent[0], word_dict, True, word_to_embeddings),
                  string_sequence_to_ids(sent[1], label_dict))
                 for sent in raw_dev_sents]

    print("Extracted {} words and {} tags".format(word_dict.size(), label_dict.size()))
    print("Max training sentence length: {}".format(max([len(s[0]) for s in train_sents])))
    print("Max development sentence length: {}".format(max([len(s[0]) for s in dev_sents])))

    word_embedding = [word_to_embeddings[w] for w in word_dict.idx2str]
    word_embedding_shape = [len(word_embedding), len(word_embedding[0])]
    return (train_sents, dev_sents, word_dict, label_dict,
            [word_embedding], [word_embedding_shape])
def setUp(self):
    self.empty_dictionary = Dictionary([])
    self.dictionary1 = Dictionary(['ana', 'ema', 'eganam'])
    self.dictionary2 = Dictionary(['ana', 'ema', 'ama', 'ame'])
    self.grid = create_grid(3, 3, ['e', 'm', 'a', 'g', 'a', 'n', 'g', 'g', 'g'])
    self.word = ""
    self.foundWords = set()
    self.visited = {}
    for x in range(0, 3):
        for y in range(0, 3):
            self.visited[(x, y)] = 0
def checkFile(file_name, dictionary_file="words.dat"):
    # Set up dictionary based on words.dat
    d = Dictionary(file_name=dictionary_file)
    file_in = open(file_name, 'r')
    file_out = open("{}.out".format(file_name), 'w')
    current_word = ""
    while True:
        # Read one character at a time from the input file
        next_char = file_in.read(1)
        # Exit the loop when there's nothing else to read
        if not next_char:
            break
        if next_char in d.ALLOWED_LETTERS:
            current_word += next_char
        else:
            # Verify the current_word with the dictionary
            resp, current_word = d.verify(current_word)
            if not resp:
                # Word was not found in dictionary
                resp, new_word = getUserResponse(current_word)
                d.update(resp, current_word, new_word)
                current_word = new_word
            file_out.write(current_word)
            current_word = ""
            file_out.write(next_char)
    file_in.close()
    file_out.close()
    print("Spellchecked file written to {}.out.".format(file_name))
def __init__(self, factory, fuzz_spec):
    assert factory, 'Factory not set.'
    self._factory = factory
    self._label = fuzz_spec['label']
    self._package = fuzz_spec['package']
    self._package_url = fuzz_spec['package_url']
    self._package_path = None
    if 'fuzzer' in fuzz_spec:
        self._executable = fuzz_spec['fuzzer']
        manifest = fuzz_spec['manifest']
        self._is_test = False
    elif 'fuzzer_test' in fuzz_spec:
        # Infer the associated fuzzer metadata if it is currently being built as a fuzzer test.
        self._executable = re.sub(r'_test$', '', fuzz_spec['fuzzer_test'])
        manifest = re.sub(r'_test\.cmx$', '.cmx', fuzz_spec['test_manifest'])
        self._is_test = True
    self._executable_url = '{}#meta/{}'.format(self._package_url, manifest)
    self._ns = Namespace(self)
    self._corpus = Corpus(self, fuzz_spec.get('corpus'))
    self._dictionary = Dictionary(self)
    self._options = {'artifact_prefix': self.ns.data()}
    self._libfuzzer_opts = {}
    self._libfuzzer_inputs = []
    self._subprocess_args = []
    self._debug = False
    self._foreground = False
    self._output = None
    self._logbase = None
    self._last_known_pid = 0
    self._clusterfuzz_gcs_url = \
        'gs://corpus.internal.clusterfuzz.com/libFuzzer/fuchsia_{}-{}'.format(
            self._package, self._executable)
    self._realm_label = ''
def init_module(self, configparser):
    # initialize text preparer
    # getboolean parses "true"/"false" option strings correctly; bool() on a
    # non-empty string would always be True
    hashtags = configparser.getboolean('NLP', 'hashtags')
    links = configparser.getboolean('NLP', 'links')
    emoji = configparser.getboolean('NLP', 'emoji')
    pos_eng = configparser.get('NLP', 'pos_eng')
    pos_rus = configparser.get('NLP', 'pos_rus')
    text_preparer = TextPreparer(hashtags, links, emoji, pos_eng, pos_rus)
    dictionary_dir = configparser.get('NLP', 'dictionary_dir')

    # initialize bag creators
    bag_creators = dict()
    meta_file = configparser.get('NLP', 'meta_file')
    meta_reader = csv.reader(open(meta_file, 'r'), delimiter=';')
    next(meta_reader)
    for line in meta_reader:
        filename = os.path.join(dictionary_dir, line[0])
        category = line[1]
        schema = ws.SCHEMES[line[2]]
        count = int(line[3])
        dictionary_reader = csv.reader(open(filename, 'r'), delimiter=';')
        next(dictionary_reader)
        words_count = list()
        for index, word_data in enumerate(dictionary_reader):
            if index >= count:
                break
            words_count.append(
                (word_data[0], (int(word_data[1]), int(word_data[2]))))
        dictionary = Dictionary(words_count)
        bag_creator = BagCreator(dictionary, category, schema)
        bag_creators[category] = bag_creator
    return NLPPostHandler(text_preparer, bag_creators)
def test_get_slot():
    map_buckets = Dictionary()
    bucket_object = map_buckets.get_bucket('9.0')
    key = map_buckets.set_key_to_value('9.0', 'Tesla')
    bucket_object, node = map_buckets.get_slot('9.0')
    assert node.value[1] == 'Tesla'
    assert node.value[0] == '9.0'
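# The test above implies a bucket-based hashmap API: get_bucket picks the bucket
# for a key, set_key_to_value stores a (key, value) tuple inside a node, and
# get_slot returns the bucket together with the node holding the key. The sketch
# below is one possible implementation consistent with that test; the bucket
# count, hashing scheme, and the Node/LinkedList helpers are assumptions, not the
# project's actual classes.
class Node:
    """Singly linked node whose value is a (key, value) tuple."""
    def __init__(self, value):
        self.value = value
        self.next = None


class LinkedList:
    """Minimal bucket: an append-only singly linked list."""
    def __init__(self):
        self.head = None

    def append(self, value):
        node = Node(value)
        if self.head is None:
            self.head = node
        else:
            current = self.head
            while current.next:
                current = current.next
            current.next = node
        return node

    def find(self, key):
        current = self.head
        while current:
            if current.value[0] == key:
                return current
            current = current.next
        return None


class Dictionary:
    def __init__(self, num_buckets=1024):
        self.buckets = [LinkedList() for _ in range(num_buckets)]

    def get_bucket(self, key):
        # Hash the key to pick its bucket.
        return self.buckets[hash(key) % len(self.buckets)]

    def set_key_to_value(self, key, value):
        # Store the pair as a (key, value) tuple inside a node.
        bucket = self.get_bucket(key)
        node = bucket.find(key)
        if node:
            node.value = (key, value)
            return node
        return bucket.append((key, value))

    def get_slot(self, key):
        # Return both the bucket and the node that holds the key.
        bucket = self.get_bucket(key)
        return bucket, bucket.find(key)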
def build_index(training_data_dir, dictionary_file, postings_file, is_debug):
    training_files = sorted(os.listdir(training_data_dir), key=lambda x: x)
    if is_debug:
        training_files = training_files[:DEBUG_LIMIT]
    dictionary = Dictionary(dictionary_file)
    postings = Postings(postings_file)
    for training_file in training_files:
        doc_id = training_file
        doc_path = osp.join(training_data_dir, training_file)
        add_doc_to_index(doc_id, doc_path, dictionary, postings)
    postings.save()

    # turn line nos to byte offsets
    f = open(postings_file)
    current_line = 0
    while True:
        term = dictionary.term_for_offset(current_line)
        dictionary.add_term(term, f.tell(), update_freq=False)
        line = f.readline()
        if not line:
            break
        current_line += 1
    dictionary.generate_idf(len(training_files))
    dictionary.save()
def test_dictionary_not_loaded(self):
    """
    tests if DictionaryNotLoaded exception occurs when the dictionary is not loaded.
    :return: DictionaryNotLoaded
    """
    d = Dictionary()
    self.assertRaises(DictionaryNotLoaded, d.query_definition, "hello")
def test_dictionary_add_term():
    d = Dictionary()

    first_pointer = 10
    d.add_term('asdf', 1, first_pointer)
    assert_eq(1, d.get_frequency('asdf'))
    assert_eq(first_pointer, d.get_head('asdf'))
    assert_eq(first_pointer, d.get_tail('asdf'))

    next_pointer = 20
    d.add_term('asdf', 2, next_pointer)
    assert_eq(2, d.get_frequency('asdf'))
    assert_eq(first_pointer, d.get_head('asdf'))
    assert_eq(next_pointer, d.get_tail('asdf'))

    third_pointer = 30
    d.add_term('qwer', 2, third_pointer)
    assert_eq(1, d.get_frequency('qwer'))
    assert_eq(third_pointer, d.get_head('qwer'))
    assert_eq(third_pointer, d.get_tail('qwer'))

    fourth_pointer = 40
    d.add_term('asdf', 2, fourth_pointer)
    assert_eq(2, d.get_frequency('asdf'))
    assert_eq(first_pointer, d.get_head('asdf'))
    assert_eq(next_pointer, d.get_tail('asdf'))
def test_dictionary_has_entry():
    d = Dictionary()
    assert not d.has_entry('asdf', 1)
    d.add_term('asdf', 1, 10)
    assert d.has_entry('asdf', 1)
    assert not d.has_entry('qwer', 1)
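# The two tests above pin down the behaviour of this index dictionary:
# add_term(term, doc_id, pointer) records a posting only for unseen
# (term, doc_id) pairs, get_frequency returns the number of distinct documents,
# get_head keeps the first posting pointer, and get_tail holds the pointer of
# the most recently added document (a repeated doc id changes nothing). The
# sketch below is one possible implementation consistent with those tests, not
# the project's actual class.
class Dictionary:
    def __init__(self):
        # term -> {'docs': set of doc ids, 'head': first pointer, 'tail': last pointer}
        self._terms = {}

    def add_term(self, term, doc_id, pointer):
        entry = self._terms.setdefault(term, {'docs': set(), 'head': None, 'tail': None})
        if doc_id in entry['docs']:
            # Repeated (term, doc_id) pairs leave frequency and pointers untouched.
            return
        entry['docs'].add(doc_id)
        if entry['head'] is None:
            entry['head'] = pointer
        entry['tail'] = pointer

    def has_entry(self, term, doc_id):
        return term in self._terms and doc_id in self._terms[term]['docs']

    def get_frequency(self, term):
        return len(self._terms[term]['docs'])

    def get_head(self, term):
        return self._terms[term]['head']

    def get_tail(self, term):
        return self._terms[term]['tail']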
def __init__(self, data_dir, min_occurance=None, size=None, load_from=None):
    self.size = size
    data_file = os.path.join(data_dir, 'dataset/review.json')
    dictionary_file = os.path.join(data_dir, 'dict.json')
    if not os.path.exists(dictionary_file):
        assert min_occurance is not None
        assert size is not None
        self.dictionary = Dictionary(data_file, min_occurance, size)
        self.dictionary.save(dictionary_file)
    else:
        self.dictionary = Dictionary.load(dictionary_file)

    if load_from is not None:
        self.data = self.load(load_from)
    else:
        dataset_file = os.path.join(data_dir, 'data.json')
        if not os.path.exists(dataset_file):
            self.data = self.create_dataset(data_file)
            self.save(dataset_file)
        self.data = self.load(dataset_file)
def main():
    print('Initializing...', end='')
    dictionary = Dictionary()
    domain_set = text_file_to_set(DOMAIN_FILE_LOCATION)
    skip_list = text_file_to_set(DOMAIN_FILE_LOCATION)
    print('\r******************************')
    print('***** Word Domain Filter *****')
    print('******************************')
    print('By Emet Behrendt')
    # Menu loop
    while True:
        # List actions for user
        print("\nAvailable Actions:")
        print("[1] Search for one word domains")
        print("[2] Search for two word domains")
        print("[3] Search for domains of a specific length")
        print("[4] Exit")
        # Gets user input for action
        action = int(input("\nPlease select an action: "))
        # Executes action as requested by user
        if action == 1:
            one_word_filter(domain_set, dictionary)
        elif action == 2:
            two_word_filter(domain_set, dictionary)
        elif action == 3:
            n = int(input('Enter a length to search for: '))
            num_letter_filter(domain_set, n)
        elif action == 4:
            break
        # Informs the user if their action was not valid
        else:
            print(f"Action '{action}' not found. Please try again.")
def main():
    # main function if we want to check something that is not in the tests
    print('Input the number of words in dictionary')
    nd = int(input())
    print('Input the words')
    inputsdict = [0] * nd
    for i in range(0, nd):
        inputsdict[i] = input()
    print('Input the number of rows and columns')
    n = int(input())
    m = int(input())
    print('Input the letters')
    inputsgrid = [0] * (n * m)
    for i in range(0, n * m):
        inputsgrid[i] = input()
    dictionary = Dictionary(inputsdict)
    grid = create_grid(n, m, inputsgrid)
    print('Input i and j start position')
    i = int(input())
    j = int(input())
    foundWords = set()
    word = ""
    visited = {}
    for x in range(0, n):
        for y in range(0, m):
            visited[(x, y)] = 0
    find_words_from_grid(grid, i, j, n, m, word, dictionary, foundWords, visited)
    print(foundWords)
def main():
    files = sys.argv[1:]
    d = Dictionary()
    for f in files:
        for word in parseWords(f):
            d.add_word(word)
    d.save("words.dat")
def main():
    filename = 'boggle-dictionary.txt'
    dictionary = Dictionary(filename)
    boggle_board = get_board()
    print("Boggle board after shuffle:")
    for row in boggle_board:
        print(row)
    print("\nWords found in board:")
    word_list = find_words(boggle_board, dictionary)
    for word in word_list:
        print(word)

    # benchmarking
    print('\nAverage time taken to find words in standard 4x4 boggle board =')
    print(benchmarking(dictionary), 'seconds')

    # Create result object
    result = dict()
    result['score'] = calculate_score(word_list)
    result['words'] = sorted(word_list)
    print('\nResult object:')
    print(result)
    return result
def radius_challenge(username, password, host, secret, port, nasip, debug):
    hostname = gethostname()
    dict_path = sys.path[0] + "/lib/dicts/dictionary"
    radius = Client(server=host, secret=secret, authport=port, dict=Dictionary(dict_path))
    request = radius.CreateAuthPacket(code=packet.AccessRequest)
    if debug:
        print "[DEBUG] assembling packet attributes"
    request["User-Name"] = username
    request["NAS-IP-Address"] = nasip
    request["NAS-Identifier"] = hostname
    if debug:
        print "[DEBUG] auth method: mschapv2"
    auth = mschap2.MSCHAP2()
    authAttrs = {}
    authAttrs = auth.getAuthAttrs(username, password)
    for key in authAttrs.keys():
        request[key] = authAttrs[key]
    if debug:
        print "[DEBUG] dumping request attributes..."
        for key in request.keys():
            print "[DEBUG]\t\t %s : %s" % (key, request[key])
    tsStart = time()
    try:
        reply = radius.SendPacket(request)
    except packet.PacketError, e:
        if debug:
            print e
        print "CRITICAL: Timeout sending Access-Request"
        return False
def test_load_dictionary(self) -> None:
    """ Reading a dictionary and ensuring the number of lines matches the number of words
    Also testing the various exceptions are raised correctly
    """
    for filename in TestDictionary.FILENAMES:
        self.dictionary = Dictionary(TestDictionary.DEFAULT_HASH_BASE, TestDictionary.DEFAULT_TABLE_SIZE)
        words = self.dictionary.load_dictionary(filename)
        lines = file_len(filename)
        self.assertEqual(words, lines, "Number of words should match number of lines")

    # TODO: Add your own test cases (consider testing exceptions being raised)
    # test case 1:
    # checking it doesn't throw an error for FileNotFoundError
    print("Testing load dictionary method......work on it")
    filename_2 = 'engli.txt'
    bucket = Dictionary(TestDictionary.DEFAULT_HASH_BASE, TestDictionary.DEFAULT_TABLE_SIZE)
    words = bucket.load_dictionary(filename_2)
    self.assertEqual(words, 0, "Number of words should be 0")
def creat_word_rel_dict(r_file, *q_files):
    word_dict = Dictionary()
    word_dict.add_unk_token()
    word_dict.add_pad_token()
    word_dict.add_start_token()

    for q_file in q_files:
        qa_data = pickle.load(open(q_file, 'rb'))
        for data in qa_data:
            q = data.question
            tokens = q.split(' ')
            for token in tokens:
                word_dict.add(token)
    print(len(word_dict))

    rels = pickle.load(open(r_file, 'rb'))
    for rel in rels:
        rel_word = []
        w = rel[3:].split('.')
        for i in w:
            rel_word.extend(i.split('_'))
        for word in rel_word:
            word_dict.add(word)
    print(len(word_dict))
    return word_dict
def test_valid_query(self):
    """Tests whether querying the definition is implemented correctly"""
    data = 'Children word for "father".'
    dictionary = Dictionary('../data.json')
    value = dictionary.query_definition("dad")
    print(value)
    self.assertEqual(value, data)
def reload(path, params):
    """ Create a sentence embedder from a pretrained model. """
    # reload model
    reloaded = torch.load(path)
    state_dict = reloaded['model']

    # handle models from multi-GPU checkpoints
    if 'checkpoint' in path:
        state_dict = {(k[7:] if k.startswith('module.') else k): v for k, v in state_dict.items()}

    # reload dictionary and model parameters
    dico = Dictionary(reloaded['dico_id2word'], reloaded['dico_word2id'], reloaded['dico_counts'])
    pretrain_params = AttrDict(reloaded['params'])
    pretrain_params.n_words = len(dico)
    pretrain_params.bos_index = dico.index(BOS_WORD)
    pretrain_params.eos_index = dico.index(EOS_WORD)
    pretrain_params.pad_index = dico.index(PAD_WORD)
    pretrain_params.unk_index = dico.index(UNK_WORD)
    pretrain_params.mask_index = dico.index(MASK_WORD)

    # build model and reload weights
    model = Trained_Model(pretrain_params, dico)
    model.load_state_dict(state_dict)
    model.eval()

    # adding missing parameters
    params.max_batch_size = 0

    return SentenceEmbedder(model, dico, pretrain_params)
class Emotion:
    mood_min = -15
    mood_max = 15
    mood_recovery = 0.5
    dictionary = Dictionary()

    def __init__(self):
        self.mood = 0

    def clear(self):
        self.mood = 0

    def adjust_mood(self, value):
        self.mood += value
        if self.mood > self.mood_max:
            self.mood = self.mood_max
        elif self.mood < self.mood_min:
            self.mood = self.mood_min

    def update(self, input_text):
        for item in self.dictionary.pattern:
            if item.match(input_text):
                self.adjust_mood(item.modify)
                break
        if self.mood < 0:
            self.mood += self.mood_recovery
        elif self.mood > 0:
            self.mood -= self.mood_recovery
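# Usage sketch for the Emotion class above. The real items yielded by
# self.dictionary.pattern are not shown in this snippet; PatternItem below is a
# hypothetical stand-in that only provides the two things update() relies on:
# a match() method and a modify value. Swapping the class-level dictionary for a
# SimpleNamespace stub keeps the example self-contained.
import re
from types import SimpleNamespace

class PatternItem:
    def __init__(self, pattern, modify):
        self._regex = re.compile(pattern)
        self.modify = modify

    def match(self, text):
        return self._regex.search(text)

Emotion.dictionary = SimpleNamespace(
    pattern=[PatternItem(r'thanks', 3), PatternItem(r'stupid', -5)])

emotion = Emotion()
emotion.update('thanks a lot')  # mood rises by 3, then recovers by 0.5 -> 2.5
emotion.update('hello')         # no pattern matches, mood recovers toward 0 -> 2.0
print(emotion.mood)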
def main():
    # Init
    configuration = Dictionary()
    environment = Environment(configuration)
    learner = QLearning(configuration)

    # Learn
    configuration._debug = True
    strategy = learner.q_learn(environment, do_plot=True)
    configuration._debug = False

    # Test
    done = False
    total_reward = 0.
    configuration._debug = True
    state = environment.reset()
    while not done:
        action = environment.decide_next_action(state, strategy)
        state, reward, done, _ = environment.step(action)
        total_reward += reward
    configuration.display.results(environment.portfolio_, do_plot=True)

    # Save the model?
    if configuration.save_model is True:
        learner.nn.save_model(learner.model)
def addWord(self, key, myuser, word):
    dictionary_key = ndb.Key('Dictionary', key)
    dictionary = dictionary_key.get()
    fail = True
    if dictionary is None:
        w_list = []
        keyList = []
        dictionary = Dictionary(wordList=w_list,
                                wordCount=len(w_list),
                                letterCount=len(key.split(":")[-1]),
                                subanagramKeys=keyList)
        dictionary.key = ndb.Key('Dictionary', key)
        dictionary.put()
    if word not in dictionary.wordList:
        dictionary.wordList.append(word)
        dictionary.wordCount = len(dictionary.wordList)
        myuser.wordCount += 1
        dictionary.put()
        myuser.put()
        fail = False
    if key not in myuser.userDictionary:
        myuser.userDictionary.append(key)
        myuser.put()
    return fail
def setup_module(module):
    global DICTIONARIES
    global cluster
    global node
    dict_configs_path = os.path.join(SCRIPT_DIR, 'configs/dictionaries')
    for f in os.listdir(dict_configs_path):
        os.remove(os.path.join(dict_configs_path, f))

    for layout in LAYOUTS:
        for source in SOURCES:
            if source.compatible_with_layout(layout):
                structure = DictionaryStructure(layout, FIELDS[layout.layout_type])
                dict_name = source.name + "_" + layout.name
                dict_path = os.path.join(dict_configs_path, dict_name + '.xml')
                dictionary = Dictionary(dict_name, structure, source, dict_path, "table_" + dict_name)
                dictionary.generate_config()
                DICTIONARIES.append(dictionary)
            else:
                print "Source", source.name, "incompatible with layout", layout.name

    main_configs = []
    for fname in os.listdir(dict_configs_path):
        main_configs.append(os.path.join(dict_configs_path, fname))
    cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
    node = cluster.add_instance('node', main_configs=main_configs, with_mysql=True, with_mongo=True)
    cluster.add_instance('clickhouse1')
def test_set_get(self):
    dictionary = Dictionary()
    dictionary.set(key=1, value=2)
    value1 = dictionary.get(1)
    self.assertEqual(2, value1, "set_get value 1 did not have the right value")
def build_index(training_data_dir, dictionary_file, postings_file, is_debug):
    training_files = sorted(os.listdir(training_data_dir), key=lambda x: int(x))
    if is_debug:
        training_files = training_files[:DEBUG_LIMIT]
    dictionary = Dictionary(dictionary_file)
    postings = Postings(postings_file)
    for training_file in training_files:
        doc_id = int(training_file)
        doc_path = osp.join(training_data_dir, training_file)
        postings.not_list().add(doc_id)
        add_doc_to_index(doc_id, doc_path, dictionary, postings)
    postings.save()

    # turn line nos to byte offsets
    f = open(postings_file)
    current_line = 1
    f.readline()  # skip postings list containing all doc ids
    while True:
        term = dictionary.term_for_offset(current_line)
        dictionary.add_term(term, f.tell())
        line = f.readline()
        if not line:
            break
        current_line += 1
    dictionary.save()
def test_delete_get(self):
    dictionary = Dictionary().set(key=1, value=2)
    dictionary.delete(key=1)
    value = dictionary.get(key=1)
    self.assertEqual(None, value, "delete_get did not have the right value")
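# The two key-value tests above (test_set_get / test_delete_get) imply a small
# Dictionary API: set(key, value), get(key) returning None for missing keys,
# and delete(key). Because test_delete_get chains Dictionary().set(...) into a
# variable, set() must return the instance. The sketch below is one possible
# implementation consistent with those tests, not the project's actual class.
class Dictionary:
    def __init__(self):
        self._items = {}

    def set(self, key, value):
        self._items[key] = value
        return self  # allow Dictionary().set(...) chaining as in test_delete_get

    def get(self, key):
        # Missing or deleted keys return None rather than raising.
        return self._items.get(key)

    def delete(self, key):
        self._items.pop(key, None)
        return self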
def build_index(in_dir, out_dict, out_postings):
    """
    build index from documents stored in the input directory,
    then output the dictionary file and postings file
    """
    print('indexing...')
    indexing_doc_files = sorted(map(int, os.listdir(in_dir)))
    dictionary = Dictionary(out_dict)
    postings = PostingsFile(out_postings)
    temp_dictionary = defaultdict(lambda: defaultdict(int))

    # For each document get the terms and add it into the temporary in-memory posting lists
    for document in indexing_doc_files:
        terms = util.read_document(in_dir, document)
        tf_for_doc = defaultdict(int)
        for term in terms:
            tf_for_doc[term] += 1
            temp_dictionary[term][document] += 1
        # Maintain normalised length and count in dictionary.txt
        dictionary.add_normalised_doc_length(document, tf_for_doc)
        dictionary.add_doc_count()

    # Format posting to store in posting list
    postings.format_posting(temp_dictionary)

    # Save dictionary and posting list with offsets tracking
    postings.save(dictionary)
    dictionary.save()
def build_model(params, ar_module):
    """ Build all components of the model. """
    constraint_indices = list(
        set(list(chain(*ar_module.synonyms)) + list(chain(*ar_module.antonyms))))
    constraint_words = [ar_module.inverted_index[i] for i in constraint_indices]
    dico = Dictionary(dict(zip(constraint_indices, constraint_words)),
                      dict(zip(constraint_words, constraint_indices)))

    for emb in [ar_module.model.init_W, ar_module.model.dynamic_W]:
        emb.weight.requires_grad = False
        normalize_embeddings(emb.weight.data, params.normalize_embeddings)

    # mapping
    mapping = Generator(params)

    # discriminator
    discriminator = Discriminator(params) if params.adversarial else None

    # cuda
    if params.cuda:
        mapping.cuda()
        if params.adversarial:
            discriminator.cuda()

    return constraint_indices, dico, mapping, discriminator