def test_dictionary_all_terms():
    d = Dictionary()
    assert_eq([], d.all_terms())

    d.add_term('asdf', 1, 1)
    assert_eq(['asdf'], d.all_terms())

    # Adding an existing term for a new doc must not duplicate the term.
    d.add_term('asdf', 2, 1)
    assert_eq(['asdf'], d.all_terms())

    d.add_term('qwer', 1, 1)
    d.add_term('zxcv', 1, 1)
    assert_eq(
        sorted(['asdf', 'qwer', 'zxcv']),
        sorted(d.all_terms()))
def test_dictionary_to_json_from_json():
    d = Dictionary()
    d.add_term('asdf', 1, 1)
    d.add_term('asdf', 2, 1)
    d.add_term('qwer', 1, 1)
    d.add_term('zxcv', 1, 1)

    # Serializing and deserializing must reproduce the same dictionary.
    d2 = Dictionary.from_json(d.to_json())
    assert_eq(d2.all_docs(), d.all_docs())
    assert_eq(d2.all_terms(), d.all_terms())
    for term in ['asdf', 'qwer', 'zxcv']:
        assert_eq(d2.get_frequency(term), d.get_frequency(term))
        assert_eq(d2.get_head(term), d.get_head(term))
        assert_eq(d2.get_tail(term), d.get_tail(term))
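
# A minimal sketch of the Dictionary interface the tests above and build()
# below exercise. This is an illustrative assumption, not the repo's actual
# implementation (to_json()/from_json() are omitted for brevity). Each term
# maps to its document frequency plus head/tail pointers into the postings
# file's on-disk linked list.
class DictionarySketch(object):
    def __init__(self):
        self._terms = {}  # term -> {'freq', 'head', 'tail', 'docs'}

    def has_entry(self, term, doc_id):
        return term in self._terms and doc_id in self._terms[term]['docs']

    def add_term(self, term, doc_id, node_location):
        # The first occurrence of a term starts a new postings list; later
        # occurrences advance the tail pointer to the newest node.
        entry = self._terms.setdefault(term, {
            'freq': 0, 'head': node_location, 'tail': node_location,
            'docs': set()})
        entry['docs'].add(doc_id)
        entry['freq'] += 1
        entry['tail'] = node_location

    def all_terms(self):
        return list(self._terms)

    def all_docs(self):
        docs = set()
        for entry in self._terms.values():
            docs.update(entry['docs'])
        return sorted(docs)

    def get_frequency(self, term):
        return self._terms[term]['freq'] if term in self._terms else 0

    def get_head(self, term):
        return self._terms[term]['head']

    def get_tail(self, term):
        return self._terms[term]['tail']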
import math
import os


def build(training_dir, dict_file, postings_file):
    dictionary = Dictionary()

    # Read each file in the training dir.
    filepaths = []
    for filename in os.listdir(training_dir):
        filepaths.append(os.path.join(training_dir, filename))

    # Sort the filepaths according to doc_id.
    filepaths = sorted(filepaths, key=lambda x: int(os.path.basename(x)))

    # Paths are collected into a list first so the corpus size can be
    # capped while testing.
    # NOTE(michael): for testing.
    # filepaths = filepaths[:10]

    with PostingsFile(postings_file, mode='w+') as postings_file:
        for filepath in filepaths:
            terms = process_file(filepath)
            # TODO(michael): Assumes the filename is an integer doc_id.
            doc_id = int(os.path.basename(filepath))
            for term in terms:
                if not dictionary.has_entry(term, doc_id):
                    current_node_location = postings_file.pointer
                    if dictionary.get_frequency(term) != 0:
                        # Link the previous tail node of the postings list
                        # to the new node.
                        previous_node_location = dictionary.get_tail(term)
                        previous_entry = \
                            postings_file.get_entry(previous_node_location)
                        postings_file.write_entry(
                            previous_entry.doc_id,
                            current_node_location,
                            write_location=previous_node_location)

                    dictionary.add_term(term, doc_id, current_node_location)
                    postings_file.write_entry(
                        doc_id, write_location=current_node_location)

        # Skip pointers: every sqrt(df) nodes, point sqrt(df) nodes ahead
        # so boolean merges can leapfrog runs of non-matching doc_ids.
        for term in dictionary.all_terms():
            term_frequency = dictionary.get_frequency(term)
            skip_pointer_frequency = int(math.sqrt(term_frequency))

            # Not worth the overhead for short postings lists.
            if skip_pointer_frequency < SKIP_POINTER_THRESHOLD:
                continue

            head = dictionary.get_head(term)
            entries = postings_file.get_entry_list_from_pointer(head)
            for idx in xrange(term_frequency):
                if idx % skip_pointer_frequency == 0:
                    skip_to = idx + skip_pointer_frequency

                    # Nothing to point to.
                    if skip_to >= term_frequency:
                        continue

                    current_entry = entries[idx]
                    skip_to_entry = entries[skip_to]

                    # Rewrite the current node with the skip pointer added.
                    postings_file.write_entry(
                        current_entry.doc_id,
                        current_entry.next_pointer,
                        skip_to_entry.own_pointer,
                        skip_to_entry.doc_id,
                        write_location=current_entry.own_pointer)

    # Write dictionary to file.
    with open(dict_file, 'w') as dictionary_file:
        dictionary_file.write(dictionary.to_json())
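
# Example invocation (a sketch; the repo's actual CLI wiring may differ).
# Assumes a 'training/' corpus directory whose filenames are integer
# doc_ids, e.g. training/1, training/2, ...
if __name__ == '__main__':
    build('training/', 'dictionary.txt', 'postings.txt')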