Example 1
def test_dictionary_all_terms():
    d = Dictionary()
    assert_eq([], d.all_terms())

    d.add_term('asdf', 1, 1)
    assert_eq(['asdf'], d.all_terms())

    d.add_term('asdf', 2, 1)
    assert_eq(['asdf'], d.all_terms())

    d.add_term('qwer', 1, 1)
    d.add_term('zxcv', 1, 1)
    assert_eq(sorted(['asdf', 'qwer', 'zxcv']), sorted(d.all_terms()))
Example 2
def test_dictionary_all_terms():
    d = Dictionary()
    assert_eq([], d.all_terms())

    d.add_term('asdf', 1, 1)
    assert_eq(['asdf'], d.all_terms())

    d.add_term('asdf', 2, 1)
    assert_eq(['asdf'], d.all_terms())

    d.add_term('qwer', 1, 1)
    d.add_term('zxcv', 1, 1)
    assert_eq(
        sorted(['asdf', 'qwer', 'zxcv']),
        sorted(d.all_terms()))
Example 3
def test_dictionary_to_json_from_json():
    d = Dictionary()
    d.add_term('asdf', 1, 1)
    d.add_term('asdf', 2, 1)
    d.add_term('qwer', 1, 1)
    d.add_term('zxcv', 1, 1)

    d2 = Dictionary.from_json(d.to_json())
    assert_eq(d2.all_docs(), d.all_docs())
    assert_eq(d2.all_terms(), d.all_terms())

    assert_eq(d2.get_frequency('asdf'), d.get_frequency('asdf'))
    assert_eq(d2.get_frequency('qwer'), d.get_frequency('qwer'))
    assert_eq(d2.get_frequency('zxcv'), d.get_frequency('zxcv'))

    assert_eq(d2.get_head('asdf'), d.get_head('asdf'))
    assert_eq(d2.get_head('qwer'), d.get_head('qwer'))
    assert_eq(d2.get_head('zxcv'), d.get_head('zxcv'))

    assert_eq(d2.get_tail('asdf'), d.get_tail('asdf'))
    assert_eq(d2.get_tail('qwer'), d.get_tail('qwer'))
    assert_eq(d2.get_tail('zxcv'), d.get_tail('zxcv'))
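
The Dictionary class itself is not part of these excerpts. As a rough sketch only, an implementation along the following lines would satisfy the calls made in the tests above and by the build() example below; the internal layout, attribute names, and JSON format are assumptions, not the project's actual code.

import json


# Hypothetical sketch of the Dictionary interface exercised by the examples.
# Only the method names and observable behaviour are taken from the tests;
# everything else is assumed.
class Dictionary(object):
    def __init__(self):
        # term -> {'docs': [doc_id, ...], 'head': pointer, 'tail': pointer}
        self._terms = {}

    def add_term(self, term, doc_id, node_location):
        entry = self._terms.get(term)
        if entry is None:
            entry = {'docs': [], 'head': node_location, 'tail': node_location}
            self._terms[term] = entry
        if doc_id not in entry['docs']:
            entry['docs'].append(doc_id)
            # The newest posting becomes the tail of the on-disk linked list.
            entry['tail'] = node_location

    def has_entry(self, term, doc_id):
        return term in self._terms and doc_id in self._terms[term]['docs']

    def all_terms(self):
        return list(self._terms.keys())

    def all_docs(self):
        docs = set()
        for entry in self._terms.values():
            docs.update(entry['docs'])
        return sorted(docs)

    def get_frequency(self, term):
        # Document frequency: the number of postings recorded for the term.
        entry = self._terms.get(term)
        return len(entry['docs']) if entry else 0

    def get_head(self, term):
        return self._terms[term]['head']

    def get_tail(self, term):
        return self._terms[term]['tail']

    def to_json(self):
        return json.dumps(self._terms)

    @classmethod
    def from_json(cls, serialized):
        d = cls()
        d._terms = json.loads(serialized)
        return d

get_head() and get_tail() track the first and last postings-file locations written for each term, which is how the build() example below maintains its on-disk linked lists.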
Example 4
import math
import os


def build(training_dir, dict_file, postings_file):
    dictionary = Dictionary()

    # Read each file in the training dir.
    filepaths = []
    for filename in os.listdir(training_dir):
        filepaths.append(os.path.join(training_dir, filename))

    # Sort the filepaths according to doc_id
    filepaths = sorted(filepaths, key=lambda x: int(os.path.basename(x)))

    # Two loops here to have control over the size of the loop.
    # NOTE(michael): for testing.
    # filepaths = filepaths[:10]

    with PostingsFile(postings_file, mode='w+') as postings_file:
        for filepath in filepaths:
            terms = process_file(filepath)
            # TODO(michael): Making assumption that document is an int.
            doc_id = int(os.path.basename(filepath))

            for term in terms:
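                # Each term's postings form an on-disk linked list: the new
                # entry is appended at the current end of the postings file and
                # the previous tail entry is rewritten to point to it.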
                if not dictionary.has_entry(term, doc_id):
                    current_node_location = postings_file.pointer

                    if dictionary.get_frequency(term) != 0:
                        # Update previous node in the linked list.
                        previous_node_location = dictionary.get_tail(term)
                        previous_entry = \
                            postings_file.get_entry(previous_node_location)
                        postings_file.write_entry(
                            previous_entry.doc_id,
                            current_node_location,
                            write_location=previous_node_location)

                    dictionary.add_term(term, doc_id, current_node_location)
                    postings_file.write_entry(
                        doc_id, write_location=current_node_location)

        # Skip pointers
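        # Evenly space the skip pointers using the usual square-root heuristic:
        # a postings list with f entries gets roughly sqrt(f) skips.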
        for term in dictionary.all_terms():
            term_frequency = dictionary.get_frequency(term)
            skip_pointer_frequency = int(math.sqrt(term_frequency))

            # Don't bother if too low.
            if skip_pointer_frequency < SKIP_POINTER_THRESHOLD:
                continue

            head = dictionary.get_head(term)
            entries = postings_file.get_entry_list_from_pointer(head)

            for idx in range(term_frequency):
                if idx % skip_pointer_frequency == 0:
                    skip_to = idx + skip_pointer_frequency

                    # Nothing to point to.
                    if skip_to >= term_frequency:
                        continue

                    current_entry = entries[idx]
                    skip_to_entry = entries[skip_to]

                    # Add skip pointer.
                    postings_file.write_entry(
                        current_entry.doc_id,
                        current_entry.next_pointer,
                        skip_to_entry.own_pointer,
                        skip_to_entry.doc_id,
                        write_location=current_entry.own_pointer)

    # Write dictionary to file.
    with open(dict_file, 'w') as dictionary_file:
        dictionary_file.write(dictionary.to_json())
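
None of the examples show build() being invoked. A hypothetical call, with placeholder paths rather than the project's actual command-line handling, might look like:

if __name__ == '__main__':
    # Placeholder paths for illustration only.
    build('training/', 'dictionary.txt', 'postings.txt')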