from crawler import Crawler
from indexer import Indexer
from query_processor import QuerryProcessor
from document import Document
from time import sleep

if __name__ == '__main__':
    # sleep(5.0)
    # print("THREAD-TIME!")

    # crawl the seed site breadth-first
    crawler = Crawler('https://www.in.gr', 20, 5, True, 'BFS')
    crawler.initializeCrawl()

    # index the crawled documents, then add the query itself as a document
    # so it gets TF-IDF weights alongside the corpus
    ind = Indexer(Crawler.documents)
    query = input("Enter your search query: ")
    ind.add_document(Document('search_query', query))

    print('Building Indexer...')
    ind.create_indexer()
    print('Calculating TF-IDFs. May take a while.')
    ind.calculate_scores()

    # rank documents by cosine similarity against the query, highest first
    qp = QuerryProcessor(ind.inverted_index, len(ind.documents))
    docs_with_cos_ = qp.compare_documents()
    docs_with_cos_ = sorted(docs_with_cos_, key=lambda x: x[1], reverse=True)

    print(f'Showing top results based on your query "{query}":')
    for doc in docs_with_cos_:
        print(doc[0].link)
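
# A minimal, self-contained sketch of the ranking step above: build TF-IDF
# vectors for a corpus that includes the query as a document (mirroring the
# add_document('search_query', ...) call), then sort documents by cosine
# similarity against the query vector. All names here (tf_idf_vectors,
# cosine_similarity, the sample docs) are illustrative, not the actual
# Indexer/QuerryProcessor API.
import math
from collections import Counter


def tf_idf_vectors(docs):
    """Map each doc id to a {term: tf-idf weight} vector."""
    tokenized = {doc_id: text.lower().split() for doc_id, text in docs.items()}
    df = Counter()  # document frequency of each term
    for tokens in tokenized.values():
        df.update(set(tokens))
    n = len(docs)
    vectors = {}
    for doc_id, tokens in tokenized.items():
        tf = Counter(tokens)
        vectors[doc_id] = {
            term: (count / len(tokens)) * math.log(n / df[term])
            for term, count in tf.items()
        }
    return vectors


def cosine_similarity(a, b):
    dot = sum(w * b.get(term, 0.0) for term, w in a.items())
    norm_a = math.sqrt(sum(w * w for w in a.values()))
    norm_b = math.sqrt(sum(w * w for w in b.values()))
    return dot / (norm_a * norm_b) if norm_a and norm_b else 0.0


docs = {
    'd1': 'python web crawler tutorial',
    'd2': 'cooking pasta at home',
    'query': 'web crawler',
}
vectors = tf_idf_vectors(docs)
query_vec = vectors.pop('query')
ranked = sorted(
    ((doc_id, cosine_similarity(vec, query_vec)) for doc_id, vec in vectors.items()),
    key=lambda x: x[1],
    reverse=True,
)
print(ranked)  # d1 should outrank d2 for this query
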
# Imports assumed from local modules; adjust the paths to the project layout.
from frontier import Frontier
from parser import Parser
from indexer import Indexer
from graph import Graph

frontier = Frontier([
    'http://mysql12.f4.htw-berlin.de/crawl/d01.html',
    'http://mysql12.f4.htw-berlin.de/crawl/d06.html',
    'http://mysql12.f4.htw-berlin.de/crawl/d08.html',
])
parser = Parser()
indexer = Indexer()
web_graph = Graph()

for url in frontier:
    # get outgoing links for the graph and content for tokenization
    body, links_on_page = parser.parse(url)

    # add document to indexer
    indexer.add_document(url, body)

    # build our web graph: one node per URL, one edge per outgoing link
    node = web_graph.get_node(url)
    if node is None:
        node = web_graph.add_node(url)
    for out_link in links_on_page:
        web_graph.add_edge(url, out_link)

    # hand links to the frontier to make sure they are all crawled
    frontier.add_urls(links_on_page)

# for node in web_graph:
#     print(node)
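
# A minimal sketch of the Frontier the loop above relies on: a FIFO queue
# that keeps yielding URLs while add_urls() grows it mid-crawl, and that
# skips URLs it has already handed out so each page is fetched once. This
# illustrates the assumed interface, not the project's actual implementation.
from collections import deque


class Frontier:
    def __init__(self, seed_urls):
        self._queue = deque(seed_urls)
        self._seen = set(seed_urls)

    def add_urls(self, urls):
        # enqueue only never-seen URLs, so re-discovered pages are ignored
        for url in urls:
            if url not in self._seen:
                self._seen.add(url)
                self._queue.append(url)

    def __iter__(self):
        # keep draining the queue even as add_urls() appends new entries
        while self._queue:
            yield self._queue.popleft()


# usage: duplicate seeds and already-crawled links are dropped silently
f = Frontier(['http://mysql12.f4.htw-berlin.de/crawl/d01.html'])
f.add_urls(['http://mysql12.f4.htw-berlin.de/crawl/d01.html'])  # ignored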