Python Corpus.add_source_document 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Corpus

클래스/타입: Corpus

메소드/함수: add_source_document

hotexamples.com에서의 예제들: 2

Python Corpus.add_source_document - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 Corpus.Corpus.add_source_document에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

Corpus(30)

find(5)

get_postag_set(4)

read(3)

__init__(2)

verificarPlagio(2)

add_source_document(2)

add_target_document(2)

get_file_name(2)

buildCorpus(2)

emails_as_string(2)

dump(2)

preprocess(2)

get_data(2)

read_ner(2)

outputWords(1)

pickledumpwords(1)

output_rules(1)

ner(1)

outputPOStags(1)

nettoyer_texte(1)

most_frequent_word_by_year(1)

most_frequent_word_by_month(1)

most_frequent_word_by_day(1)

most_frequent_word(1)

most_frequent_trigrams(1)

most_frequent_content_words(1)

picklegetwords(1)

read_label(1)

prepapre_to_matrix(1)

search_ambiguous(1)

vectoriserDocCorpus(1)

url_to_dir(1)

train_word2vec(1)

tag_words_with_most_likely_parses(1)

spanishTags(1)

set_lista_texto(1)

save_json(1)

process(1)

save(1)

results(1)

resetSentStats(1)

read_word2vec(1)

read_prediction(1)

load_json(1)

read_data(1)

most_frequent_bigrams(1)

get_instances(1)

lemmatiserCorpus(1)

calculSimilarite(1)

예제 #1

파일 보기

def _add_gannt_documents(directory, first_id, add_document):
    """Load every regular file in *directory* and register it as a Document.

    Files are visited in sorted file-name order. Each one becomes a
    ``Document(doc_id, doc_name, text)`` that is handed to *add_document*,
    where ``doc_name`` is the file name without a trailing ``.txt`` and
    ``text`` is the file content with trailing newlines removed.

    Args:
        directory: Directory to scan (sub-directories are skipped).
        first_id: Id given to the first document; later ones count up by 1.
        add_document: Callable that receives each created Document.

    Returns:
        The next unused document id.
    """
    file_names = sorted(
        f for f in listdir(directory) if isfile(join(directory, f))
    )

    doc_id = first_id
    for file_name in file_names:
        # str.rstrip('.txt') removes any trailing run of the characters
        # '.', 't' and 'x' (e.g. 'text.txt' -> 'te'), so drop the extension
        # as an exact suffix instead.
        if file_name.endswith('.txt'):
            doc_name = file_name[:-len('.txt')]
        else:
            doc_name = file_name

        # Use a context manager so the file handle is closed promptly.
        with open(join(directory, file_name), 'r') as handle:
            text = handle.read().rstrip("\n")

        add_document(Document(doc_id, doc_name, text))
        doc_id += 1

    return doc_id


def create_gannt_corpus_obj():
    """Build a Corpus from the GANNT text files.

    Every regular file in ``GANNT/high/`` is added as a source document and
    every regular file in ``GANNT/low/`` as a target document. Document ids
    start at 0 and keep increasing across both groups, so target ids
    continue after the last source id.

    Returns:
        The populated Corpus.
    """
    corpus = Corpus()

    source_file_directory = 'GANNT/high/'
    target_file_directory = 'GANNT/low/'

    next_id = _add_gannt_documents(
        source_file_directory, 0, corpus.add_source_document)
    _add_gannt_documents(
        target_file_directory, next_id, corpus.add_target_document)

    return corpus

예제 #2

파일 보기

def create_icebreaker_corpus_obj():
    """Build a Corpus from the IceBreaker CSV files.

    Rows of ``IceBreaker/Requirements.csv`` are added as source documents and
    rows of ``IceBreaker/ClassDiagram.csv`` as target documents. Each row must
    contain exactly two columns (name, description); any other row length
    raises ValueError when the row is unpacked.

    Document ids start at 0 and increase by one per row across both files,
    so every document receives a distinct id.

    Returns:
        The populated Corpus.
    """
    source_file = 'IceBreaker/Requirements.csv'
    target_file = 'IceBreaker/ClassDiagram.csv'
    corpus = Corpus()
    index = 0

    inputs = (
        (source_file, corpus.add_source_document),
        (target_file, corpus.add_target_document),
    )
    for path, add_document in inputs:
        # newline='' lets the csv module handle line endings itself, as its
        # documentation requires; the with-block closes the file on exit.
        with open(path, newline='') as csv_file:
            for name, desc in csv.reader(csv_file, delimiter=','):
                add_document(Document(index, name, desc))
                # Previously the id was never advanced, so every document
                # was created with id 0.
                index += 1

    return corpus