예제 #1
0
def create_gannt_corpus_obj():
    """Build a Corpus from the GANNT text files.

    Every regular file in ``GANNT/high/`` is added as a source document and
    every regular file in ``GANNT/low/`` as a target document. Document ids
    start at 0 and keep increasing across both directories, so the first
    target id follows the last source id.

    Returns:
        The populated Corpus.
    """
    corpus = Corpus()
    next_id = _add_gannt_documents(
        'GANNT/high/', 0, corpus.add_source_document)
    _add_gannt_documents(
        'GANNT/low/', next_id, corpus.add_target_document)
    return corpus


def _add_gannt_documents(directory, first_id, add_document):
    """Load each regular file in *directory* as a Document and register it.

    Args:
        directory: Directory whose files are read.
        first_id: Id given to the first document; later ones increment by 1.
        add_document: Callable that receives each constructed Document.

    Returns:
        The next unused document id.
    """
    suffix = '.txt'

    # NOTE: this is a plain lexicographic sort of the file names, so
    # "10.txt" orders before "2.txt" unless the names are zero-padded.
    file_names = sorted(
        f for f in listdir(directory) if isfile(join(directory, f))
    )

    doc_id = first_id
    for file_name in file_names:
        # Remove the extension as a suffix. str.rstrip('.txt') would strip
        # any trailing '.', 't' or 'x' characters and mangle names such as
        # "test.txt" (-> "tes").
        if file_name.endswith(suffix):
            doc_name = file_name[:-len(suffix)]
        else:
            doc_name = file_name

        # Context manager guarantees the handle is closed after reading.
        with open(join(directory, file_name), 'r') as handle:
            text = handle.read().rstrip("\n")

        add_document(Document(doc_id, doc_name, text))
        doc_id += 1

    return doc_id
예제 #2
0
def create_icebreaker_corpus_obj():
    """Build a Corpus from the IceBreaker CSV files.

    Rows of ``IceBreaker/Requirements.csv`` become source documents and rows
    of ``IceBreaker/ClassDiagram.csv`` become target documents. Each row is
    expected to hold exactly two comma-separated fields: name, description.
    Document ids start at 0 and increase across both files, so every
    document receives a distinct id.

    Returns:
        The populated Corpus.

    Raises:
        ValueError: If a non-empty row does not have exactly two fields.
    """
    source_file = 'IceBreaker/Requirements.csv'
    target_file = 'IceBreaker/ClassDiagram.csv'
    corpus = Corpus()
    index = 0

    # (path, registration method) pairs, processed in order so that target
    # ids continue after the last source id.
    inputs = (
        (source_file, corpus.add_source_document),
        (target_file, corpus.add_target_document),
    )

    for path, add_document in inputs:
        # The with-statement closes the file; no explicit close() is needed.
        with open(path) as csv_file:
            for row in csv.reader(csv_file, delimiter=','):
                if not row:
                    # csv.reader yields [] for blank lines; skip them
                    # rather than failing the unpack below.
                    continue
                name, desc = row
                add_document(Document(index, name, desc))
                # Previously never incremented, so every document had id 0.
                index += 1

    return corpus