def _add_gannt_documents(directory, add_document, first_id):
    """Load each regular file in *directory* as a Document and register it.

    File names are processed in lexicographically sorted order. Each file's
    text (with trailing newlines removed) becomes one Document whose id is
    taken from a counter starting at *first_id*.

    Args:
        directory: Path of the directory to scan (non-recursive).
        add_document: Callable invoked with each newly built Document.
        first_id: Id assigned to the first document that is loaded.

    Returns:
        The next unused id (``first_id`` plus the number of files loaded).
    """
    file_names = sorted(
        entry for entry in listdir(directory)
        if isfile(join(directory, entry))
    )

    doc_id = first_id
    for file_name in file_names:
        # Remove only a literal ".txt" suffix. str.rstrip('.txt') strips a
        # *set of characters*, so it would also eat trailing 't', 'x' and '.'
        # that belong to the name itself (e.g. "test.txt" -> "tes").
        if file_name.endswith('.txt'):
            doc_name = file_name[:-len('.txt')]
        else:
            doc_name = file_name

        # Context manager guarantees the handle is closed after reading.
        with open(join(directory, file_name), 'r') as text_file:
            text = text_file.read().rstrip("\n")

        add_document(Document(doc_id, doc_name, text))
        doc_id += 1

    return doc_id


def create_gannt_corpus_obj():
    """Build a Corpus from the GANNT text files.

    Files in ``GANNT/high/`` are added as source documents and files in
    ``GANNT/low/`` as target documents. Document ids start at 0 and keep
    incrementing across both directories, so target ids continue where the
    source ids end.

    Returns:
        The populated Corpus.
    """
    corpus = Corpus()
    next_id = _add_gannt_documents(
        'GANNT/high/', corpus.add_source_document, 0)
    _add_gannt_documents('GANNT/low/', corpus.add_target_document, next_id)
    return corpus
def create_icebreaker_corpus_obj():
    """Build a Corpus from the IceBreaker CSV files.

    Every row of ``IceBreaker/Requirements.csv`` becomes a source document
    and every row of ``IceBreaker/ClassDiagram.csv`` becomes a target
    document. Each row must hold exactly two fields: the document name and
    its description. Document ids start at 0 and increment with every row
    across both files.

    Returns:
        The populated Corpus.

    Raises:
        ValueError: If a CSV row does not contain exactly two fields.
    """
    source_file = 'IceBreaker/Requirements.csv'
    target_file = 'IceBreaker/ClassDiagram.csv'

    corpus = Corpus()
    loaders = (
        (source_file, corpus.add_source_document),
        (target_file, corpus.add_target_document),
    )

    index = 0
    for path, add_document in loaders:
        # newline='' lets the csv module handle line endings itself, which is
        # required for quoted fields containing embedded newlines. The with
        # block closes the file, so no explicit close() is needed.
        with open(path, newline='') as csv_file:
            for name, desc in csv.reader(csv_file, delimiter=','):
                add_document(Document(index, name, desc))
                # Give every document its own id; previously the counter was
                # never advanced and all documents shared id 0.
                index += 1

    return corpus