def _print_phase_time(phaseStart):
    """Print the seconds elapsed since ``phaseStart`` and return the current time.

    The returned timestamp is meant to be used as the start of the next
    phase, so consecutive phases tile the total run time without gaps.
    """
    now = time.time()
    print("Time: " + str(round(now - phaseStart, 3)) + " seconds")
    return now


def part1(documentPath, maximumDocuments=0):
    """Run the Part 1 pipeline and return the resulting TFIDF object.

    Steps, each reported on stdout together with its own elapsed time:

    1. Parse the documents with ``XMLParse(documentPath, maximumDocuments)``.
    2. Drop every document that has no ``BODY`` field and replace the
       ``BODY`` of the remaining ones with ``removeStopwords(body)``
       (per the progress message this helper also stems -- confirm in
       its definition).
    3. Collect the corpus vocabulary with ``getUniqueWords``.
    4. Build ``TFIDF(values, uniqueWords)``.
    5. Call ``calculateCosineSimilarity()`` on that object.

    Args:
        documentPath: Forwarded unchanged to ``XMLParse``.
        maximumDocuments: Forwarded unchanged to ``XMLParse``; defaults
            to 0. NOTE(review): 0 presumably means "no limit" -- verify
            against ``XMLParse``.

    Returns:
        The object created by ``TFIDF(values, uniqueWords)`` after
        ``calculateCosineSimilarity()`` has been invoked on it.
    """
    startTime = time.time()
    print("Executing code for Part 1...\n")

    print("Extracting data from XML Document...")
    values = XMLParse(documentPath, maximumDocuments)
    # Reported before filtering, so it counts documents without a BODY too.
    print("Number of Documents: " + str(len(values)))
    phaseStart = _print_phase_time(startTime)

    print("Removing stopwords and stemming...")
    # Single pass instead of repeated ``del values[i]``: each delete shifts
    # the tail of the list, which is quadratic when many documents are dropped.
    withBody = []
    for document in values:
        if document.hasField('BODY'):
            document.setField('BODY', removeStopwords(document.getField("BODY")))
            withBody.append(document)
    # Slice assignment keeps mutating the list object returned by XMLParse,
    # exactly as the in-place deletes did.
    values[:] = withBody
    phaseStart = _print_phase_time(phaseStart)

    print("Creating list of all unique words in corpus...")
    uniqueWords = getUniqueWords(values)
    phaseStart = _print_phase_time(phaseStart)

    print("Computing TF, IDF, and TFIDF...")
    computedTFIDF = TFIDF(values, uniqueWords)
    phaseStart = _print_phase_time(phaseStart)

    print("Computing Cosine Similarity...")
    computedTFIDF.calculateCosineSimilarity()
    _print_phase_time(phaseStart)

    print('\nPart 1 Complete')
    print("Execution Time: " + str(round(time.time() - startTime, 3)) + " seconds\n")
    return computedTFIDF