def similarity():
    """
    Handles the similarity query page functionality. Returns ranked list of files and their cosine
    similarities to a comparison document.

    GET  -- renders the empty page (no scores, similaritiesgenerated=False).
    POST -- asks utility.generateSimilarities() for the score and name lists, caches the
            analysis and similarity options, and renders the page with the results.

    Note: Returns a response object (a render_template call) to flask and eventually to the browser.
    Any other request method falls through and returns None.
    """
    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()

    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    # Test the key that is actually being initialised. The guard used to look for 'uploadname',
    # which is not the key assigned here, so the defaults could overwrite similarity options that
    # were already stored in the session.
    if 'similarities' not in session:
        session['similarities'] = constants.DEFAULT_SIM_OPTIONS

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        return render_template('similarity.html', labels=labels, docsListScore="", docsListName="",
                               similaritiesgenerated=False)

    if request.method == "POST":
        # 'POST' request occur when html form is submitted (i.e. 'Get Graphs', 'Download...')
        docsListScore, docsListName = utility.generateSimilarities(fileManager)

        # Remember the submitted options only after the results were generated successfully.
        session_functions.cacheAnalysisOption()
        session_functions.cacheSimOptions()

        return render_template('similarity.html', labels=labels, docsListScore=docsListScore,
                               docsListName=docsListName, similaritiesgenerated=True)
def statistics():
    """
    Handles the functionality on the Statistics page ...

    GET  -- renders the page with only the active labels.
    POST -- reads the 'normalizeType' form field and, when at least one document is active,
            renders the page with the per-file and corpus statistics returned by
            utility.generateStatistics(). With no active documents the plain page is rendered.

    Note: Returns a response object (often a render_template call) to flask and eventually to the browser.
    Any other request method falls through and returns None.
    """
    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()

    # Initialised for every request method, like the other analysis views do.
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS

    if request.method == "GET":
        # "GET" request occurs when the page is first loaded.
        return render_template('statistics.html', labels=labels)

    if request.method == "POST":
        normalize = request.form['normalizeType']

        if len(labels) < 1:
            # Nothing to analyse: show the same page a GET produces instead of failing on
            # statistics that were never computed.
            return render_template('statistics.html', labels=labels)

        FileInfoDict, corpusInfoDict = utility.generateStatistics(fileManager)
        session_functions.cacheAnalysisOption()

        return render_template('statistics.html', labels=labels, FileInfoDict=FileInfoDict,
                               corpusInfoDict=corpusInfoDict, normalize=normalize)
def kmeans():
    """
    View for the kmeans page: clusters the active texts and shows which cluster each file
    was assigned to.

    GET  -- renders the page with empty placeholder values (kmeansdatagenerated=False).
    POST -- dispatches on the 'viz' form field ('PCA' or 'Voronoi'), caches the analysis and
            k-means options, saves the file manager and renders the results. The Voronoi
            variant passes four extra values (points, centroids, text data, max value) to the
            template.

    Note: Returns a response object (often a render_template call) to flask and eventually to the
    browser. An unrecognised 'viz' value or request method yields None.
    """
    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()
    # Suggested cluster count: half the number of active files, rounded down.
    defaultK = len(labels) // 2

    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    if 'kmeanoption' not in session:
        session['kmeanoption'] = constants.DEFAULT_KMEAN_OPTIONS

    if request.method == 'GET':
        # First page load: nothing has been computed yet.
        placeholders = dict(silhouettescore='', kmeansIndex=[], fileNameStr='', KValue=0,
                            colorChartStr='', kmeansdatagenerated=False)
        return render_template('kmeans.html', labels=labels, fileNumber=len(labels),
                               defaultK=defaultK, **placeholders)

    if request.method == "POST":
        # Form submission (i.e. 'Get Graphs', 'Download...').
        vizChoice = request.form['viz']

        if vizChoice == 'PCA':
            (clusterIndex, silhouette, namesStr,
             chosenK, colorChart) = utility.generateKMeansPCA(fileManager)
            vizArgs = {}
        elif vizChoice == 'Voronoi':
            (clusterIndex, silhouette, namesStr, chosenK, colorChart,
             pointsList, centroidsList, voronoiText, largestVal) = utility.generateKMeansVoronoi(fileManager)
            vizArgs = dict(finalPointsList=pointsList, finalCentroidsList=centroidsList,
                           textData=voronoiText, maxVal=largestVal)
        else:
            # Neither visualisation was requested: no response is built.
            return None

        # Shared tail of both visualisations, in the original call order.
        session_functions.cacheAnalysisOption()
        session_functions.cacheKmeanOption()
        session_functions.saveFileManager(fileManager)

        return render_template('kmeans.html', labels=labels, silhouettescore=silhouette,
                               kmeansIndex=clusterIndex, fileNameStr=namesStr,
                               fileNumber=len(labels), KValue=chosenK, defaultK=defaultK,
                               colorChartStr=colorChart, kmeansdatagenerated=True, **vizArgs)
def tokenizer():
    """
    View for the tokenizer page: builds frequency matrices from the active texts and either
    displays them or sends them as a download.

    GET               -- renders the page with no matrix (matrixExist=False).
    'gen-csv' in form -- renders the matrix title and table from utility.generateTokenizeResults().
    'get-csv' in form -- sends the file produced by utility.generateCSV() as an attachment named
                         "frequency_matrix" plus the returned extension.

    Note: Returns a response object (often a render_template call) to flask and eventually to the
    browser. A non-GET request carrying neither button yields None.
    """
    fileManager = session_functions.loadFileManager()

    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    if 'csvoptions' not in session:
        session['csvoptions'] = constants.DEFAULT_CSV_OPTIONS

    if request.method == "GET":
        # First page load.
        return render_template('tokenizer.html', labels=fileManager.getActiveLabels(),
                               matrixExist=False)

    showMatrix = 'gen-csv' in request.form       # 'Generate and Visualize Matrix' button
    downloadMatrix = 'get-csv' in request.form   # 'Download Matrix' button
    if not (showMatrix or downloadMatrix):
        return None

    # Both buttons remember the submitted options before doing any work.
    session_functions.cacheAnalysisOption()
    session_functions.cacheCSVOptions()

    if showMatrix:
        # Takes precedence when both buttons are present in the form.
        labels = fileManager.getActiveLabels()
        matrixTitle, tableStr = utility.generateTokenizeResults(fileManager)
        session_functions.saveFileManager(fileManager)
        return render_template('tokenizer.html', labels=labels, matrixTitle=matrixTitle,
                               tableStr=tableStr, matrixExist=True)

    savePath, fileExtension = utility.generateCSV(fileManager)
    session_functions.saveFileManager(fileManager)
    downloadName = "frequency_matrix" + fileExtension
    return send_file(savePath, attachment_filename=downloadName, as_attachment=True)
def topword():
    """
    Handles the topword page functionality.

    GET  -- renders the class-division view. When the first entry of the class division map has
            exactly one element, the stored topword options are switched to the proportional
            z-test over all files ('pz' / 'useAll').
    POST -- runs the test selected by the 'testMethodType' form field ('pz' uses
            utility.GenerateZTestTopWord, anything else uses utility.generateKWTopwords),
            trims the result to a preview, caches the analysis and topword options and renders
            the result.

    Note: Returns a response object (a render_template call) to flask and eventually to the browser.
    Any other request method falls through and returns None.
    """
    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()

    if 'topwordoption' not in session:
        session['topwordoption'] = constants.DEFAULT_TOPWORD_OPTIONS
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        # get the class label and eliminate the id (this is not the unique id in filemanager)
        ClassdivisionMap = fileManager.getClassDivisionMap()[1:]

        # if there is no file active (ClassdivisionMap == []) just jump to the page
        # `and` short-circuits left to right, so ClassdivisionMap[0] is only read when the map
        # is non-empty
        # if there is only one chunk then make the default test prop-z for all
        if ClassdivisionMap != [] and len(ClassdivisionMap[0]) == 1:
            # Work on a copy and reassign: the stored object may be the shared
            # constants.DEFAULT_TOPWORD_OPTIONS assigned above, and in-place changes to a nested
            # mutable value are not picked up by the flask session.
            topwordOptions = dict(session['topwordoption'])
            topwordOptions['testMethodType'] = 'pz'
            topwordOptions['testInput'] = 'useAll'
            session['topwordoption'] = topwordOptions

        return render_template('topword2.html', labels=labels, classmap=ClassdivisionMap,
                               topwordsgenerated='class_div')

    if request.method == "POST":
        # 'POST' request occur when html form is submitted (i.e. 'Get Graphs', 'Download...')
        zTestPreviewSize = 20
        kwPreviewSize = 50

        if request.form['testMethodType'] == 'pz':
            # Read the form field before the analysis runs so a malformed request fails early.
            byClass = request.form['testInput'] == 'useclass'
            result = utility.GenerateZTestTopWord(fileManager)

            # only give the user a preview of the topWord
            if byClass:
                # result is keyed; each value is the word list for that key
                for key in list(result.keys()):
                    if len(result[key]) > zTestPreviewSize:
                        result[key] = result[key][:zTestPreviewSize]
                generatedKind = 'pz_class'
            else:
                # result is a sequence of entries whose second item is the word list
                for entry in result:
                    if len(entry[1]) > zTestPreviewSize:
                        entry[1] = entry[1][:zTestPreviewSize]
                generatedKind = 'pz_all'
        else:
            result = utility.generateKWTopwords(fileManager)

            # only give the user a preview of the topWord
            if len(result) > kwPreviewSize:
                result = result[:kwPreviewSize]
            generatedKind = 'KW'

        session_functions.cacheAnalysisOption()
        session_functions.cacheTopwordOptions()

        return render_template('topword2.html', result=result, labels=labels,
                               topwordsgenerated=generatedKind)
def hierarchy():
    """
    View for the hierarchy page: clusters the active texts and displays a dendrogram.

    GET                          -- renders the page with an empty thresholdOps mapping.
    'dendro_download' in form    -- regenerates the dendrogram and sends dendrogram.pdf.
    'dendroSVG_download' in form -- regenerates the dendrogram and sends dendrogram.svg.
    'getdendro' in form          -- regenerates the dendrogram and renders the page with its score,
                                    the threshold limits and a readable range string per criterion.

    Note: Returns a response object (often a render_template call) to flask and eventually to the
    browser. A non-GET request carrying none of the three buttons yields None.
    """
    fileManager = session_functions.loadFileManager()
    leq = '≤'.decode('utf-8')

    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    if 'hierarchyoption' not in session:
        session['hierarchyoption'] = constants.DEFAULT_HIERARCHICAL_OPTIONS

    if request.method == "GET":
        # First page load.
        return render_template('hierarchy.html', labels=fileManager.getActiveLabels(),
                               thresholdOps={})

    # (form button, suffix for a titled download, file name inside the results folder);
    # the PDF button is checked first.
    downloadKinds = (
        ('dendro_download', ".pdf", 'dendrogram.pdf'),
        ('dendroSVG_download', ".svg", 'dendrogram.svg'),
    )
    for buttonName, suffix, resultFile in downloadKinds:
        if buttonName not in request.form:
            continue

        # presumably (re)writes the dendrogram files that are sent below -- confirm in utility
        utility.generateDendrogram(fileManager)

        chosenTitle = request.form['title']
        if chosenTitle != '':
            attachmentname = "den_" + chosenTitle + suffix
        else:
            attachmentname = resultFile

        session_functions.cacheAnalysisOption()
        session_functions.cacheHierarchyOption()

        resultPath = pathjoin(session_functions.session_folder(),
                              constants.RESULTS_FOLDER + resultFile)
        return send_file(resultPath, attachment_filename=attachmentname, as_attachment=True)

    if 'getdendro' in request.form:
        # The 'Get Dendrogram' button is clicked on hierarchy.html.
        (pdfPageNumber, score, inconsistentMax, maxclustMax, distanceMax, distanceMin,
         monocritMax, monocritMin, threshold) = utility.generateDendrogram(fileManager)
        session['dengenerated'] = True
        labels = fileManager.getActiveLabels()

        # Every range string reads "<lower> ≤ t ≤ <upper>".
        upToT = " t " + leq + " "
        thresholdOps = {
            "inconsistent": "0 " + leq + upToT + str(inconsistentMax),
            "maxclust": "2 " + leq + upToT + str(maxclustMax),
            "distance": str(distanceMin) + " " + leq + upToT + str(distanceMax),
            "monocrit": str(monocritMin) + " " + leq + upToT + str(monocritMax),
        }

        session_functions.saveFileManager(fileManager)
        session_functions.cacheAnalysisOption()
        session_functions.cacheHierarchyOption()

        return render_template('hierarchy.html', labels=labels, pdfPageNumber=pdfPageNumber,
                               score=score, inconsistentMax=inconsistentMax,
                               maxclustMax=maxclustMax, distanceMax=distanceMax,
                               distanceMin=distanceMin, monocritMax=monocritMax,
                               monocritMin=monocritMin, threshold=threshold,
                               thresholdOps=thresholdOps)