def statistics():
    """
    Handles the functionality on the Statistics page.

    GET:  fills in any missing 'analyoption' / 'statisticoption' session
          entries and renders the page with the active labels only.
    POST: reads the submitted 'tokenType', generates the per-file and corpus
          statistics, caches the submitted options and renders the results.

    Note: Returns a response object (a render_template call) to flask and
          eventually to the browser. Any other request method falls through
          and returns None.
    """
    fileManager = managers.utility.loadFileManager()
    labels = fileManager.getActiveLabels()
    method = request.method

    if method == "GET":
        # "GET" request occurs when the page is first loaded.
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
        if 'statisticoption' not in session:
            # default is all on: every key of fileManager.files is listed
            allSegments = [unicode(fileKey) for fileKey in fileManager.files.keys()]
            session['statisticoption'] = {'segmentlist': allSegments}

        return render_template('statistics.html', labels=labels, labels2=labels)

    if method == "POST":
        tokenType = request.form['tokenType']

        perFileStats, corpusStats = utility.generateStatistics(fileManager)

        session_manager.cacheAnalysisOption()
        session_manager.cacheStatisticOption()
        # DO NOT save fileManager!
        pageArgs = {
            'labels': labels,
            'FileInfoDict': perFileStats,
            'corpusInfoDict': corpusStats,
            'token': tokenType,
        }
        return render_template('statistics.html', **pageArgs)
def kmeans():
    """
    Handles the functionality on the kmeans page. It analyzes the various
    texts and displays the class label of the files.

    GET:  fills in any missing 'analyoption' / 'kmeanoption' session entries
          and renders the page with empty placeholder results.
    POST: runs the visualization selected by the 'viz' form field ('PCA' or
          'Voronoi'), caches the submitted options, saves the file manager
          and renders the results. Any other 'viz' value returns None.

    Note: Returns a response object (a render_template call) to flask and
          eventually to the browser.
    """
    fileManager = managers.utility.loadFileManager()
    labels = fileManager.getActiveLabels()
    defaultK = int(len(labels) / 2)

    # Template arguments that are the same for every rendering of the page.
    sharedArgs = dict(labels=labels, fileNumber=len(labels), defaultK=defaultK)

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
        if 'kmeanoption' not in session:
            session['kmeanoption'] = constants.DEFAULT_KMEAN_OPTIONS

        return render_template('kmeans.html',
                               silhouettescore='',
                               kmeansIndex=[],
                               fileNameStr='',
                               KValue=0,
                               colorChartStr='',
                               kmeansdatagenerated=False,
                               **sharedArgs)

    if request.method == "POST":
        # 'POST' request occur when html form is submitted
        # (i.e. 'Get Graphs', 'Download...')
        vizType = request.form['viz']

        if vizType == 'PCA':
            (clusterIndex, silhouette, namesStr,
             kUsed, colorChart) = utility.generateKMeansPCA(fileManager)
            vizArgs = {}
        elif vizType == 'Voronoi':
            (clusterIndex, silhouette, namesStr, kUsed, colorChart,
             pointsList, centroidsList, textData,
             maxVal) = utility.generateKMeansVoronoi(fileManager)
            # Extra data only the Voronoi rendering passes to the template.
            vizArgs = dict(finalPointsList=pointsList,
                           finalCentroidsList=centroidsList,
                           textData=textData,
                           maxVal=maxVal)
        else:
            # Unrecognized visualization: nothing is cached, saved or rendered.
            return None

        session_manager.cacheAnalysisOption()
        session_manager.cacheKmeanOption()
        managers.utility.saveFileManager(fileManager)

        vizArgs.update(sharedArgs)
        return render_template('kmeans.html',
                               silhouettescore=silhouette,
                               kmeansIndex=clusterIndex,
                               fileNameStr=namesStr,
                               KValue=kUsed,
                               colorChartStr=colorChart,
                               kmeansdatagenerated=True,
                               **vizArgs)
def tokenizer():
    """
    Handles the functionality on the tokenizer page. It analyzes the texts to
    produce and send various frequency matrices.

    GET:       fills in any missing 'analyoption' / 'csvoptions' session
               entries and renders the page without a matrix.
    'gen-csv': caches the submitted options, generates the matrix table,
               saves the file manager and renders the page with the table.
    'get-csv': caches the submitted options, generates the CSV file, saves
               the file manager and sends the file as an attachment.

    Note: Returns a response object (a render_template or send_file call) to
          flask and eventually to the browser. A non-GET request carrying
          neither button returns None.
    """
    fileManager = managers.utility.loadFileManager()

    if request.method == "GET":
        # "GET" request occurs when the page is first loaded.
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
        if 'csvoptions' not in session:
            session['csvoptions'] = constants.DEFAULT_CSV_OPTIONS

        return render_template('tokenizer.html',
                               labels=fileManager.getActiveLabels(),
                               matrixExist=False)

    showMatrix = 'gen-csv' in request.form
    downloadMatrix = 'get-csv' in request.form
    if not (showMatrix or downloadMatrix):
        return None

    # Both buttons cache the submitted options before generating anything.
    session_manager.cacheAnalysisOption()
    session_manager.cacheCSVOptions()

    if showMatrix:
        # The 'Generate and Visualize Matrix' button is clicked on tokenizer.html.
        labels = fileManager.getActiveLabels()
        title, table = utility.generateTokenizeResults(fileManager)
        managers.utility.saveFileManager(fileManager)
        return render_template('tokenizer.html',
                               labels=labels,
                               matrixTitle=title,
                               tableStr=table,
                               matrixExist=True)

    # The 'Download Matrix' button is clicked on tokenizer.html.
    csvPath, extension = utility.generateCSV(fileManager)
    managers.utility.saveFileManager(fileManager)
    downloadName = "frequency_matrix" + extension
    return send_file(csvPath, attachment_filename=downloadName, as_attachment=True)
def similarity():
    """
    Handles the similarity query page functionality. Returns ranked list of
    files and their cosine similarities to a comparison document.

    GET:        fills in any missing 'analyoption' / 'similarities' session
                entries and renders the page without results.
    'gen-sims': generates the similarity lists, caches the submitted options
                and renders the page with the results.
    'get-sims': caches the submitted options, generates the CSV file, saves
                the file manager and sends the file as an attachment.

    A non-GET request carrying neither button returns None.
    """
    fileManager = managers.utility.loadFileManager()
    labels = fileManager.getActiveLabels()
    # Same labels, keyed by str(key) and with each value utf-8 encoded.
    encodedLabels = {str(key): labels[key].encode("utf-8") for key in labels}

    # Template arguments common to both renderings of the page.
    labelArgs = dict(labels=labels, encodedLabels=encodedLabels)

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
        if 'similarities' not in session:
            session['similarities'] = constants.DEFAULT_SIM_OPTIONS

        return render_template('similarity.html',
                               docsListScore="",
                               docsListName="",
                               similaritiesgenerated=False,
                               **labelArgs)

    if 'gen-sims' in request.form:
        # 'POST' request occur when html form is submitted
        # (i.e. 'Get Graphs', 'Download...')
        scores, names = utility.generateSimilarities(fileManager)

        session_manager.cacheAnalysisOption()
        session_manager.cacheSimOptions()
        return render_template('similarity.html',
                               docsListScore=scores,
                               docsListName=names,
                               similaritiesgenerated=True,
                               **labelArgs)

    if 'get-sims' in request.form:
        # The 'Download Matrix' button is clicked on similarity.html.
        session_manager.cacheAnalysisOption()
        session_manager.cacheSimOptions()

        csvPath, extension = utility.generateSimsCSV(fileManager)
        managers.utility.saveFileManager(fileManager)

        downloadName = "similarity-query" + extension
        return send_file(csvPath, attachment_filename=downloadName, as_attachment=True)
def topword():
    """
    Handles the topword page functionality.

    GET:  fills in any missing 'topwordoption' / 'analyoption' session
          entries, reads the class division map and renders the page. When
          the first entry of the map has exactly one element, the stored test
          options are switched to the proportional z-test over all files.
    POST: runs the test selected by the 'testMethodType' form field ('pz'
          runs the z-test, anything else runs Kruskal-Wallis). If
          'get-topword' is in the form the full result is written to a CSV
          and sent as an attachment; otherwise a truncated preview of the
          result is rendered.

    Note: Returns a response object (a render_template or send_file call) to
          flask and eventually to the browser. Any other request method
          returns None.
    """
    fileManager = managers.utility.loadFileManager()
    labels = fileManager.getActiveLabels()

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        if 'topwordoption' not in session:
            session['topwordoption'] = constants.DEFAULT_TOPWORD_OPTIONS
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS

        # get the class label and eliminate the id
        # (this is not the unique id in filemanager)
        ClassdivisionMap = fileManager.getClassDivisionMap()[1:]

        # If there is no file active (ClassdivisionMap == []) just jump to the
        # page. `and` evaluates left to right and short-circuits, so the
        # emptiness check guards the [0] access.
        # If there is only one chunk, make the default test prop-z for all.
        if ClassdivisionMap != [] and len(ClassdivisionMap[0]) == 1:
            # Work on a copy and re-assign it. Mutating the stored dict in
            # place would edit constants.DEFAULT_TOPWORD_OPTIONS itself when
            # it was assigned just above, and a nested in-place change is not
            # detected by the flask session.
            topwordOptions = dict(session['topwordoption'])
            topwordOptions['testMethodType'] = 'pz'
            topwordOptions['testInput'] = 'useAll'
            session['topwordoption'] = topwordOptions

        return render_template('topword.html', labels=labels, classmap=ClassdivisionMap,
                               topwordsgenerated='class_div')

    if request.method == "POST":
        # 'POST' request occur when html form is submitted
        # (i.e. 'Get Graphs', 'Download...')
        if request.form['testMethodType'] == 'pz':
            # Read 'testInput' before running the test so a missing field
            # fails before any work is done.
            useClass = request.form['testInput'] == 'useclass'
            result = utility.GenerateZTestTopWord(fileManager)
            if useClass:
                # prop-z test for class
                csvKind, pageKind = 'pzClass', 'pz_class'
            else:
                # prop-z test for all
                csvKind, pageKind = 'pzAll', 'pz_all'
        else:
            # Kruskal-Wallis test
            result = utility.generateKWTopwords(fileManager)
            csvKind, pageKind = 'KW', 'KW'

        if 'get-topword' in request.form:
            # download topword
            path = utility.getTopWordCSV(result, csvKind)

            session_manager.cacheAnalysisOption()
            session_manager.cacheTopwordOptions()
            return send_file(path, attachment_filename=constants.TOPWORD_CSV_FILE_NAME,
                             as_attachment=True)

        # only give the user a preview of the topWord
        if pageKind == 'pz_class':
            # result is keyed; cap each value at 20 entries
            for key in list(result.keys()):
                if len(result[key]) > 20:
                    result[key] = result[key][:20]
        elif pageKind == 'pz_all':
            # result is indexed; cap the second element of each entry at 20.
            # Index access is kept because the container type is not visible here.
            for i in range(len(result)):
                if len(result[i][1]) > 20:
                    result[i][1] = result[i][1][:20]
        else:
            # Kruskal-Wallis: cap the whole result at 50 entries
            result = result[:50]

        session_manager.cacheAnalysisOption()
        session_manager.cacheTopwordOptions()
        return render_template('topword.html', result=result, labels=labels,
                               topwordsgenerated=pageKind, classmap=[])
def hierarchy():
    """
    Handles the functionality on the hierarchy page. It analyzes the various
    texts and displays a dendrogram.

    GET:                  fills in any missing 'analyoption' /
                          'hierarchyoption' session entries and renders the
                          page with empty threshold options.
    'dendro_download':    generates the dendrogram and sends the pdf file.
    'dendroSVG_download': generates the dendrogram and sends the svg file.
    'getdendro':          generates the dendrogram, builds the threshold
                          range strings, saves the file manager and renders
                          the page with the results.

    Note: Returns a response object (a render_template or send_file call) to
          flask and eventually to the browser. A non-GET request carrying
          none of the buttons returns None.
    """
    fileManager = managers.utility.loadFileManager()
    leq = '≤'.decode('utf-8')

    if request.method == "GET":
        # "GET" request occurs when the page is first loaded.
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
        if 'hierarchyoption' not in session:
            session['hierarchyoption'] = constants.DEFAULT_HIERARCHICAL_OPTIONS

        return render_template('hierarchy.html',
                               labels=fileManager.getActiveLabels(),
                               thresholdOps={})

    # The two download buttons differ only in extension and file name.
    # (form key, extension for a titled download, name of the generated file)
    downloadKinds = (
        ('dendro_download', ".pdf", "dendrogram.pdf"),      # 'Download Dendrogram' button
        ('dendroSVG_download', ".svg", "dendrogram.svg"),
    )
    for formKey, extension, generatedName in downloadKinds:
        if formKey not in request.form:
            continue

        utility.generateDendrogram(fileManager)

        # Name the attachment after the submitted title when there is one.
        title = request.form['title']
        if title != '':
            attachmentname = "den_" + title + extension
        else:
            attachmentname = generatedName

        session_manager.cacheAnalysisOption()
        session_manager.cacheHierarchyOption()

        generatedPath = pathjoin(session_manager.session_folder(),
                                 constants.RESULTS_FOLDER + generatedName)
        return send_file(generatedPath, attachment_filename=attachmentname,
                         as_attachment=True)

    if 'getdendro' in request.form:
        # The 'Get Dendrogram' button is clicked on hierarchy.html.
        (pdfPageNumber, score, inconsistentMax, maxclustMax, distanceMax,
         distanceMin, monocritMax, monocritMin,
         threshold) = utility.generateDendrogram(fileManager)

        session['dengenerated'] = True
        labels = fileManager.getActiveLabels()

        # Each option reads "<low> ≤ t ≤ <high>".
        between = leq + " t " + leq + " "
        thresholdOps = {
            "inconsistent": "0 " + between + str(inconsistentMax),
            "maxclust": "2 " + between + str(maxclustMax),
            "distance": str(distanceMin) + " " + between + str(distanceMax),
            "monocrit": str(monocritMin) + " " + between + str(monocritMax),
        }

        managers.utility.saveFileManager(fileManager)
        session_manager.cacheAnalysisOption()
        session_manager.cacheHierarchyOption()

        return render_template('hierarchy.html',
                               labels=labels,
                               pdfPageNumber=pdfPageNumber,
                               score=score,
                               inconsistentMax=inconsistentMax,
                               maxclustMax=maxclustMax,
                               distanceMax=distanceMax,
                               distanceMin=distanceMin,
                               monocritMax=monocritMax,
                               monocritMin=monocritMin,
                               threshold=threshold,
                               thresholdOps=thresholdOps)