Esempio n. 1
0
def similarity():
    """
    Handles the similarity query page functionality. Returns ranked list of files and their cosine similarities to a comparison document.

    GET renders the empty page; POST computes the similarities through
    utility.generateSimilarities, caches the submitted options and renders the results.
    Note: Returns a response object (a render_template call) to flask and eventually
          to the browser.
    """

    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    # Guard on the key that is actually initialised here. The previous check looked at
    # 'uploadname', so the defaults were re-applied on every request lacking that
    # unrelated key and were never applied when it was present but 'similarities' was not.
    if 'similarities' not in session:
        session['similarities'] = constants.DEFAULT_SIM_OPTIONS

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        return render_template('similarity.html', labels=labels, docsListScore="", docsListName="",
                               similaritiesgenerated=False)

    if request.method == "POST":
        # 'POST' request occur when html form is submitted (i.e. 'Get Graphs', 'Download...')
        docsListScore, docsListName = utility.generateSimilarities(fileManager)

        # Cache the submitted options only after the computation succeeded.
        session_functions.cacheAnalysisOption()
        session_functions.cacheSimOptions()
        return render_template('similarity.html', labels=labels, docsListScore=docsListScore,
                               docsListName=docsListName, similaritiesgenerated=True)
Esempio n. 2
0
def statistics():
    """
    Handles the functionality on the Statistics page.

    GET renders the page with the active labels only. POST reads 'normalizeType' from the
    submitted form and, when at least one label is active, renders the page with the
    statistics produced by utility.generateStatistics. A POST with no active labels
    renders the same bare page as GET.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()
    if request.method == "GET":
        # "GET" request occurs when the page is first loaded.
        labels = fileManager.getActiveLabels()

        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS

        return render_template('statistics.html', labels=labels)

    if request.method == "POST":
        # "POST" request occurs when the html form is submitted.
        normalize = request.form['normalizeType']
        labels = fileManager.getActiveLabels()
        if len(labels) < 1:
            # Nothing to analyse. Previously this path fell off the end of the function
            # and returned None, which flask does not accept as a response.
            return render_template('statistics.html', labels=labels)

        FileInfoDict, corpusInfoDict = utility.generateStatistics(fileManager)
        session_functions.cacheAnalysisOption()
        return render_template('statistics.html', labels=labels, FileInfoDict=FileInfoDict,
                               corpusInfoDict=corpusInfoDict, normalize=normalize)
Esempio n. 3
0
def kmeans():
    """
    View for the kmeans page: clusters the active texts and shows the class label
    assigned to each file.

    GET renders the page with empty results. POST dispatches on the submitted 'viz'
    field ('PCA' or 'Voronoi'), caches the submitted options, saves the file manager
    and renders the results. Any other 'viz' value or request method yields None.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()
    defaultK = int(len(labels) / 2)
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    if 'kmeanoption' not in session:
        session['kmeanoption'] = constants.DEFAULT_KMEAN_OPTIONS

    if request.method == 'GET':
        # First page load: nothing has been generated yet.
        emptyContext = dict(labels=labels, silhouettescore='', kmeansIndex=[], fileNameStr='',
                            fileNumber=len(labels), KValue=0, defaultK=defaultK,
                            colorChartStr='', kmeansdatagenerated=False)
        return render_template('kmeans.html', **emptyContext)

    if request.method != "POST":
        return None

    # Form submitted (i.e. 'Get Graphs', 'Download...'): pick the requested visualization.
    vizType = request.form['viz']
    if vizType == 'PCA':
        (kmeansIndex, silhouetteScore, fileNameStr,
         KValue, colorChartStr) = utility.generateKMeansPCA(fileManager)
        vizContext = {}
    elif vizType == 'Voronoi':
        (kmeansIndex, silhouetteScore, fileNameStr, KValue, colorChartStr,
         finalPointsList, finalCentroidsList, textData,
         maxVal) = utility.generateKMeansVoronoi(fileManager)
        # Values only the Voronoi rendering passes to the template.
        vizContext = dict(finalPointsList=finalPointsList, finalCentroidsList=finalCentroidsList,
                          textData=textData, maxVal=maxVal)
    else:
        return None

    session_functions.cacheAnalysisOption()
    session_functions.cacheKmeanOption()
    session_functions.saveFileManager(fileManager)
    return render_template('kmeans.html', labels=labels, silhouettescore=silhouetteScore,
                           kmeansIndex=kmeansIndex, fileNameStr=fileNameStr,
                           fileNumber=len(labels), KValue=KValue, defaultK=defaultK,
                           colorChartStr=colorChartStr, kmeansdatagenerated=True,
                           **vizContext)
Esempio n. 4
0
def tokenizer():
    """
    View for the tokenizer page: analyzes the texts to produce and send frequency
    matrices.

    GET renders the empty page. A form containing 'gen-csv' renders the matrix in the
    page; a form containing 'get-csv' sends the generated file as a download. When both
    are present 'gen-csv' wins. Any other submission yields None.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    if 'csvoptions' not in session:
        session['csvoptions'] = constants.DEFAULT_CSV_OPTIONS

    if request.method == "GET":
        # First page load: no matrix to show yet.
        return render_template('tokenizer.html', labels=fileManager.getActiveLabels(),
                               matrixExist=False)

    showMatrix = 'gen-csv' in request.form
    if not showMatrix and 'get-csv' not in request.form:
        return None

    # Both buttons cache the submitted options before doing any work.
    session_functions.cacheAnalysisOption()
    session_functions.cacheCSVOptions()

    if showMatrix:
        # The 'Generate and Visualize Matrix' button is clicked on tokenizer.html.
        labels = fileManager.getActiveLabels()
        matrixTitle, tableStr = utility.generateTokenizeResults(fileManager)
        session_functions.saveFileManager(fileManager)
        return render_template('tokenizer.html', labels=labels, matrixTitle=matrixTitle,
                               tableStr=tableStr, matrixExist=True)

    # The 'Download Matrix' button is clicked on tokenizer.html.
    savePath, fileExtension = utility.generateCSV(fileManager)
    session_functions.saveFileManager(fileManager)
    downloadName = "frequency_matrix" + fileExtension
    return send_file(savePath, attachment_filename=downloadName, as_attachment=True)
Esempio n. 5
0
def topword():
    """
    Handles the topword page functionality.

    GET renders the class-division view. POST dispatches on the submitted
    'testMethodType' (and, for 'pz', on 'testInput'), trims the result to a preview,
    caches the submitted options and renders it.
    Note: Returns a response object (a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()
    labels = fileManager.getActiveLabels()
    if 'topwordoption' not in session:
        session['topwordoption'] = constants.DEFAULT_TOPWORD_OPTIONS
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded

        # get the class label and eliminate the id (this is not the unique id in filemanager)
        ClassdivisionMap = fileManager.getClassDivisionMap()[1:]

        # `and` evaluates left to right and short-circuits, so ClassdivisionMap[0] is only
        # read when the map is non-empty (i.e. at least one file is active).
        # If there is only one chunk, default to the prop-z test over all files.
        if ClassdivisionMap != [] and len(ClassdivisionMap[0]) == 1:
            # Work on a copy and reassign it: the stored dict may be the shared
            # constants.DEFAULT_TOPWORD_OPTIONS object assigned above, and a nested
            # in-place write would also change that module-level default.
            # NOTE(review): reassigning is also what lets a flask session notice the
            # change, assuming `session` is flask's session object - confirm.
            topwordOptions = dict(session['topwordoption'])
            topwordOptions['testMethodType'] = 'pz'
            topwordOptions['testInput'] = 'useAll'
            session['topwordoption'] = topwordOptions

        return render_template('topword2.html', labels=labels, classmap=ClassdivisionMap,
                               topwordsgenerated='class_div')

    if request.method == "POST":
        # 'POST' request occur when html form is submitted (i.e. 'Get Graphs', 'Download...')
        if request.form['testMethodType'] == 'pz':
            byClass = request.form['testInput'] == 'useclass'
            result = utility.GenerateZTestTopWord(fileManager)

            # only give the user a preview of the topWord
            if byClass:
                # result is keyed; trim each value to its first 20 entries.
                for key in result.keys():
                    if len(result[key]) > 20:
                        result[key] = result[key][:20]
                generatedKind = 'pz_class'
            else:
                # result is a sequence whose items hold the word list at index 1.
                # The leftover debug prints of result[0] / result[1] were removed: they
                # raised IndexError whenever fewer than two entries came back.
                for entry in result:
                    if len(entry[1]) > 20:
                        entry[1] = entry[1][:20]
                generatedKind = 'pz_all'
        else:
            result = utility.generateKWTopwords(fileManager)

            # only give the user a preview of the topWord
            if len(result) > 50:
                result = result[:50]
            generatedKind = 'KW'

        session_functions.cacheAnalysisOption()
        session_functions.cacheTopwordOptions()
        return render_template('topword2.html', result=result, labels=labels,
                               topwordsgenerated=generatedKind)
Esempio n. 6
0
def hierarchy():
    """
    View for the hierarchy page: analyzes the texts and displays a dendrogram.

    GET renders the empty page. A form containing 'dendro_download' or
    'dendroSVG_download' regenerates the dendrogram and sends the PDF or SVG file as a
    download. A form containing 'getdendro' regenerates it and renders the page with
    the threshold ranges. Any other submission yields None.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()
    leq = '≤'.decode('utf-8')
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALIZE_OPTIONS
    if 'hierarchyoption' not in session:
        session['hierarchyoption'] = constants.DEFAULT_HIERARCHICAL_OPTIONS

    if request.method == "GET":
        # First page load: no thresholds to display yet.
        return render_template('hierarchy.html', labels=fileManager.getActiveLabels(),
                               thresholdOps={})

    # Download buttons on hierarchy.html: (form field, file suffix, generated file name).
    # PDF is checked first, matching the precedence when both fields are present.
    downloadKinds = (('dendro_download', ".pdf", 'dendrogram.pdf'),
                     ('dendroSVG_download', ".svg", 'dendrogram.svg'))
    for buttonName, suffix, generatedName in downloadKinds:
        if buttonName not in request.form:
            continue
        utility.generateDendrogram(fileManager)
        # Use the submitted title for the download name unless it is empty.
        if request.form['title'] != '':
            attachmentname = "den_" + request.form['title'] + suffix
        else:
            attachmentname = generatedName
        session_functions.cacheAnalysisOption()
        session_functions.cacheHierarchyOption()
        resultPath = pathjoin(session_functions.session_folder(),
                              constants.RESULTS_FOLDER + generatedName)
        return send_file(resultPath, attachment_filename=attachmentname, as_attachment=True)

    if 'getdendro' not in request.form:
        return None

    # The 'Get Dendrogram' button is clicked on hierarchy.html.
    (pdfPageNumber, score, inconsistentMax, maxclustMax, distanceMax, distanceMin,
     monocritMax, monocritMin, threshold) = utility.generateDendrogram(fileManager)
    session['dengenerated'] = True
    labels = fileManager.getActiveLabels()

    def rangeText(lower, upper):
        # Renders "<lower> ≤ t ≤ <upper>" for display next to a threshold option.
        return str(lower) + " " + leq + " t " + leq + " " + str(upper)

    thresholdOps = {
        "inconsistent": rangeText(0, inconsistentMax),
        "maxclust": rangeText(2, maxclustMax),
        "distance": rangeText(distanceMin, distanceMax),
        "monocrit": rangeText(monocritMin, monocritMax),
    }

    session_functions.saveFileManager(fileManager)
    session_functions.cacheAnalysisOption()
    session_functions.cacheHierarchyOption()
    return render_template('hierarchy.html', labels=labels, pdfPageNumber=pdfPageNumber,
                           score=score, inconsistentMax=inconsistentMax,
                           maxclustMax=maxclustMax, distanceMax=distanceMax,
                           distanceMin=distanceMin, monocritMax=monocritMax,
                           monocritMin=monocritMin, threshold=threshold,
                           thresholdOps=thresholdOps)