Exemple #1
0
def cut():
    """
    Serves the cut page: previews or applies cuts on the active files using the
    options submitted by the user, and offers the resulting segments for download.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None when a POST carries none of the known buttons.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        # Page is being loaded: make sure the session carries cutting options.
        if 'cuttingoptions' not in session:
            session['cuttingoptions'] = constants.DEFAULT_CUT_OPTIONS

        activePreviews = fileManager.getPreviewsOfActive()
        return render_template('cut.html', previews=activePreviews,
                               num_active_files=len(activePreviews))

    form = request.form
    applyClicked = 'apply' in form

    if applyClicked or 'preview' in form:
        # 'Preview Cuts' or 'Apply Cuts' was clicked on cut.html.
        session_functions.cacheCuttingOptions()

        # Cuts are only persisted when the user asked to apply them.
        cutPreviews = fileManager.cutFiles(savingChanges=applyClicked)
        if applyClicked:
            session_functions.saveFileManager(fileManager)

        return render_template('cut.html', previews=cutPreviews,
                               num_active_files=len(cutPreviews))

    if 'downloadchunks' in form:
        # 'Download Segmented Files' was clicked on cut.html: hand back the
        # zip response built by the file manager.
        return fileManager.zipActiveFiles('cut_files.zip')
Exemple #2
0
def kmeans():
    """
    Serves the kmeans page: clusters the active files and renders the cluster
    index, silhouette score and K value produced by the file manager.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()
    activeLabels = fileManager.getActiveLabels()
    fileCount = len(activeLabels)
    # Suggested K shown on the page: half the number of active files.
    halfOfFiles = int(len(activeLabels)/2)

    method = request.method

    if method == 'GET':
        # Page is being loaded: nothing has been generated yet.
        session['kmeansdatagenerated'] = False
        return render_template('kmeans.html', labels=activeLabels, silhouettescore='',
                               kmeansIndex=[], fileNameStr='', fileNumber=fileCount,
                               KValue=0, defaultK=halfOfFiles)

    if method == "POST":
        # A form on the page was submitted (e.g. 'Get Graphs', 'Download...').
        session['kmeansdatagenerated'] = True

        clusterIndex, score, namesStr, chosenK = fileManager.generateKMeans()
        session_functions.saveFileManager(fileManager)

        return render_template('kmeans.html', labels=activeLabels, silhouettescore=score,
                               kmeansIndex=clusterIndex, fileNameStr=namesStr,
                               fileNumber=fileCount, KValue=chosenK, defaultK=halfOfFiles)
Exemple #3
0
def csvgenerator():
    """
    Serves the csvgenerator page: builds a frequency matrix from the active files
    and sends it to the user as a download.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None when a POST does not contain 'get-csv'.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        # Page is being loaded: make sure the session carries CSV options.
        if 'csvoptions' not in session:
            session['csvoptions'] = constants.DEFAULT_CSV_OPTIONS

        return render_template('csvgenerator.html', labels=fileManager.getActiveLabels())

    if 'get-csv' not in request.form:
        return None

    # 'Generate and Download Matrix' was clicked on csvgenerator.html.
    session_functions.cacheCSVOptions()

    matrixPath, extension = fileManager.generateCSV()
    session_functions.saveFileManager(fileManager)

    downloadName = "frequency_matrix"+extension
    return send_file(matrixPath, attachment_filename=downloadName, as_attachment=True)
Exemple #4
0
def _thresholdRange(lower, upper):
    """
    Builds the "<lower> <= t <= <upper>" hint string for a threshold criterion,
    using the unicode less-than-or-equal sign (U+2264).
    """
    ineq = u'\u2264'
    return str(lower) + " " + ineq + " t " + ineq + " " + str(upper)


def _buildThresholdOps(inconsistentMax, maxclustMax, distanceMin, distanceMax, monocritMin, monocritMax):
    """
    Maps each threshold criterion name to the range hint passed to the template.
    The inconsistent and maxclust criteria use fixed lower bounds of 0 and 2.
    """
    return {"inconsistent": _thresholdRange(0, inconsistentMax),
            "maxclust": _thresholdRange(2, maxclustMax),
            "distance": _thresholdRange(distanceMin, distanceMax),
            "monocrit": _thresholdRange(monocritMin, monocritMax)}


def hierarchy():
    """
    Handles the functionality on the hierarchy page. It analyzes the various texts and
    displays a dendrogram.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None when a POST carries none of the known buttons.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        # "GET" request occurs when the page is first loaded.
        labels = fileManager.getActiveLabels()
        return render_template('hierarchy.html', labels=labels, thresholdOps={})

    if 'dendro_download' in request.form:
        # The 'Download Dendrogram' button is clicked on hierarchy.html.
        # Sends the dendrogram pdf from the session's results folder, named
        # after the user-supplied title when there is one.
        title = request.form['title']
        attachmentname = "den_"+title+".pdf" if title != '' else 'dendrogram.pdf'
        pdfPath = pathjoin(session_functions.session_folder(), constants.RESULTS_FOLDER+"dendrogram.pdf")
        return send_file(pdfPath, attachment_filename=attachmentname, as_attachment=True)

    if 'refreshThreshold' in request.form:
        # pdfPageNumber and score are produced but not shown when only the
        # threshold ranges are refreshed.
        pdfPageNumber, score, inconsistentMax, maxclustMax, distanceMax, distanceMin, monocritMax, monocritMin, threshold = fileManager.generateDendrogram()
        labels = fileManager.getActiveLabels()

        thresholdOps = _buildThresholdOps(inconsistentMax, maxclustMax, distanceMin, distanceMax, monocritMin, monocritMax)

        # NOTE: the original also passed distanceList=distanceList here, but no
        # such name is ever defined in this function, so this branch raised
        # NameError on every request. The argument is dropped.
        return render_template('hierarchy.html', labels=labels, inconsistentMax=inconsistentMax, maxclustMax=maxclustMax, distanceMax=distanceMax, distanceMin=distanceMin, monocritMax=monocritMax, monocritMin=monocritMin, threshold=threshold, thresholdOps=thresholdOps)

    if 'getdendro' in request.form:
        # The 'Get Dendrogram' button is clicked on hierarchy.html.
        pdfPageNumber, score, inconsistentMax, maxclustMax, distanceMax, distanceMin, monocritMax, monocritMin, threshold = fileManager.generateDendrogram()
        session['dengenerated'] = True
        labels = fileManager.getActiveLabels()

        # Same range hints as the refresh branch (the maxclust hint previously
        # lost its second inequality sign here).
        thresholdOps = _buildThresholdOps(inconsistentMax, maxclustMax, distanceMin, distanceMax, monocritMin, monocritMax)

        session_functions.saveFileManager(fileManager)

        return render_template('hierarchy.html', labels=labels, pdfPageNumber=pdfPageNumber, score=score, inconsistentMax=inconsistentMax, maxclustMax=maxclustMax, distanceMax=distanceMax, distanceMin=distanceMin, monocritMax=monocritMax, monocritMin=monocritMin, threshold=threshold, thresholdOps=thresholdOps)
Exemple #5
0
def kmeans():
    """
    Serves the kmeans page: clusters the active files and renders the result as
    either a PCA or a Voronoi visualisation, depending on the submitted 'viz' field.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None for an unrecognised 'viz' value.
    """
    fileManager = session_functions.loadFileManager()
    activeLabels = fileManager.getActiveLabels()
    # Suggested K shown on the page: half the number of active files.
    halfOfFiles = int(len(activeLabels) / 2)

    # Seed any option groups the session does not carry yet.
    for optionKey, optionDefault in (('analyoption', constants.DEFAULT_ANALIZE_OPTIONS),
                                     ('kmeanoption', constants.DEFAULT_KMEAN_OPTIONS)):
        if optionKey not in session:
            session[optionKey] = optionDefault

    if request.method == 'GET':
        # Page is being loaded: render it with empty results.
        return render_template('kmeans.html', labels=activeLabels, silhouettescore='', kmeansIndex=[],
                               fileNameStr='', fileNumber=len(activeLabels), KValue=0,
                               defaultK=halfOfFiles, colorChartStr='', kmeansdatagenerated=False)

    if request.method != "POST":
        return None

    # A form on the page was submitted (e.g. 'Get Graphs', 'Download...').
    visualisation = request.form['viz']
    voronoiArgs = {}

    if visualisation == 'PCA':
        clusterIndex, score, namesStr, chosenK, chartStr = utility.generateKMeansPCA(fileManager)

    elif visualisation == 'Voronoi':
        (clusterIndex, score, namesStr, chosenK, chartStr,
         pointsList, centroidsList, pointText, largestVal) = utility.generateKMeansVoronoi(fileManager)
        # Extra template values that only the Voronoi view receives.
        voronoiArgs = dict(finalPointsList=pointsList, finalCentroidsList=centroidsList,
                           textData=pointText, maxVal=largestVal)

    else:
        return None

    # Options are cached and the manager saved only after generation succeeded.
    session_functions.cacheAnalysisOption()
    session_functions.cacheKmeanOption()
    session_functions.saveFileManager(fileManager)

    return render_template('kmeans.html', labels=activeLabels, silhouettescore=score,
                           kmeansIndex=clusterIndex, fileNameStr=namesStr,
                           fileNumber=len(activeLabels), KValue=chosenK, defaultK=halfOfFiles,
                           colorChartStr=chartStr, kmeansdatagenerated=True, **voronoiArgs)
Exemple #6
0
def select():
    """
    Serves the select page, whose main job is switching individual files between
    the active and inactive state based on what the user does.

    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        return render_template('select.html',
                               activeFiles=fileManager.getPreviewsOfActive(),
                               inactiveFiles=fileManager.getPreviewsOfInactive())

    # Non-GET requests on this page are AJAX calls from JavaScript; the action
    # is identified by which custom header is present.
    headers = request.headers

    if 'toggleFile' in headers:
        # Request body holds the id of the file to flip between active/inactive.
        fileManager.toggleFile(int(request.data))

    elif 'setLabel' in headers:
        # Header value is the new label; request body holds the file id.
        label = headers['setLabel'].decode('utf-8')
        targetID = int(request.data)
        fileManager.files[targetID].label = label

    elif 'disableAll' in headers:
        fileManager.disableAll()

    elif 'selectAll' in headers:
        fileManager.enableAll()

    elif 'applyClassLabel' in headers:
        fileManager.classifyActiveFiles()

    elif 'deleteActive' in headers:
        fileManager.deleteActiveFiles()

    # The manager is saved even when no known header matched.
    session_functions.saveFileManager(fileManager)

    return ''  # Flask needs some response body, so send an empty one
Exemple #7
0
def tokenizer():
    """
    Serves the tokenize page: builds frequency matrices from the active files and
    either shows them on the page or sends them as a download.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None when a POST carries none of the known buttons.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        # Page is being loaded: make sure the session carries CSV options.
        if 'csvoptions' not in session:
            session['csvoptions'] = constants.DEFAULT_CSV_OPTIONS

        return render_template('tokenizer.html', labels=fileManager.getActiveLabels())

    form = request.form

    if 'gen-csv' in form:
        # 'Generate and Visualize Matrix' was clicked on tokenizer.html.
        DocTermSparseMatrix, countMatrix = fileManager.generateCSVMatrix(roundDecimal=True)

        # Transpose the matrix; the first transposed row becomes the header
        # and the remaining rows become the table body.
        transposed = zip(*countMatrix)
        header = list(transposed[0])
        header[0] = "Token".encode("utf-8")
        bodyRows = [list(entry) for entry in transposed[1:]]

        activeLabels = fileManager.getActiveLabels()
        session_functions.saveFileManager(fileManager)

        return render_template('tokenizer.html', labels=activeLabels, matrixData=bodyRows,
                               matrixTitle=header, matrixExist=True)

    if 'get-csv' in form:
        # 'Download Matrix' was clicked on tokenizer.html.
        session_functions.cacheCSVOptions()
        matrixPath, extension = fileManager.generateCSV()
        session_functions.saveFileManager(fileManager)

        downloadName = "frequency_matrix"+extension
        return send_file(matrixPath, attachment_filename=downloadName, as_attachment=True)
Exemple #8
0
def upload():
    """
    Handles the functionality of the upload page. It uploads files to be used
    in the current session.

    The uploaded bytes are decoded with the encoding chardet detects. Detection is
    first tried on the leading 500 bytes (chardet is slow) and repeated on the whole
    payload if that guess is missing or fails to decode the data.

    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None for a non-GET request without the X_FILENAME header.
    Raises: UnicodeDecodeError / LookupError if the full-payload guess cannot decode the data.
    """
    if request.method == "GET":
        return render_template('upload.html')

    if 'X_FILENAME' in request.headers: # X_FILENAME is the flag to signify a file upload
        # File upload through javascript
        fileManager = session_functions.loadFileManager()

        fileName = request.headers['X_FILENAME'] # Grab the filename, which will be UTF-8 percent-encoded (e.g. '%E7' instead of python's '\xe7')
        if isinstance(fileName, unicode): # If the filename comes through as unicode
            fileName = fileName.encode('ascii') # Convert to an ascii string

        fileName = unquote(fileName).decode('utf-8') # Unquote using urllib's percent-encoding decoder (turns '%E7' into '\xe7'), then decode it

        rawData = request.data
        fileString = None

        # Fast path: guess the encoding from the first 500 bytes only.
        encodingType = chardet.detect(rawData[:500])['encoding']
        if encodingType is not None:
            try:
                fileString = rawData.decode(encodingType)
            except (UnicodeDecodeError, LookupError):
                # The sample-based guess was wrong (or names an unknown codec);
                # leave fileString unset so the full detection below runs.
                fileString = None

        if fileString is None:
            # Slow path: detect from the entire payload. chardet reports None when
            # it cannot decide (e.g. an empty upload); fall back to strict utf-8
            # rather than calling decode(None), which raises TypeError.
            encodingType = chardet.detect(rawData)['encoding'] or 'utf-8'
            fileString = rawData.decode(encodingType)

        fileManager.addFile(fileName, fileString) # Add the file to the FileManager

        session_functions.saveFileManager(fileManager)

        return 'success'
Exemple #9
0
def scrub():
    """
    Serves the scrub page: previews or applies scrubbing on the active files using
    the options submitted by the user, and offers the scrubbed files for download.

    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None when a POST carries none of the known buttons.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        # Page is being loaded: make sure the session carries scrubbing options.
        if 'scrubbingoptions' not in session:
            session['scrubbingoptions'] = constants.DEFAULT_SCRUB_OPTIONS

        activePreviews = fileManager.getPreviewsOfActive()
        hasTags, hasDOE = fileManager.checkActivesTags()

        return render_template('scrub.html', previews=activePreviews, haveTags=hasTags, haveDOE=hasDOE)

    form = request.form
    applyClicked = 'apply' in form

    if applyClicked or 'preview' in form:
        # 'Preview Scrubbing' or 'Apply Scrubbing' was clicked on scrub.html.
        session_functions.cacheAlterationFiles()
        session_functions.cacheScrubOptions()

        # Scrubbing is only persisted when the user asked to apply it.
        scrubbedPreviews = fileManager.scrubFiles(savingChanges=applyClicked)
        hasTags, hasDOE = fileManager.checkActivesTags()

        if applyClicked:
            session_functions.saveFileManager(fileManager)

        return render_template('scrub.html', previews=scrubbedPreviews, haveTags=hasTags, haveDOE=hasDOE)

    if 'download' in form:
        # 'Download Scrubbed Files' was clicked on scrub.html: hand back the
        # zip response built by the file manager.
        return fileManager.zipActiveFiles('scrubbed.zip')
Exemple #10
0
def tokenizer():
    """
    Serves the tokenizer page: builds frequency matrices from the active files and
    either shows them on the page or sends them as a download.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None when a POST carries none of the known buttons.
    """
    fileManager = session_functions.loadFileManager()

    # Seed any option groups the session does not carry yet.
    for optionKey, optionDefault in (('analyoption', constants.DEFAULT_ANALIZE_OPTIONS),
                                     ('csvoptions', constants.DEFAULT_CSV_OPTIONS)):
        if optionKey not in session:
            session[optionKey] = optionDefault

    if request.method == "GET":
        # Page is being loaded: no matrix to show yet.
        return render_template('tokenizer.html', labels=fileManager.getActiveLabels(),
                               matrixExist=False)

    form = request.form

    if 'gen-csv' in form:
        # 'Generate and Visualize Matrix' was clicked on tokenizer.html.
        session_functions.cacheAnalysisOption()
        session_functions.cacheCSVOptions()
        activeLabels = fileManager.getActiveLabels()

        header, tableMarkup = utility.generateTokenizeResults(fileManager)
        session_functions.saveFileManager(fileManager)

        return render_template('tokenizer.html', labels=activeLabels, matrixTitle=header,
                               tableStr=tableMarkup, matrixExist=True)

    if 'get-csv' in form:
        # 'Download Matrix' was clicked on tokenizer.html.
        session_functions.cacheAnalysisOption()
        session_functions.cacheCSVOptions()
        matrixPath, extension = utility.generateCSV(fileManager)
        session_functions.saveFileManager(fileManager)

        downloadName = "frequency_matrix" + extension
        return send_file(matrixPath, attachment_filename=downloadName, as_attachment=True)
Exemple #11
0
def upload():
    """
    Serves the upload page and accepts the files (or saved workspaces) that are
    uploaded into the current session.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser. Returns None for a non-GET request without the X_FILENAME header.
    """
    if request.method == "GET":
        sizeLimits = dict(MAX_FILE_SIZE=constants.MAX_FILE_SIZE,
                          MAX_FILE_SIZE_INT=constants.MAX_FILE_SIZE_INT,
                          MAX_FILE_SIZE_UNITS=constants.MAX_FILE_SIZE_UNITS)
        return render_template('upload.html', **sizeLimits)

    # The X_FILENAME header marks a file upload sent from javascript.
    if 'X_FILENAME' not in request.headers:
        return None

    fileManager = session_functions.loadFileManager()

    # The filename arrives UTF-8 percent-encoded (e.g. '%E7' rather than '\xe7').
    uploadedName = request.headers['X_FILENAME']
    if isinstance(uploadedName, unicode):
        # Bring a unicode header value down to an ascii byte string first.
        uploadedName = uploadedName.encode('ascii')
    # Undo the percent-encoding, then decode the resulting bytes as UTF-8.
    uploadedName = unquote(uploadedName).decode('utf-8')

    if uploadedName.endswith('.lexos'):
        # A '.lexos' upload is treated as a workspace rather than a text file.
        fileManager.handleUploadWorkSpace()

        # Reload the manager after the workspace upload and update it.
        fileManager = session_functions.loadFileManager()
        fileManager.updateWorkspace()
    else:
        fileManager.addUploadFile(request.data, uploadedName)

    session_functions.saveFileManager(fileManager)
    return 'success'
Exemple #12
0
def select():
    """
    Serves the select page, whose main job is switching individual files between
    the active and inactive state based on what the user does.
    Note: Returns a response object (often a render_template call) to flask and eventually
          to the browser.
    """
    fileManager = session_functions.loadFileManager()

    if request.method == "GET":
        tableRows = fileManager.getPreviewsOfAll()
        for entry in tableRows:
            # Swap the state flag for the class string the template uses on selected rows.
            entry["state"] = "DTTT_selected selected" if entry["state"] == True else ""

        return render_template('select.html', rows=tableRows)

    # Non-GET requests on this page are AJAX calls from JavaScript; the action
    # is identified by which custom header is present.
    headers = request.headers

    if 'previewTest' in headers:
        # Request body holds the id of the file to preview; reply with JSON
        # and leave the manager unsaved.
        import json

        targetID = int(request.data)
        targetFile = fileManager.files[targetID]
        label = targetFile.label
        previewText = targetFile.getPreview()

        return json.dumps({"id": targetID, "label": label, "previewText": previewText})

    if 'toggleFile' in headers:
        # Request body holds the id of the file to flip between active/inactive.
        fileManager.toggleFile(int(request.data))

    elif 'setLabel' in headers:
        # Header value is the new name; it is applied as both name and label.
        renamedTo = headers['setLabel'].decode('utf-8')
        targetID = int(request.data)

        fileManager.files[targetID].setName(renamedTo)
        fileManager.files[targetID].label = renamedTo

    elif 'setClass' in headers:
        classLabel = headers['setClass'].decode('utf-8')
        targetID = int(request.data)
        fileManager.files[targetID].setClassLabel(classLabel)

    elif 'disableAll' in headers:
        fileManager.disableAll()

    elif 'selectAll' in headers:
        fileManager.enableAll()

    elif 'applyClassLabel' in headers:
        fileManager.classifyActiveFiles()

    elif 'deleteActive' in headers:
        fileManager.deleteActiveFiles()

    elif 'deleteRow' in headers:
        # The keys of the submitted form are passed on as the files to delete.
        fileManager.deleteFiles(request.form.keys())

    # The manager is saved even when no known header matched.
    session_functions.saveFileManager(fileManager)
    return ''  # Flask needs some response body, so send an empty one