Exemple #1
0
def delete_one():
    """Delete the single file whose id is sent as the raw request body.

    The request data is converted with ``int`` and passed to the file
    manager as a one-element list; the file manager is saved afterwards.

    :return: the string "success"
    """
    manager = utility.load_file_manager()
    target_id = int(request.data)
    manager.delete_files([target_id])
    utility.save_file_manager(manager)
    return "success"
Exemple #2
0
def init():
    """Initialize a new session, its session folder and an empty file manager.

    A random 30-character id (uppercase letters and digits) is stored in
    ``session['id']`` and the folder returned by ``session_folder()`` is
    created.  If that folder already exists, a new id is drawn and the
    attempt is repeated.  Finally an empty ``FileManager`` is saved.
    """
    # The id names the session folder, so draw it from the OS entropy source
    # rather than the predictable default Mersenne Twister generator.
    rng = random.SystemRandom()
    alphabet = string.ascii_uppercase + string.digits

    folder_created = False
    while not folder_created:  # Keep trying until an unused id is found
        session['id'] = ''.join(rng.choice(alphabet) for _ in range(30))
        print('Attempting new id of', session['id'], '...', end=' ')

        # Only the folder creation is expected to fail with FileExistsError,
        # so it is the only statement guarded by the try block.
        try:
            os.makedirs(session_folder())
        except FileExistsError:
            print('Already in use.')
        else:
            folder_created = True

    # Imported here, as in the original code, rather than at module level.
    from lexos.managers.file_manager import FileManager
    from lexos.managers import utility

    # Persist an empty file manager for the freshly created session.
    empty_file_manager = FileManager()
    utility.save_file_manager(empty_file_manager)

    print('Initialized new session, session folder, and empty file manager '
          'with id.')
Exemple #3
0
def delete_one():
    """Delete the single file whose id is sent as the raw request body.

    The request data is converted with ``int`` and passed to the file
    manager as a one-element list; the file manager is saved afterwards.

    :return: the string "success"
    """
    manager = utility.load_file_manager()
    target_id = int(request.data)
    manager.delete_files([target_id])
    utility.save_file_manager(manager)
    return "success"
Exemple #4
0
def delete_selected():
    """Delete the active files and report what was removed.

    Calls ``delete_active_files`` on the loaded file manager, saves the
    file manager, and serializes the call's result.

    :return: JSON string of the value returned by ``delete_active_files``
        (presumably the ids of the deleted files -- confirm against the
        file manager)
    """
    manager = utility.load_file_manager()
    removed_ids = manager.delete_active_files()
    utility.save_file_manager(manager)
    payload = json.dumps(removed_ids)
    return payload
Exemple #5
0
def delete_selected():
    """Delete the active files and report what was removed.

    Calls ``delete_active_files`` on the loaded file manager, saves the
    file manager, and serializes the call's result.

    :return: JSON string of the value returned by ``delete_active_files``
        (presumably the ids of the deleted files -- confirm against the
        file manager)
    """
    manager = utility.load_file_manager()
    removed_ids = manager.delete_active_files()
    utility.save_file_manager(manager)
    payload = json.dumps(removed_ids)
    return payload
Exemple #6
0
def init():
    """Initialize a new session, its session folder and an empty file manager.

    A random 30-character id (uppercase letters and digits) is stored in
    ``session['id']`` and the folder returned by ``session_folder()`` is
    created.  If that folder already exists, a new id is drawn and the
    attempt is repeated.  Finally an empty ``FileManager`` is saved.
    """
    # The id names the session folder, so draw it from the OS entropy source
    # rather than the predictable default Mersenne Twister generator.
    rng = random.SystemRandom()
    alphabet = string.ascii_uppercase + string.digits

    folder_created = False
    while not folder_created:  # Keep trying until an unused id is found
        session['id'] = ''.join(rng.choice(alphabet) for _ in range(30))
        print('Attempting new id of', session['id'], '...', end=' ')

        # Only the folder creation is expected to fail with FileExistsError,
        # so it is the only statement guarded by the try block.
        try:
            os.makedirs(session_folder())
        except FileExistsError:
            print('Already in use.')
        else:
            folder_created = True

    # Imported here, as in the original code, rather than at module level.
    from lexos.managers.file_manager import FileManager
    from lexos.managers import utility

    # Persist an empty file manager for the freshly created session.
    empty_file_manager = FileManager()
    utility.save_file_manager(empty_file_manager)

    print('Initialized new session, session folder, and empty file manager '
          'with id.')
Exemple #7
0
def disable_rows():
    """Disable every file listed in the request's JSON payload.

    Each element of ``request.json`` is passed to ``disable_files`` in its
    own one-element list, then the file manager is saved.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    for row_id in request.json:
        single_row = [row_id]
        manager.disable_files(single_row)
    utility.save_file_manager(manager)
    return 'success'
Exemple #8
0
def set_class_selected():
    """Assign one class label to several files at once.

    The JSON payload is indexed positionally: element 0 holds the selected
    rows and element 1 the new class label.  Each row is converted with
    ``int`` and used as a key into the file manager's ``files``.

    :return: JSON string of the rows received in the request
    """
    manager = utility.load_file_manager()
    selected_rows = request.json[0]
    label = request.json[1]
    # Iterate over a snapshot of the rows, as the original code did.
    for raw_id in list(selected_rows):
        document = manager.files[int(raw_id)]
        document.set_class_label(label)
    utility.save_file_manager(manager)
    return json.dumps(selected_rows)
Exemple #9
0
def set_class_selected():
    """Assign one class label to several files at once.

    The JSON payload is indexed positionally: element 0 holds the selected
    rows and element 1 the new class label.  Each row is converted with
    ``int`` and used as a key into the file manager's ``files``.

    :return: JSON string of the rows received in the request
    """
    manager = utility.load_file_manager()
    selected_rows = request.json[0]
    label = request.json[1]
    # Iterate over a snapshot of the rows, as the original code did.
    for raw_id in list(selected_rows):
        document = manager.files[int(raw_id)]
        document.set_class_label(label)
    utility.save_file_manager(manager)
    return json.dumps(selected_rows)
Exemple #10
0
def select_all():
    """Enable all files in the file manager and save it.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    manager.enable_all()
    utility.save_file_manager(manager)
    status = 'success'
    return status
Exemple #11
0
def select_all():
    """Enable all files in the file manager and save it.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    manager.enable_all()
    utility.save_file_manager(manager)
    status = 'success'
    return status
Exemple #12
0
def disable_rows():
    """Disable every file listed in the request's JSON payload.

    Each element of ``request.json`` is passed to ``disable_files`` in its
    own one-element list, then the file manager is saved.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    for row_id in request.json:
        single_row = [row_id]
        manager.disable_files(single_row)
    utility.save_file_manager(manager)
    return 'success'
Exemple #13
0
def set_class():
    """Set the class label of a single file.

    The JSON payload is indexed positionally: element 0 is the file id
    (converted with ``int``) and element 1 is the new class label.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    label = request.json[1]
    document = manager.files[target_id]
    document.set_class_label(label)
    utility.save_file_manager(manager)
    return 'success'
Exemple #14
0
def set_class():
    """Set the class label of a single file.

    The JSON payload is indexed positionally: element 0 is the file id
    (converted with ``int``) and element 1 is the new class label.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    label = request.json[1]
    document = manager.files[target_id]
    document.set_class_label(label)
    utility.save_file_manager(manager)
    return 'success'
Exemple #15
0
def set_label():
    """Rename a file and give it the same text as its label.

    The JSON payload is indexed positionally: element 0 is the file id
    (converted with ``int``) and element 1 is the new name.  The name is
    applied through ``set_name`` and also assigned to ``label``.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    replacement = request.json[1]
    document = manager.files[target_id]
    document.set_name(replacement)
    document.label = replacement
    utility.save_file_manager(manager)
    return 'success'
Exemple #16
0
def set_label():
    """Rename a file and give it the same text as its label.

    The JSON payload is indexed positionally: element 0 is the file id
    (converted with ``int``) and element 1 is the new name.  The name is
    applied through ``set_name`` and also assigned to ``label``.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    replacement = request.json[1]
    document = manager.files[target_id]
    document.set_name(replacement)
    document.label = replacement
    utility.save_file_manager(manager)
    return 'success'
Exemple #17
0
def get_tokenizer_csv():
    """Generate the tokenizer CSV and send it to the browser as a download.

    The analysis and CSV options are cached first, then the CSV is
    generated from the file manager, which is saved before responding.

    :return: the ``send_file`` response for the generated file, offered as
        an attachment named "frequency_matrix" plus the file extension
    """
    manager = utility.load_file_manager()

    # Cache the options before the CSV is generated.
    session_manager.cache_analysis_option()
    session_manager.cache_csv_options()

    csv_path, extension = utility.generate_csv(manager)
    utility.save_file_manager(manager)

    attachment_name = "frequency_matrix" + extension
    return send_file(
        csv_path,
        attachment_filename=attachment_name,
        as_attachment=True,
    )
Exemple #18
0
def do_cutting():
    """Cut the files and return previews of the result.

    The cutting options are cached, then ``cut_files`` is called.  The file
    manager is saved only when the form's 'action' field equals 'apply'.

    :return: JSON string of a dict whose "data" key holds the previews
    """
    manager = utility.load_file_manager()
    # Reached from either the preview or the apply button on cut.html.
    session_manager.cache_cutting_options()

    apply_requested = request.form['action'] == 'apply'
    cut_previews = manager.cut_files(saving_changes=apply_requested)
    if apply_requested:
        utility.save_file_manager(manager)

    return json.dumps({"data": cut_previews})
def do_cutting():
    """Cut the files and return previews of the result.

    The cutting options are cached, then ``cut_files`` is called.  The file
    manager is saved only when the form's 'action' field equals 'apply'.

    :return: JSON string of a dict whose "data" key holds the previews
    """
    manager = utility.load_file_manager()
    # Reached from either the preview or the apply button on cut.html.
    session_manager.cache_cutting_options()

    apply_requested = request.form['action'] == 'apply'
    cut_previews = manager.cut_files(saving_changes=apply_requested)
    if apply_requested:
        utility.save_file_manager(manager)

    return json.dumps({"data": cut_previews})
Exemple #20
0
def upload():
    """Serve the upload page and accept files uploaded to the session.

    GET: fixes the session, fills in default general settings when they are
    missing, and renders 'upload.html'.

    Other methods: when the request carries an 'X-FILENAME' header, the
    request body is stored as a file (or, for names ending in '.lexos',
    handled as a workspace upload) and the file manager is saved.

    :return: the rendered template for GET, the string 'success' after an
        upload, or None when a non-GET request has no 'X-FILENAME' header
    """
    # Detect the number of active documents.
    active_doc_count = detect_active_docs()

    if request.method == "GET":
        print("About to fix session in case of browser caching")
        # The browser may be caching an old session; repair it first.
        session_manager.fix()
        print("Session fixed. Rendering template.")
        if 'generalsettings' not in session:
            session['generalsettings'] = \
                constants.DEFAULT_GENERALSETTINGS_OPTIONS
        return render_template(
            'upload.html',
            MAX_FILE_SIZE=constants.MAX_FILE_SIZE,
            MAX_FILE_SIZE_INT=constants.MAX_FILE_SIZE_INT,
            MAX_FILE_SIZE_UNITS=constants.MAX_FILE_SIZE_UNITS,
            itm="upload-tool",
            numActiveDocs=active_doc_count,
        )

    # The X-FILENAME header is the flag that signifies a file upload.
    if 'X-FILENAME' not in request.headers:
        # NOTE(review): nothing is returned on this path -- confirm whether
        # the framework accepts a None response here.
        return None

    # File upload through javascript.
    manager = utility.load_file_manager()

    # The header value is UTF-8 percent-encoded (e.g. '%E7'); decode it
    # back to the real characters (e.g. '\xe7').
    uploaded_name = unquote(request.headers['X-FILENAME'])

    if uploaded_name.endswith('.lexos'):
        manager.handle_upload_workspace()
        # Reload the file manager before updating the workspace.
        manager = utility.load_file_manager()
        manager.update_workspace()
    else:
        manager.add_upload_file(request.data, uploaded_name)

    utility.save_file_manager(manager)
    return 'success'
Exemple #21
0
def upload():
    """Serve the upload page and accept files uploaded to the session.

    GET: fixes the session, fills in default general settings when they are
    missing, and renders 'upload.html'.

    Other methods: when the request carries an 'X-FILENAME' header, the
    request body is stored as a file (or, for names ending in '.lexos',
    handled as a workspace upload) and the file manager is saved.

    :return: the rendered template for GET, the string 'success' after an
        upload, or None when a non-GET request has no 'X-FILENAME' header
    """
    # Detect the number of active documents.
    active_doc_count = detect_active_docs()

    if request.method == "GET":
        print("About to fix session in case of browser caching")
        # The browser may be caching an old session; repair it first.
        session_manager.fix()
        print("Session fixed. Rendering template.")
        if 'generalsettings' not in session:
            session['generalsettings'] = \
                constants.DEFAULT_GENERALSETTINGS_OPTIONS
        return render_template(
            'upload.html',
            MAX_FILE_SIZE=constants.MAX_FILE_SIZE,
            MAX_FILE_SIZE_INT=constants.MAX_FILE_SIZE_INT,
            MAX_FILE_SIZE_UNITS=constants.MAX_FILE_SIZE_UNITS,
            itm="upload-tool",
            numActiveDocs=active_doc_count,
        )

    # The X-FILENAME header is the flag that signifies a file upload.
    if 'X-FILENAME' not in request.headers:
        # NOTE(review): nothing is returned on this path -- confirm whether
        # the framework accepts a None response here.
        return None

    # File upload through javascript.
    manager = utility.load_file_manager()

    # The header value is UTF-8 percent-encoded (e.g. '%E7'); decode it
    # back to the real characters (e.g. '\xe7').
    uploaded_name = unquote(request.headers['X-FILENAME'])

    if uploaded_name.endswith('.lexos'):
        manager.handle_upload_workspace()
        # Reload the file manager before updating the workspace.
        manager = utility.load_file_manager()
        manager.update_workspace()
    else:
        manager.add_upload_file(request.data, uploaded_name)

    utility.save_file_manager(manager)
    return 'success'
Exemple #22
0
def execute():
    """Cut the files and return previews of the cut documents.

    The cutting options are cached before cutting.  The file manager is
    saved only when the form's "action" field equals "apply".

    :return: JSON string of the previews returned by ``cut_files``
    """
    manager = utility.load_file_manager()
    session_manager.cache_cutting_options()

    # Anything other than "apply" cuts without saving.
    apply_cuts = request.form["action"] == "apply"
    cut_previews = manager.cut_files(saving_changes=apply_cuts)

    if apply_cuts:
        utility.save_file_manager(manager)

    serialized = json.dumps(cut_previews)
    return serialized
Exemple #23
0
def do_scrubbing():
    """Scrub the files and return HTML-escaped previews.

    The alteration files and scrub options are cached, then ``scrub_files``
    is called.  The file manager is saved only when the form's
    "formAction" field equals "apply".

    :return: JSON string of a dict whose "data" key holds the previews
    """
    manager = utility.load_file_manager()
    # Reached from either the preview or the apply button on scrub.html.
    session_manager.cache_alteration_files()
    session_manager.cache_scrub_options()

    apply_requested = request.form["formAction"] == "apply"

    # Per the original comment, each preview is a tuple of
    # (id, file_name(label), class_label, preview).
    raw_previews = manager.scrub_files(saving_changes=apply_requested)

    # Only element 3 is escaped, because that is the text.
    escaped_previews = []
    for preview in raw_previews:
        escaped_text = general_functions.html_escape(preview[3])
        escaped_previews.append(
            [preview[0], preview[1], preview[2], escaped_text])

    if apply_requested:
        utility.save_file_manager(manager)

    return json.dumps({"data": escaped_previews})
Exemple #24
0
def scrape():
    """Scrape the requested URLs and store each response as a text file.

    The JSON payload is a single string of URLs separated by commas and/or
    whitespace.  Each URL is fetched and its response text is added to the
    file manager as "url<index>.txt".

    :return: a JSON response listing the names of the scraped files
    """
    # Split on any run of whitespace and/or commas.  Empty tokens (from a
    # leading/trailing comma or an empty payload) are dropped so that an
    # empty string is never passed to requests.get.
    urls = [url for url in re.split(r"[\s,]+", request.json) if url]
    file_manager = utility.load_file_manager()

    scraped_files = []
    for i, url in enumerate(urls):
        # NOTE(review): no timeout is passed here; consider adding one.
        response = requests.get(url)
        file_name = "url" + str(i) + ".txt"
        scraped_files.append(file_name)
        file_manager.add_upload_file(response.text, file_name)

    utility.save_file_manager(file_manager)

    return jsonify(scraped_files)
Exemple #25
0
def scrape():
    """Serve the scrape page, or scrape the URLs posted to it.

    GET renders 'scrape.html'.  POST reads the "urls" string from the JSON
    payload (URLs separated by commas and/or whitespace), fetches each URL
    and adds its response text to the file manager as "url<index>.txt".

    :return: the rendered template for GET, or the JSON string "success"
        for POST
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    if request.method == "GET":
        return render_template('scrape.html', numActiveDocs=num_active_docs)
    if request.method == "POST":
        import requests
        # Split on any run of whitespace and/or commas.  Empty tokens (from
        # a leading/trailing comma or an empty field) are dropped so that an
        # empty string is never passed to requests.get.
        urls = [
            url for url in re.split(r"[\s,]+", request.json["urls"]) if url
        ]
        file_manager = utility.load_file_manager()
        for i, url in enumerate(urls):
            # NOTE(review): no timeout is passed here; consider adding one.
            r = requests.get(url)
            file_manager.add_upload_file(r.text, "url" + str(i) + ".txt")
        utility.save_file_manager(file_manager)
        return json.dumps("success")
def do_scrubbing():
    """Scrub the files and return HTML-escaped previews.

    The alteration files and scrub options are cached, then ``scrub_files``
    is called.  The file manager is saved only when the form's
    "formAction" field equals "apply".

    :return: JSON string of a dict whose "data" key holds the previews
    """
    manager = utility.load_file_manager()
    # Reached from either the preview or the apply button on scrub.html.
    session_manager.cache_alteration_files()
    session_manager.cache_scrub_options()

    apply_requested = request.form["formAction"] == "apply"

    # Per the original comment, each preview is a tuple of
    # (id, file_name(label), class_label, preview).
    raw_previews = manager.scrub_files(saving_changes=apply_requested)

    # Only element 3 is escaped, because that is the text.
    escaped_previews = []
    for preview in raw_previews:
        escaped_text = general_functions.html_escape(preview[3])
        escaped_previews.append(
            [preview[0], preview[1], preview[2], escaped_text])

    if apply_requested:
        utility.save_file_manager(manager)

    return json.dumps({"data": escaped_previews})
Exemple #27
0
def add_document() -> str:
    """Add an uploaded document to the file manager, or load a .lexos file.

    The file name is read from the "file-name" request header and
    percent-decoded with ``unquote``.  Names ending in '.lexos' are handled
    as a workspace upload; anything else is added from the request body.
    The file manager is saved in both cases.

    :return: an empty string
    """
    manager = utility.load_file_manager()

    # The header carries the percent-encoded file name.
    uploaded_name = unquote(request.headers["file-name"])
    is_workspace = uploaded_name.endswith('.lexos')

    if not is_workspace:
        # A regular document: store the request body under the given name.
        manager.add_upload_file(request.data, uploaded_name)
    else:
        # A .lexos file: handle the workspace upload, then reload the file
        # manager before updating the workspace.
        manager.handle_upload_workspace()
        manager = utility.load_file_manager()
        manager.update_workspace()

    utility.save_file_manager(manager)
    return ''
Exemple #28
0
def scrape():
    """Serve the scrape page, or scrape the URLs posted to it.

    GET renders 'scrape.html'.  POST reads the "urls" string from the JSON
    payload (URLs separated by commas and/or whitespace), fetches each URL
    and adds its response text to the file manager as "url<index>.txt".

    :return: the rendered template for GET, or the JSON string "success"
        for POST
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    if request.method == "GET":
        return render_template('scrape.html', numActiveDocs=num_active_docs)
    if request.method == "POST":
        import requests
        # Split on any run of whitespace and/or commas, using a raw-string
        # pattern (a plain "\s" is an invalid string escape).  Empty tokens
        # (from a leading/trailing comma or an empty field) are dropped so
        # that an empty string is never passed to requests.get.
        urls = [
            url for url in re.split(r"[\s,]+", request.json["urls"]) if url
        ]
        file_manager = utility.load_file_manager()
        for i, url in enumerate(urls):
            # NOTE(review): no timeout is passed here; consider adding one.
            r = requests.get(url)
            file_manager.add_upload_file(r.text, "url" + str(i) + ".txt")
        utility.save_file_manager(file_manager)
        return json.dumps("success")
Exemple #29
0
def merge_documents():
    """Merge several files into one new file, separated by a milestone.

    The JSON payload is indexed positionally:
    element 0 -- ids of the files to merge, in order;
    element 1 -- name (and label) of the new file;
    element 2 -- source file passed through to ``add_file``;
    element 3 -- milestone string placed between consecutive files.

    All files are disabled first; the merged file is then added, named,
    labelled and marked active, and the file manager is saved.

    :return: JSON string of [new file id, preview], where the preview is
        the first 152 characters of the merged text followed by '...'
    """
    print("Merging...")
    file_manager = utility.load_file_manager()
    file_manager.disable_all()
    file_ids = request.json[0]
    new_name = request.json[1]
    source_file = request.json[2]
    milestone = request.json[3]

    # Join with the milestone as a literal separator.  The previous
    # approach appended it after every file and stripped the last one with
    # an unescaped regex, which failed or mis-matched whenever the
    # milestone contained regex metacharacters.
    new_file = milestone.join(
        file_manager.files[int(file_id)].load_contents()
        for file_id in file_ids
    )

    # add_file is followed by explicit assignments so that the name, label
    # and active flag are set on the new file (kept from the original).
    file_id = file_manager.add_file(source_file, new_name, new_file)
    merged = file_manager.files[file_id]
    merged.name = new_name
    merged.label = new_name
    merged.active = True
    utility.save_file_manager(file_manager)
    # Returns a new fileID and some preview text
    return json.dumps([file_id, new_file[0:152] + '...'])
Exemple #30
0
def merge_documents():
    """Merge several files into one new file, separated by a milestone.

    The JSON payload is indexed positionally:
    element 0 -- ids of the files to merge, in order;
    element 1 -- name (and label) of the new file;
    element 2 -- source file passed through to ``add_file``;
    element 3 -- milestone string placed between consecutive files.

    All files are disabled first; the merged file is then added, named,
    labelled and marked active, and the file manager is saved.

    :return: JSON string of [new file id, preview], where the preview is
        the first 152 characters of the merged text followed by '...'
    """
    print("Merging...")
    file_manager = utility.load_file_manager()
    file_manager.disable_all()
    file_ids = request.json[0]
    new_name = request.json[1]
    source_file = request.json[2]
    milestone = request.json[3]

    # Join with the milestone as a literal separator.  The previous
    # approach appended it after every file and stripped the last one with
    # an unescaped regex, which failed or mis-matched whenever the
    # milestone contained regex metacharacters.
    new_file = milestone.join(
        file_manager.files[int(file_id)].load_contents()
        for file_id in file_ids
    )

    # add_file is followed by explicit assignments so that the name, label
    # and active flag are set on the new file (kept from the original).
    file_id = file_manager.add_file(source_file, new_name, new_file)
    merged = file_manager.files[file_id]
    merged.name = new_name
    merged.label = new_name
    merged.active = True
    utility.save_file_manager(file_manager)
    # Returns a new fileID and some preview text
    return json.dumps([file_id, new_file[0:152] + '...'])
Exemple #31
0
def execute() -> str:
    """Scrub the documents and return previews of the result.

    The alteration files and scrub options are cached before scrubbing.
    The file manager is saved only when the form's "action" field equals
    "apply".

    :return: JSON string of [element 1, element 3] pairs taken from each
        preview returned by ``scrub_files``
    """
    manager = utility.load_file_manager()

    session_manager.cache_alteration_files()
    session_manager.cache_scrub_options()

    # Anything other than "apply" scrubs without saving.
    apply_requested = request.form["action"] == "apply"

    scrubbed = manager.scrub_files(saving_changes=apply_requested)

    # Keep only elements 1 and 3 of every preview.
    trimmed_previews = []
    for preview in scrubbed:
        trimmed_previews.append([preview[1], preview[3]])

    if apply_requested:
        utility.save_file_manager(manager)

    return json.dumps(trimmed_previews)
Exemple #32
0
def tokenizer():
    """Handles the functionality on the tokenizer page.

    :return: a response object (often a render_template call) to flask and
    eventually to the browser.
    """
    # Use timeit to test peformance
    from timeit import default_timer as timer
    start_t = timer()
    print("Initialising GET request.")
    import pandas as pd
    from operator import itemgetter
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    file_manager = utility.load_file_manager()
    if request.method == "GET":
        # Get the active labels and sort them
        labels = file_manager.get_active_labels_with_id()
        header_labels = []
        for fileID in labels:
            header_labels.append(file_manager.files[int(fileID)].label)
        header_labels = natsorted(header_labels)
        # Get the starting options from the session
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALYZE_OPTIONS
        if 'csvoptions' not in session:
            session['csvoptions'] = constants.DEFAULT_CSV_OPTIONS
        csv_orientation = session['csvoptions']['csvorientation']
        csv_delimiter = session['csvoptions']['csvdelimiter']
        cull_number = session['analyoption']['cullnumber']
        token_type = session['analyoption']['tokenType']
        normalize_type = session['analyoption']['normalizeType']
        token_size = session['analyoption']['tokenSize']
        norm = session['analyoption']['norm']
        data = {
            'cullnumber': cull_number,
            'tokenType': token_type,
            'normalizeType': normalize_type,
            'csvdelimiter': csv_delimiter,
            'mfwnumber': '1',
            'csvorientation': csv_orientation,
            'tokenSize': token_size,
            'norm': norm
        }
        # If there are active documents, generate a DTM matrix
        if num_active_docs > 0:
            end_t = timer()
            elapsed = end_t - start_t
            print("before generateCSVMatrixFromAjax")
            print(elapsed)
            # Get the DTM with the session options and convert it to a list of
            # lists
            dtm = utility.generate_csv_matrix_from_ajax(data,
                                                        file_manager,
                                                        round_decimal=True)
            end_t = timer()
            elapsed = end_t - start_t
            print("after generateCSVMatrixFromAjax")
            print(elapsed)
            # Print the first five rows for testing
            # print dtm[0:5]
            # #dtm[0] += (0,0,)
            # for i,row in enumerate(dtm[1:]):
            #     dtm[i+1] += (0,0,)
            # print dtm[0:5]
            # Create a pandas dataframe with the correct orientation.
            # Convert it to a list of lists (matrix)
            if csv_orientation == "filerow":
                df = pd.DataFrame(dtm)
                # Create the matrix
                matrix = df.values.tolist()
            else:
                df = pd.DataFrame(dtm)
                end_t = timer()
                elapsed = end_t - start_t
                print("DataFrame created.")
                print(elapsed)
                # Calculate the sums and averages
                length = len(df.index)
                sums = [0] * (length - 1)
                sums.insert(0, "Total")
                averages = [0] * (length - 1)
                averages.insert(0, "Average")
                end_t = timer()
                elapsed = end_t - start_t
                print("Sum and averages calculated.")
                print(elapsed)
                # Concatenate the total and average columns to the dataframe
                df = pd.concat([df, pd.DataFrame(sums, columns=['Total'])],
                               axis=1)
                df = pd.concat(
                    [df, pd.DataFrame(averages, columns=['Average'])], axis=1)
                end_t = timer()
                elapsed = end_t - start_t
                print("DataFrame modified.")
                print(elapsed)
                # Create the matrix
                matrix = df.values.tolist()
                matrix[0][0] = "Terms"
                end_t = timer()
                elapsed = end_t - start_t
                print("DataFrame converted to matrix.")
                print(elapsed)
            # Prevent Unicode errors in column headers
            for i, v in enumerate(matrix[0]):
                matrix[0][i] = v
            # Save the column headers and remove them from the matrix
            # columns = natsorted(matrix[0])
            columns = matrix[0]
            if csv_orientation == "filecolumn":
                columns[0] = "Terms"
            else:
                columns[0] = "Documents"
            del matrix[0]
            # Prevent Unicode errors in the row headers
            for i, v in enumerate(matrix):
                matrix[i][0] = v[0]
            # Calculate the number of rows in the matrix
            records_total = len(matrix)
            # Sort the matrix by column 0
            matrix = natsorted(matrix, key=itemgetter(0), reverse=False)
            # Set the table length -- maximum 10 records for initial load
            if records_total <= 10:
                end_index = records_total - 1
                matrix = matrix[0:end_index]
            else:
                matrix = matrix[0:9]
            # escape all the html character in matrix
            matrix = [[general_functions.html_escape(row[0])] + row[1:]
                      for row in matrix]
            # escape all the html character in columns
            columns = [general_functions.html_escape(item) for item in columns]
            # The first 10 rows are sent to the template as an HTML string.
            # After the template renders, an ajax request fetches new data
            # to re-render the table with the correct number of rows.
            # Create the columns string
            cols = "<tr>"
            for s in columns:
                cols += "<th>" + str(s) + "</th>"
            cols += "</tr>"
            # Create the rows string
            rows = ""
            for l in matrix:
                row = "<tr>"
                for s in l:
                    row += "<td>" + str(s) + "</td>"
                row += "</tr>"
                rows += row
        # Catch instances where there is no active document (triggers the error
        # modal)
        else:
            cols = "<tr><th>Terms</th></tr>"
            rows = "<tr><td></td></tr>"
            records_total = 0
        # Render the template
        end_t = timer()
        elapsed = end_t - start_t
        print("Matrix generated. Rendering template.")
        print(elapsed)
        return render_template('tokenizer.html',
                               draw=1,
                               itm="tokenize",
                               labels=labels,
                               headers=header_labels,
                               columns=cols,
                               rows=rows,
                               numRows=records_total,
                               orientation=csv_orientation,
                               numActiveDocs=num_active_docs)
    if request.method == "POST":
        end_t = timer()
        elapsed = end_t - start_t
        print("POST received.")
        print(elapsed)
        session_manager.cache_analysis_option()
        session_manager.cache_csv_options()
        if 'get-csv' in request.form:
            # The 'Download Matrix' button is clicked on tokenizer.html.
            save_path, file_extension = utility.generate_csv(file_manager)
            utility.save_file_manager(file_manager)
            return send_file(save_path,
                             attachment_filename="frequency_matrix" +
                             file_extension,
                             as_attachment=True)
        else:
            # Get the active labels and sort them
            labels = file_manager.get_active_labels_with_id()
            header_labels = []
            for fileID in labels:
                header_labels.append(file_manager.files[int(fileID)].label)
            # Get the Tokenizer options from the request json object
            length = int(request.json["length"])
            # Increment for the ajax response
            draw = int(request.json["draw"]) + 1
            search = request.json["search"]
            order = str(request.json["order"][1])
            sort_column = int(request.json["order"][0])
            csv_orientation = request.json["csvorientation"]
            # Set the sorting order
            if order == "desc":
                reverse = True
            else:
                reverse = False
            # Get the DTM with the requested options and convert it to a list
            # of lists
            dtm = utility.generate_csv_matrix_from_ajax(request.json,
                                                        file_manager,
                                                        round_decimal=True)
            end_t = timer()
            elapsed = end_t - start_t
            print("DTM received.")
            print(elapsed)
            if csv_orientation == "filerow":
                dtm[0][0] = "Documents"
                df = pd.DataFrame(dtm)
                footer_stats = df.drop(df.index[[0]], axis=0)
                footer_stats = footer_stats.drop(df.index[[0]], axis=1)
                footer_totals = footer_stats.sum().tolist()
                footer_totals = [round(total, 4) for total in footer_totals]
                footer_averages = footer_stats.mean().tolist()
                footer_averages = [round(ave, 4) for ave in footer_averages]
                sums = ["Total"]
                averages = ["Average"]
                # Discrepancy--this is used for tokenize/POST
                length = len(df.index)
                for i in range(0, length):
                    if i > 0:
                        rounded_sum = round(df.iloc[i][1:].sum(), 4)
                        sums.append(rounded_sum)
                        rounded_ave = round(df.iloc[i][1:].mean(), 4)
                        averages.append(rounded_ave)
                df = pd.concat([df, pd.DataFrame(sums, columns=['Total'])],
                               axis=1)
                df = pd.concat(
                    [df, pd.DataFrame(averages, columns=['Average'])], axis=1)
                # Populate the sum of sums and average of averages cells
                sum_of_sums = df['Total'].tolist()
                num_rows = len(df['Total'].tolist())
                num_rows = num_rows - 1
                sum_of_sums = sum(sum_of_sums[1:])
                sum_of_ave = df['Average'].tolist()
                sum_of_ave = sum(sum_of_ave[1:])
                footer_totals.append(round(sum_of_sums, 4))
                footer_totals.append(round(sum_of_ave, 4))
                ave_of_sums = sum_of_sums / num_rows
                ave_of_aves = ave_of_sums / num_rows
                footer_averages.append(round(ave_of_sums, 4))
                footer_averages.append(round(ave_of_aves, 4))
                # Change the DataFrame to a list
                matrix = df.values.tolist()
                # Prevent Unicode errors in column headers
                for i, v in enumerate(matrix[0]):
                    matrix[0][i] = v
                # Save the column headers and remove them from the matrix
                columns = natsorted(matrix[0][1:-2])
                columns.insert(0, "Documents")
                columns.append("Total")
                columns.append("Average")
                del matrix[0]
            else:
                df = pd.DataFrame(dtm)
                # print(df[0:3])
                end_t = timer()
                elapsed = end_t - start_t
                print("DTM created. Calculating footer stats")
                print(elapsed)
                footer_stats = df.drop(df.index[[0]], axis=0)
                # print(footer_stats[0:3])
                footer_stats = footer_stats.drop(df.index[[0]], axis=1)
                footer_totals = footer_stats.sum().tolist()
                footer_totals = [round(total, 4) for total in footer_totals]
                footer_averages = footer_stats.mean().tolist()
                footer_averages = [round(ave, 4) for ave in footer_averages]
                end_t = timer()
                elapsed = end_t - start_t
                print("Footer stats calculated. "
                      "Calculating totals and averages...")
                print(elapsed)
                # try it with nested for loops
                sums = []
                averages = []
                n_rows = len(df.index)
                # all rows are the same, so picking any row
                n_cols = len(df.iloc[1])
                for i in range(1, n_rows):
                    row_total = 0
                    for j in range(1, n_cols):
                        row_total += df.iloc[i][j]
                    sums.append(round(row_total, 4))
                    averages.append(round((row_total / (n_cols - 1)), 4))
                sums.insert(0, "Total")
                averages.insert(0, "Average")
                end_t = timer()
                elapsed = end_t - start_t
                print("Totals and averages calculated. Appending columns...")
                print(elapsed)
                # This seems to be the bottleneck
                df['Total'] = sums
                df['Average'] = averages
                end_t = timer()
                elapsed = end_t - start_t
                print("Populating columns with rounded values.")
                print(elapsed)
                # Populate the sum of sums and average of averages cells
                sum_of_sums = df['Total'].tolist()
                num_rows = len(df['Total'].tolist())
                num_rows = num_rows - 1
                sum_of_sums = sum(sum_of_sums[1:])
                sum_of_ave = df['Average'].tolist()
                sum_of_ave = sum(sum_of_ave[1:])
                footer_totals.append(round(sum_of_sums, 4))
                footer_totals.append(round(sum_of_ave, 4))
                ave_of_sums = sum_of_sums / num_rows
                ave_of_aves = ave_of_sums / num_rows
                footer_averages.append(round(ave_of_sums, 4))
                footer_averages.append(round(ave_of_aves, 4))
                end_t = timer()
                elapsed = end_t - start_t
                print("Rounded values added.")
                print(elapsed)
                matrix = df.values.tolist()
                matrix[0][0] = "Terms"
                # Prevent Unicode errors in column headers
                for i, v in enumerate(matrix[0]):
                    matrix[0][i] = v
                # Save the column headers and remove them from the matrix
                columns = natsorted(matrix[0])
                if csv_orientation == "filecolumn":
                    columns[0] = "Terms"
                else:
                    columns[0] = "Documents"
                del matrix[0]
        # Code for both orientations #
        end_t = timer()
        elapsed = end_t - start_t
        print("Starting common code.")
        print(elapsed)
        # Prevent Unicode errors in the row headers
        for i, v in enumerate(matrix):
            matrix[i][0] = v[0]
        # Calculate the number of rows in the matrix
        records_total = len(matrix)
        # Sort and Filter the cached DTM by column
        if len(search) != 0:
            matrix = [x for x in matrix if x[0].startswith(search)]
            matrix = natsorted(matrix,
                               key=itemgetter(sort_column),
                               reverse=reverse)
        else:
            matrix = natsorted(matrix,
                               key=itemgetter(sort_column),
                               reverse=reverse)
        # Get the number of filtered rows
        records_filtered = len(matrix)
        # Set the table length
        if length == -1:
            matrix = matrix[0:]
        else:
            start_index = int(request.json["start"])
            end_index = int(request.json["end"])
            matrix = matrix[start_index:end_index]
        # Correct the footer rows
        footer_totals = [float(Decimal("%.4f" % e)) for e in footer_totals]
        footer_averages = [float(Decimal("%.4f" % e)) for e in footer_averages]
        footer_totals.insert(0, "Total")
        footer_averages.insert(0, "Average")
        footer_totals.append("")
        footer_averages.append("")
        response = {
            "draw": draw,
            "records_total": records_total,
            "records_filtered": records_filtered,
            "length": int(length),
            "columns": columns,
            "data": matrix,
            "totals": footer_totals,
            "averages": footer_averages
        }
        end_t = timer()
        elapsed = end_t - start_t
        print("Returning table data to the browser.")
        print(elapsed)
        return json.dumps(response)
Exemple #33
0
def manage():
    """Handle the select (manage) page and its AJAX actions.

    A GET request renders ``manage.html`` with the previews of all files.
    Any other request is dispatched on the presence of a request header:

    * ``previewTest``: the request body is a file id; the id, label and
      preview of that file are returned as a JSON string (nothing is saved).
    * ``toggleFile``: the request body is a file id; that file is toggled.
    * ``toggliFy``: the request body is a comma-separated list of file ids;
      all files are disabled and the listed ones are enabled.
    * ``setLabel``: the header value becomes the name and label of the file
      whose id is in the request body.
    * ``setClass``: the header value becomes the class label of the file
      whose id is in the request body.
    * ``disableAll`` / ``selectAll`` / ``applyClassLabel`` /
      ``deleteActive``: call the matching file manager method.
    * ``deleteRow``: delete the files whose ids are the keys of the form.

    :return: the rendered template on GET, a JSON string for
    ``previewTest``, otherwise an empty string once the file manager has
    been saved.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    # Usual loading of the FileManager
    file_manager = utility.load_file_manager()

    if request.method == "GET":
        rows = file_manager.get_previews_of_all()
        # The template expects a string marker rather than a truthy value.
        for row in rows:
            row["state"] = "selected" if row["state"] else ""
        return render_template(
            'manage.html',
            rows=rows,
            itm="manage",
            numActiveDocs=num_active_docs)

    headers = request.headers

    if 'previewTest' in headers:
        # Read-only action: answer directly without saving the manager.
        file_id = int(request.data)
        preview_file = file_manager.files[file_id]
        return json.dumps({
            "id": file_id,
            "label": preview_file.label,
            "previewText": preview_file.get_preview()})

    if 'toggleFile' in headers:
        # Toggle the file from active to inactive or vice versa
        file_manager.toggle_file(int(request.data))
    elif 'toggliFy' in headers:
        # request.data is raw bytes under Python 3; splitting bytes on a str
        # separator raises TypeError, so decode to text first.
        raw_ids = request.data
        if isinstance(raw_ids, bytes):
            raw_ids = raw_ids.decode("utf-8")
        file_ids = raw_ids.split(",")
        # Leave only the listed files enabled.
        file_manager.disable_all()
        file_manager.enable_files(file_ids)
    elif 'setLabel' in headers:
        new_name = headers['setLabel']
        file_id = int(request.data)
        file_manager.files[file_id].set_name(new_name)
        file_manager.files[file_id].label = new_name
    elif 'setClass' in headers:
        new_class_label = headers['setClass']
        file_id = int(request.data)
        file_manager.files[file_id].set_class_label(new_class_label)
    elif 'disableAll' in headers:
        file_manager.disable_all()
    elif 'selectAll' in headers:
        file_manager.enable_all()
    elif 'applyClassLabel' in headers:
        file_manager.classify_active_files()
    elif 'deleteActive' in headers:
        file_manager.delete_active_files()
    elif 'deleteRow' in headers:
        # delete the files whose ids are the keys of request.form
        file_manager.delete_files(list(request.form.keys()))

    # The manager is saved even when no action header matched.
    utility.save_file_manager(file_manager)
    return ''  # Return an empty string because you have to return something
Exemple #34
0
def k_means():
    """Handle the k-means page.

    On GET the page is rendered with empty results, after seeding the
    session with the default analysis and k-means options when they are
    missing. On POST the submitted options are cached, the file manager is
    saved, and the clustering selected by the ``viz`` form field (``PCA`` or
    ``Voronoi``) is generated and rendered. Any other ``viz`` value is
    rejected with a 400 response.

    :return: a response object (a render_template call) to flask and
    eventually to the browser.
    """
    # Local import so the block does not depend on the file's import list.
    from flask import abort

    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    file_manager = utility.load_file_manager()
    labels = file_manager.get_active_labels_with_id()
    default_k = len(labels) // 2

    # Template arguments that are identical for every render of the page.
    shared_context = {
        "labels": labels,
        "fileNumber": len(labels),
        "defaultK": default_k,
        "itm": "kmeans",
        "numActiveDocs": num_active_docs,
    }

    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALYZE_OPTIONS
        if 'kmeanoption' not in session:
            session['kmeanoption'] = constants.DEFAULT_KMEAN_OPTIONS
        return render_template('kmeans.html',
                               silhouettescore='',
                               kmeansIndex=[],
                               fileNameStr='',
                               KValue=0,
                               colorChartStr='',
                               kmeansdatagenerated=False,
                               **shared_context)

    if request.method == "POST":
        # 'POST' request occur when html form is submitted
        # (i.e. 'Get Graphs', 'Download...')
        session_manager.cache_analysis_option()
        session_manager.cache_k_mean_option()
        utility.save_file_manager(file_manager)

        viz = request.form['viz']
        if viz == 'PCA':
            kmeans_index, silhouette_score, file_name_str, k_value, \
                color_chart_str = utility.generate_k_means_pca(file_manager)
            return render_template('kmeans.html',
                                   silhouettescore=silhouette_score,
                                   kmeansIndex=kmeans_index,
                                   fileNameStr=file_name_str,
                                   KValue=k_value,
                                   colorChartStr=color_chart_str,
                                   kmeansdatagenerated=True,
                                   **shared_context)
        if viz == 'Voronoi':
            kmeans_index, silhouette_score, file_name_str, k_value, \
                color_chart_str, final_points_list, final_centroids_list, \
                text_data, max_x = \
                utility.generate_k_means_voronoi(file_manager)
            return render_template('kmeans.html',
                                   silhouettescore=silhouette_score,
                                   kmeansIndex=kmeans_index,
                                   fileNameStr=file_name_str,
                                   KValue=k_value,
                                   colorChartStr=color_chart_str,
                                   finalPointsList=final_points_list,
                                   finalCentroidsList=final_centroids_list,
                                   textData=text_data,
                                   maxX=max_x,
                                   kmeansdatagenerated=True,
                                   **shared_context)
        # Without this the view returned None for an unknown visualization,
        # which Flask reports as a server error; reject the request instead.
        abort(400)
    # Methods other than GET and POST still fall through and return None,
    # exactly as before.
Exemple #35
0
def manage():
    """Handle the select (manage) page and the AJAX requests it sends.

    GET renders ``manage.html`` with the previews of all files. Every other
    request selects its action by which header is present:

    * ``previewTest``: body holds a file id; returns that file's id, label
      and preview as a JSON string without saving anything.
    * ``toggleFile``: body holds a file id; toggles that file.
    * ``toggliFy``: body holds comma-separated file ids; disables all files
      and then enables the listed ones.
    * ``setLabel``: header value is applied as name and label of the file
      whose id is in the body.
    * ``setClass``: header value is applied as class label of the file
      whose id is in the body.
    * ``disableAll`` / ``selectAll`` / ``applyClassLabel`` /
      ``deleteActive``: invoke the corresponding file manager method.
    * ``deleteRow``: deletes the files whose ids are the form's keys.

    :return: the rendered template on GET, a JSON string for
    ``previewTest``, otherwise an empty string after the file manager has
    been saved.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    # Usual loading of the FileManager
    file_manager = utility.load_file_manager()

    if request.method == "GET":
        rows = file_manager.get_previews_of_all()
        # Replace the truthy/falsy state with the marker string the
        # template receives.
        for row in rows:
            row["state"] = "selected" if row["state"] else ""
        return render_template('manage.html',
                               rows=rows,
                               itm="manage",
                               numActiveDocs=num_active_docs)

    if 'previewTest' in request.headers:
        # Read-only lookup; returns before the manager is saved.
        file_id = int(request.data)
        selected_file = file_manager.files[file_id]
        preview_vals = {
            "id": file_id,
            "label": selected_file.label,
            "previewText": selected_file.get_preview()
        }
        return json.dumps(preview_vals)

    if 'toggleFile' in request.headers:
        # Toggle the file from active to inactive or vice versa
        file_id = int(request.data)
        file_manager.toggle_file(file_id)
    elif 'toggliFy' in request.headers:
        # Under Python 3 request.data is bytes, and bytes.split(",") with a
        # str separator raises TypeError; decode before splitting.
        payload = request.data
        if isinstance(payload, bytes):
            payload = payload.decode("utf-8")
        file_ids = payload.split(",")
        # Disable everything, then enable only the requested files.
        file_manager.disable_all()
        file_manager.enable_files(file_ids)
    elif 'setLabel' in request.headers:
        new_name = request.headers['setLabel']
        file_id = int(request.data)
        file_manager.files[file_id].set_name(new_name)
        file_manager.files[file_id].label = new_name
    elif 'setClass' in request.headers:
        new_class_label = request.headers['setClass']
        file_id = int(request.data)
        file_manager.files[file_id].set_class_label(new_class_label)
    elif 'disableAll' in request.headers:
        file_manager.disable_all()
    elif 'selectAll' in request.headers:
        file_manager.enable_all()
    elif 'applyClassLabel' in request.headers:
        file_manager.classify_active_files()
    elif 'deleteActive' in request.headers:
        file_manager.delete_active_files()
    elif 'deleteRow' in request.headers:
        # delete the files whose ids are the keys of request.form
        file_manager.delete_files(list(request.form.keys()))

    # Saved unconditionally, including when no action header matched.
    utility.save_file_manager(file_manager)
    return ''  # Return an empty string because you have to return something