Example #1
0
def multi_cloud():
    """Handle the functionality of the multicloud page.

    On a GET request, store the default cloud and multicloud options in
    the session when they are absent and render the page without data.
    On a POST request (legacy path: the form is now submitted by Ajax
    through do_multicloud()), generate the multicloud data, cache the
    submitted options and render the page with the data as a JSON string.

    :return: a response object (a render_template call) to Flask and
    eventually to the browser; None for any other request method.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    file_manager = utility.load_file_manager()
    # Sort the (key, value) label items by value, in natural order.
    labels = OrderedDict(natsorted(
        list(file_manager.get_active_labels_with_id().items()),
        key=lambda x: x[1]))
    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded.
        if 'cloudoption' not in session:
            session['cloudoption'] = constants.DEFAULT_CLOUD_OPTIONS
        if 'multicloudoptions' not in session:
            session['multicloudoptions'] = \
                constants.DEFAULT_MULTICLOUD_OPTIONS
        return render_template(
            'multicloud.html',
            itm="multicloud",
            jsonStr="",
            labels=labels,
            numActiveDocs=num_active_docs)
    if request.method == "POST":
        # This is legacy code.
        # The form is now submitted by Ajax do_multicloud()
        # 'POST' request occur when html form is submitted
        # (i.e. 'Get Graphs', 'Download...')
        # The file manager loaded above is reused, and the word-count
        # array that used to be built here was dropped because nothing
        # ever read it.
        # Temporary fix because the front end needs a string
        json_obj = json.dumps(utility.generate_mc_json_obj(file_manager))
        session_manager.cache_cloud_option()
        session_manager.cache_multi_cloud_options()
        return render_template(
            'multicloud.html',
            itm="multicloud",
            JSONObj=json_obj,
            labels=labels,
            numActiveDocs=num_active_docs)
Example #2
0
def multi_cloud():
    """Handle the functionality of the multicloud page.

    GET: make sure the session holds cloud and multicloud options
    (falling back to the defaults) and render the page with an empty
    JSON string. POST (legacy path, superseded by the Ajax call
    do_multicloud()): generate the multicloud data, cache the submitted
    options in the session and render the page with the data dumped to a
    JSON string.

    :return: a response object (a render_template call) to Flask and
    eventually to the browser; None for any other request method.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    file_manager = utility.load_file_manager()
    labels = file_manager.get_active_labels_with_id()
    # Natural sort of the label items on their value.
    labels = OrderedDict(natsorted(list(labels.items()), key=lambda x: x[1]))
    if request.method == 'GET':
        # 'GET' request occurs when the page is first loaded.
        if 'cloudoption' not in session:
            session['cloudoption'] = constants.DEFAULT_CLOUD_OPTIONS
        if 'multicloudoptions' not in session:
            session['multicloudoptions'] = \
                constants.DEFAULT_MULTICLOUD_OPTIONS
        return render_template('multicloud.html',
                               itm="multicloud",
                               jsonStr="",
                               labels=labels,
                               numActiveDocs=num_active_docs)
    if request.method == "POST":
        # This is legacy code.
        # The form is now submitted by Ajax do_multicloud()
        # 'POST' request occur when html form is submitted
        # (i.e. 'Get Graphs', 'Download...')
        # The previous version reloaded the file manager and assembled a
        # per-document word-count list that was never used; both were
        # removed.
        json_obj = utility.generate_mc_json_obj(file_manager)
        # Temporary fix because the front end needs a string
        json_obj = json.dumps(json_obj)
        session_manager.cache_cloud_option()
        session_manager.cache_multi_cloud_options()
        return render_template('multicloud.html',
                               itm="multicloud",
                               JSONObj=json_obj,
                               labels=labels,
                               numActiveDocs=num_active_docs)
Example #3
0
def top_words():
    """Render the topword page in its initial state.

    Stores the default topword and analyze options in the session when
    they are missing, then renders 'topword.html' with the active labels
    and the class division map.

    :return: a response object (a render_template call) to flask and
    eventually to the browser.
    """
    doc_count = detect_active_docs()
    active_labels = utility.load_file_manager().get_active_labels_with_id()

    # Fall back to the defaults when no options are stored yet.
    if 'topwordoption' not in session:
        session['topwordoption'] = constants.DEFAULT_TOPWORD_OPTIONS
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALYZE_OPTIONS

    # The first dimension of the class division map is passed to the
    # template as the number of classes.
    manager_model = FileManagerModel()
    division_map = manager_model.load_file_manager().get_class_division_map()
    class_count = division_map.shape[0]

    return render_template(
        'topword.html',
        labels=active_labels,
        classmap=division_map,
        numclass=class_count,
        topwordsgenerated='class_div',
        itm='topwords',
        numActiveDocs=doc_count)
Example #4
0
def topword_html():
    """Handle a submitted topword form.

    When the form contains 'get-topword', the topword CSV is generated
    and sent as an attachment; otherwise the readable result is computed
    and rendered in 'topword.html'. Both paths cache the analysis and
    topword options in the session.

    :return: a send_file response for the download, or a render_template
    response with the results.
    """
    doc_count = detect_active_docs()
    active_labels = utility.load_file_manager().get_active_labels_with_id()

    # Class division map and the size of its first dimension.
    division_map = \
        FileManagerModel().load_file_manager().get_class_division_map()
    class_count = division_map.shape[0]

    if 'get-topword' not in request.form:
        # Show the results on the page.
        session_manager.cache_analysis_option()
        session_manager.cache_top_word_options()
        readable = TopwordModel().get_readable_result(
            class_division_map=division_map)
        return render_template(
            'topword.html',
            result=readable.results,
            labels=active_labels,
            header=readable.header,
            numclass=class_count,
            topwordsgenerated='True',
            classmap=[],
            itm='topwords',
            numActiveDocs=doc_count)

    # Download request: the CSV path is obtained before the options are
    # cached, as in the original ordering.
    csv_path = TopwordModel().get_topword_csv_path(
        class_division_map=division_map)
    session_manager.cache_analysis_option()
    session_manager.cache_top_word_options()
    return send_file(
        csv_path,
        attachment_filename=constants.TOPWORD_CSV_FILE_NAME,
        as_attachment=True)
Example #5
0
def delete_one():
    """Delete the single file whose id is sent as the request body.

    :return: the string "success" after the file manager has been saved
    """
    manager = utility.load_file_manager()
    target_id = int(request.data)
    manager.delete_files([target_id])
    utility.save_file_manager(manager)
    return "success"
def scrub():
    """Render the scrub page.

    Stores default scrubbing and XML-handling options in the session
    when they are missing, then renders 'scrub.html' with the previews
    of the active files and the three flags reported by
    check_actives_tags(). (The scrubbing itself is not done here.)

    :return: a response object (a render_template call) to flask and
     eventually to the browser.
    """
    doc_count = detect_active_docs()
    manager = utility.load_file_manager()

    # Seed the session the first time the page is loaded.
    if 'scrubbingoptions' not in session:
        session['scrubbingoptions'] = constants.DEFAULT_SCRUB_OPTIONS
    if 'xmlhandlingoptions' not in session:
        empty_selection = {"action": '', "attribute": ""}
        session['xmlhandlingoptions'] = {"myselect": empty_selection}
    utility.xml_handling_options()

    active_previews = manager.get_previews_of_active()
    has_tags, has_doe, has_gutenberg = manager.check_actives_tags()
    return render_template(
        'scrub.html',
        previews=active_previews,
        itm="scrubber",
        haveTags=has_tags,
        haveDOE=has_doe,
        haveGutenberg=has_gutenberg,
        numActiveDocs=doc_count)
Example #7
0
File: base.py Project: vbaira/Lexos
def get_document_previews() -> str:
    """Return the previews of the active documents as a JSON string.

    :return: the JSON-encoded result of get_previews_of_active().
    """
    previews = utility.load_file_manager().get_previews_of_active()
    return json.dumps(previews)
Example #8
0
def delete_selected():
    """Delete the active files and persist the file manager.

    :return: JSON string of the file ids returned by
    delete_active_files()
    """
    manager = utility.load_file_manager()
    removed_ids = manager.delete_active_files()
    utility.save_file_manager(manager)
    return json.dumps(removed_ids)
Example #9
0
def delete_one():
    """Remove one file, identified by the integer id in the request body.

    :return: the string "success" once the change has been saved
    """
    manager = utility.load_file_manager()
    doomed_ids = [int(request.data)]
    manager.delete_files(doomed_ids)
    utility.save_file_manager(manager)
    return "success"
Example #10
0
    def options_from_front_end(self) -> StatsFrontEndOption:
        """Build the statistics options from the front end data.

        Combines the ids of the currently active files with the table
        sorting settings and the two colors sent by the front end.
        """
        front_end = self._front_end_data

        # Ids of every active file known to the file manager.
        active_ids = []
        for active_file in load_file_manager().get_active_files():
            active_ids.append(active_file.id)

        # Index of the column the statistics table is sorted on.
        column = int(front_end["statistics_table_selected_column"])

        # True only when the sort mode is exactly "Ascending".
        ascending = front_end["statistics_table_sort_mode"] == "Ascending"

        # Colors are optional; .get() yields None when they are absent.
        return StatsFrontEndOption(
            active_file_ids=active_ids,
            sort_column=column,
            sort_ascending=ascending,
            text_color=front_end.get("text_color"),
            highlight_color=front_end.get("highlight_color"))
Example #11
0
def delete_selected():
    """Delete every active file and save the file manager.

    :returns: JSON string holding the ids reported by
    delete_active_files()
    """
    manager = utility.load_file_manager()
    deleted = manager.delete_active_files()
    utility.save_file_manager(manager)
    payload = json.dumps(deleted)
    return payload
Example #12
0
def scrub():
    """Render the scrub page.

    Makes sure the session carries scrubbing and XML-handling options
    (using defaults when absent), then renders 'scrub.html' with the
    previews of the active files and the tag / DOE / Gutenberg flags
    returned by check_actives_tags().

    :return: a response object (a render_template call) to flask and
     eventually to the browser.
    """
    doc_count = detect_active_docs()
    manager = utility.load_file_manager()

    # Defaults for a session that has not visited this page yet.
    if 'scrubbingoptions' not in session:
        session['scrubbingoptions'] = constants.DEFAULT_SCRUB_OPTIONS
    if 'xmlhandlingoptions' not in session:
        session['xmlhandlingoptions'] = {
            "myselect": {
                "action": '',
                "attribute": "",
            },
        }
    utility.xml_handling_options()

    template_args = {"previews": manager.get_previews_of_active()}
    flags = manager.check_actives_tags()
    template_args["haveTags"], template_args["haveDOE"], \
        template_args["haveGutenberg"] = flags
    return render_template(
        'scrub.html',
        itm="scrubber",
        numActiveDocs=doc_count,
        **template_args)
Example #13
0
def download():
    """Download the active files zipped as "cut-files.zip".

    :return: whatever zip_active_files() returns for that archive name.
    """
    manager = utility.load_file_manager()
    archive = manager.zip_active_files("cut-files.zip")
    return archive
Example #14
0
def upload():
    """Handle the functionality of the upload page.

    A GET request fixes the session, stores the default general settings
    when absent and renders 'upload.html'. Any other request is treated
    as a JavaScript file upload and must carry the percent-encoded file
    name in the X-FILENAME header: a '.lexos' file is handled as a
    workspace, anything else is added as an uploaded file.

    :return: the rendered upload page for GET; the string 'success'
     after an upload; a 400 response when the X-FILENAME header is
     missing (previously the view returned None, which Flask rejects).
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    if request.method == "GET":
        print("About to fix session in case of browser caching")
        # fix the session in case the browser is caching the old session
        session_manager.fix()
        print("Session fixed. Rendering template.")
        if 'generalsettings' not in session:
            session['generalsettings'] = \
                constants.DEFAULT_GENERALSETTINGS_OPTIONS
        return render_template(
            'upload.html',
            MAX_FILE_SIZE=constants.MAX_FILE_SIZE,
            MAX_FILE_SIZE_INT=constants.MAX_FILE_SIZE_INT,
            MAX_FILE_SIZE_UNITS=constants.MAX_FILE_SIZE_UNITS,
            itm="upload-tool",
            numActiveDocs=num_active_docs)

    # X-FILENAME is the flag to signify a file upload. Without it there
    # is nothing to process, so answer with an explicit client error
    # instead of falling off the end of the view.
    if 'X-FILENAME' not in request.headers:
        return 'Missing X-FILENAME header', 400

    # File upload through javascript
    file_manager = utility.load_file_manager()
    # The header value is UTF-8 percent-encoded (e.g. '%E7' instead of
    # python's '\xe7'); unquote turns it back into the real name.
    file_name = unquote(request.headers['X-FILENAME'])
    if file_name.endswith('.lexos'):
        file_manager.handle_upload_workspace()
        # Reload the file manager before updating the workspace.
        file_manager = utility.load_file_manager()
        file_manager.update_workspace()
    else:
        file_manager.add_upload_file(request.data, file_name)
    utility.save_file_manager(file_manager)
    return 'success'
Example #15
0
def upload():
    """Handle the functionality of the upload page.

    GET: fix the session, make sure general settings exist in it and
    render 'upload.html'. Other methods: expect a file upload flagged by
    the X-FILENAME header, whose value is the percent-encoded file name;
    '.lexos' files are loaded as a workspace, other files are added to
    the file manager.

    :return: a rendered template for GET, the string 'success' after an
     upload, or a ('message', 400) response when the upload header is
     absent. The 400 replaces an implicit None return that Flask turned
     into a server error.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    if request.method == "GET":
        print("About to fix session in case of browser caching")
        # fix the session in case the browser is caching the old session
        session_manager.fix()
        print("Session fixed. Rendering template.")
        if 'generalsettings' not in session:
            session['generalsettings'] = \
                constants.DEFAULT_GENERALSETTINGS_OPTIONS
        return render_template(
            'upload.html',
            MAX_FILE_SIZE=constants.MAX_FILE_SIZE,
            MAX_FILE_SIZE_INT=constants.MAX_FILE_SIZE_INT,
            MAX_FILE_SIZE_UNITS=constants.MAX_FILE_SIZE_UNITS,
            itm="upload-tool",
            numActiveDocs=num_active_docs)

    # X-FILENAME is the flag to signify a file upload
    if 'X-FILENAME' in request.headers:
        # File upload through javascript
        file_manager = utility.load_file_manager()
        # Grab the filename, which will be UTF-8 percent-encoded (e.g.
        # '%E7' instead of python's '\xe7'), and decode it with urllib's
        # percent-encoding decoder.
        file_name = unquote(request.headers['X-FILENAME'])
        if file_name.endswith('.lexos'):
            file_manager.handle_upload_workspace()
            # update filemanager
            file_manager = utility.load_file_manager()
            file_manager.update_workspace()
        else:
            file_manager.add_upload_file(request.data, file_name)
        utility.save_file_manager(file_manager)
        return 'success'

    # No upload header: report a bad request rather than returning None.
    return 'Missing X-FILENAME header', 400
Example #16
0
def set_class_selected():
    """Apply one class label to every selected file.

    The JSON body is indexed positionally: element 0 holds the selected
    file ids, element 1 the class label to assign.

    :return: JSON string echoing the selected ids
    """
    manager = utility.load_file_manager()
    selected_ids = request.json[0]
    class_label = request.json[1]
    for selected_id in selected_ids:
        target = manager.files[int(selected_id)]
        target.set_class_label(class_label)
    utility.save_file_manager(manager)
    return json.dumps(selected_ids)
Example #17
0
def set_class_selected():
    """Set the same class label on each file listed in the request.

    request.json[0] is the collection of file ids and request.json[1]
    the new class label.

    :return: the ids from the request, dumped back as a JSON string
    """
    manager = utility.load_file_manager()
    id_rows = request.json[0]
    label = request.json[1]
    for raw_id in id_rows:
        manager.files[int(raw_id)].set_class_label(label)
    utility.save_file_manager(manager)
    response_body = json.dumps(id_rows)
    return response_body
Example #18
0
def disable_rows():
    """Disable every file whose id is listed in the JSON request body.

    Each id is passed to disable_files() in its own one-element list.

    :return: string indicating that it has succeeded
    """
    manager = utility.load_file_manager()
    for row_id in request.json:
        single = [row_id]
        manager.disable_files(single)
    utility.save_file_manager(manager)
    return 'success'
Example #19
0
def download_scrubbing():
    """Download the active files zipped as 'scrubbed.zip'.

    Triggered by the 'Download Scrubbed Files' button on scrub.html.

    :return: the result of zip_active_files() for that archive name
    """
    manager = utility.load_file_manager()
    archive = manager.zip_active_files('scrubbed.zip')
    return archive
Example #20
0
def select_all():
    """Enable all files in the file manager and save it.

    :return: string indicating that it has succeeded
    """
    manager = utility.load_file_manager()
    manager.enable_all()
    utility.save_file_manager(manager)
    status = 'success'
    return status
Example #21
0
def download_documents():
    """Download all selected (active) files.

    Triggered by the 'Download Selected Documents' button in
    manage.html.

    :return: a .zip file containing all selected files
    """
    manager = utility.load_file_manager()
    archive = manager.zip_active_files('selected_documents.zip')
    return archive
Example #22
0
def download_documents():
    """Zip the active files as 'selected_documents.zip' for download.

    Runs when the 'Download Selected Documents' button in manage.html
    is clicked.

    :return: a .zip file containing all selected files
    """
    zip_name = 'selected_documents.zip'
    return utility.load_file_manager().zip_active_files(zip_name)
Example #23
0
def select_all():
    """Mark every file as enabled and persist the file manager.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    manager.enable_all()
    utility.save_file_manager(manager)
    return 'success'
Example #24
0
def download_cutting():
    """Download the active files zipped as 'cut_files.zip'.

    Triggered by the 'Download Segmented Files' button on cut.html.

    :return: a .zip with all the cut files
    """
    manager = utility.load_file_manager()
    archive = manager.zip_active_files('cut_files.zip')
    return archive
Example #25
0
def download_cutting():
    """Zip the active files as 'cut_files.zip' for download.

    Runs when the 'Download Segmented Files' button on cut.html is
    clicked.

    :return: a .zip with all the cut files
    """
    zip_name = 'cut_files.zip'
    return utility.load_file_manager().zip_active_files(zip_name)
Example #26
0
def disable_rows():
    """Disable the files listed in the JSON request body, one at a time.

    :return: string indicating that it has succeeded
    """
    manager = utility.load_file_manager()
    for row_id in request.json:
        # disable_files() takes a list, so wrap each id on its own.
        manager.disable_files([row_id])
    utility.save_file_manager(manager)
    return 'success'
Example #27
0
def set_class():
    """Set the class label of one file.

    request.json[0] is the file id (converted to int) and
    request.json[1] the new class label.

    :return: string indicating that it has succeeded
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    class_label = request.json[1]
    target = manager.files[target_id]
    target.set_class_label(class_label)
    utility.save_file_manager(manager)
    return 'success'
Example #28
0
def download_workspace():
    """Send the zipped workspace (.lexos file) to the user.

    The same file can later be uploaded to restore the workspace.

    :return: a send_file response with the workspace as an attachment
    """
    manager = utility.load_file_manager()
    workspace_path = manager.zip_workspace()
    response = send_file(
        workspace_path,
        attachment_filename=constants.WORKSPACE_FILENAME,
        as_attachment=True)
    return response
Example #29
0
def set_class():
    """Assign a class label to a single file.

    The JSON body carries the file id at index 0 and the class label at
    index 1.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    label = request.json[1]
    manager.files[target_id].set_class_label(label)
    utility.save_file_manager(manager)
    status = 'success'
    return status
Example #30
0
def set_label():
    """Rename one file and set its label to the same value.

    request.json[0] is the file id (converted to int) and
    request.json[1] the new name.

    :return: string indicating that it has succeeded
    """
    manager = utility.load_file_manager()
    target_id = int(request.json[0])
    name = request.json[1]
    manager.files[target_id].set_name(name)
    manager.files[target_id].label = name
    utility.save_file_manager(manager)
    status = 'success'
    return status
Example #31
0
def set_label():
    """Set both the name and the label of a file to a new value.

    The JSON body holds the file id at index 0 and the new name at
    index 1.

    :return: the string 'success'
    """
    manager = utility.load_file_manager()
    raw_id, name = request.json[0], request.json[1]
    target_id = int(raw_id)
    manager.files[target_id].set_name(name)
    manager.files[target_id].label = name
    utility.save_file_manager(manager)
    return 'success'
Example #32
0
File: base.py Project: vbaira/Lexos
def download_workspace() -> str:
    """Send the workspace file (.lexos) to the user.

    NOTE(review): the annotation says str, but the value returned is
    whatever send_file() produces - confirm and adjust the annotation.

    :return: The workspace file, sent as an attachment.
    """
    workspace_path = utility.load_file_manager().zip_workspace()
    send_options = {
        "attachment_filename": constants.WORKSPACE_FILENAME,
        "as_attachment": True,
    }
    return send_file(workspace_path, **send_options)
Example #33
0
def download_workspace():
    """Zip the workspace and send it to the user as a .lexos attachment.

    Uploading that file again restores the workspace.

    :return: a send_file response for the zipped workspace
    """
    manager = utility.load_file_manager()
    zipped = manager.zip_workspace()
    return send_file(zipped,
                     attachment_filename=constants.WORKSPACE_FILENAME,
                     as_attachment=True)
Example #34
0
def get_previews():
    """Return the id, label and contents of one file as JSON.

    The file id is read from the request body.

    :return: a json string with the keys "id", "label" and
    "previewText"
    """
    manager = utility.load_file_manager()
    requested_id = int(request.data)
    label = manager.files[requested_id].label
    contents = manager.files[requested_id].load_contents()
    return json.dumps({
        "id": requested_id,
        "label": label,
        "previewText": contents,
    })
Example #35
0
def add_document() -> str:
    """Add a document to the file manager, or load a .lexos workspace.

    The file name comes percent-encoded in the "file-name" request
    header; the document contents are the request body.

    :return: An empty string.
    """
    manager = utility.load_file_manager()

    # Decode the percent-encoded file name.
    decoded_name = unquote(request.headers["file-name"])

    if not decoded_name.endswith('.lexos'):
        # Regular document: add it as an uploaded file.
        manager.add_upload_file(request.data, decoded_name)
    else:
        # Workspace file: handle it, then reload and update.
        manager.handle_upload_workspace()
        manager = utility.load_file_manager()
        manager.update_workspace()

    utility.save_file_manager(manager)
    return ''
Example #36
0
def get_previews():
    """Build a JSON preview of the file whose id is in the request body.

    :return: a json string with the file's id, its label and its
    loaded contents (under "previewText")
    """
    manager = utility.load_file_manager()
    wanted_id = int(request.data)
    preview = {"id": wanted_id}
    preview["label"] = manager.files[wanted_id].label
    preview["previewText"] = manager.files[wanted_id].load_contents()
    return json.dumps(preview)
Example #37
0
File: base.py Project: vbaira/Lexos
def detect_active_docs() -> int:
    """Detect the number of active documents.

    :return: The number of active files, or 0 when there are none or
    the session is empty.
    """
    if not session:
        # NOTE(review): the redirect object built here is discarded, so
        # no redirect is actually sent - confirm whether that is
        # intended.
        redirect(url_for('base.no_session'))
        return 0

    active_files = utility.load_file_manager().get_active_files()
    return len(active_files) if active_files else 0
Example #38
0
def get_tokenizer_csv():
    """Generate the tokenizer CSV and send it as a download.

    Called when the CSV button in Tokenizer is clicked. The analysis
    and CSV options are cached in the session before the file is
    generated.

    :return: a send_file response named "frequency_matrix" plus the
    extension reported by generate_csv().
    """
    manager = utility.load_file_manager()
    session_manager.cache_analysis_option()
    session_manager.cache_csv_options()
    csv_path, extension = utility.generate_csv(manager)
    utility.save_file_manager(manager)

    download_name = "frequency_matrix" + extension
    return send_file(
        csv_path,
        attachment_filename=download_name,
        as_attachment=True)
Example #39
0
def analyze():
    """Analyze the active files against the uploaded dictionaries.

    Stores the formula from the front end in the session, feeds every
    active file and every file found in the directory returned by
    get_path() into a ContentAnalysisModel, and runs the analysis.

    :return: A JSON response. It contains only an "error" entry when
    the analysis reported errors or produced no corpus results;
    otherwise it contains the overview, corpus and document results
    with "error" set to False.
    """

    path = get_path()
    analysis = ContentAnalysisModel()
    file_manager = load_file_manager()
    active_files = file_manager.get_active_files()

    # Set the formula
    session["formula"] = ContentAnalysisReceiver() \
        .options_from_front_end().formula

    # Add the files to analyze
    for file in active_files:
        analysis.add_file(file_name=file.name,
                          label=file.label,
                          content=file.load_contents())

    # Add the dictionaries. Each file is read inside a context manager
    # so its handle is closed right away instead of being leaked.
    for name in os.listdir(path):
        with open(os.path.join(path, name), 'r') as dictionary_file:
            dictionary_content = dictionary_file.read()
        analysis.add_dictionary(file_name=name,
                                label=name,
                                content=dictionary_content)

    # Analyze
    overview_results, overview_csv, corpus_results, corpus_csv, \
        document_results, errors = analysis.analyze()

    # Return the results
    if len(errors):
        return jsonify({"error": errors})

    if not len(corpus_results):
        return jsonify({"error": "Failed to perform the analysis."})

    return jsonify({
        # The first overview row is the table head, the rest the body.
        "overview-table-head": overview_results[0],
        "overview-table-body": overview_results[1:],
        "overview-table-csv": overview_csv,
        "corpus-table-head": ["Dictionary", "Phrase", "Count"],
        "corpus-table-body": corpus_results,
        "corpus-table-csv": corpus_csv,
        "documents": document_results,
        "error": False
    })
Example #40
0
def do_cutting():
    """Cut the files and return their previews.

    Runs when the 'Preview Cuts' or 'Apply Cuts' button is clicked on
    cut.html. The cutting options are cached first; the file manager is
    saved only when the form's 'action' field equals 'apply'.

    :return: a json string of the form {"data": previews}
    """
    manager = utility.load_file_manager()
    session_manager.cache_cutting_options()
    apply_cuts = request.form['action'] == 'apply'
    cut_previews = manager.cut_files(saving_changes=apply_cuts)
    if apply_cuts:
        utility.save_file_manager(manager)
    return json.dumps({"data": cut_previews})
Example #41
0
def do_cutting():
    """Cut the files, optionally saving the result.

    Handles both buttons on cut.html: 'Preview Cuts' only computes the
    previews, 'Apply Cuts' (form action 'apply') also saves the file
    manager.

    :return: cut file previews wrapped as {"data": ...} in a json
    string
    """
    manager = utility.load_file_manager()
    session_manager.cache_cutting_options()
    should_save = (request.form['action'] == 'apply')
    payload = {"data": manager.cut_files(saving_changes=should_save)}
    if should_save:
        utility.save_file_manager(manager)
    encoded = json.dumps(payload)
    return encoded
Example #42
0
File: base.py Project: vbaira/Lexos
def get_active_document_count() -> int:
    """Get the number of active documents.

    :return: The number of active files, or 0 when there are none or
    the session is empty.
    """
    if not session:
        # NOTE(review): the value returned by redirect() is discarded,
        # so this call does not redirect the client - confirm intent.
        redirect("no-session")
        return 0

    active = utility.load_file_manager().get_active_files()
    if not active:
        return 0
    return len(active)
Example #43
0
def download() -> str:
    """Return a download of the active files with caching disabled.

    NOTE(review): the annotation says str, but the value returned is
    the object produced by make_response() - confirm and adjust.

    :return: the response wrapping "scrubbed_documents.zip".
    """
    manager = utility.load_file_manager()
    archive = manager.zip_active_files("scrubbed_documents.zip")
    response = make_response(archive)

    # Headers that disable download caching.
    no_cache_headers = (
        ("Cache-Control", "max-age=0, no-cache, no-store, must-revalidate"),
        ("Expires", 0),
        ("Pragma", "no-cache"),
    )
    for header_name, header_value in no_cache_headers:
        response.headers[header_name] = header_value

    return response
Example #44
0
def execute():
    """Cut the files.

    The cutting options are cached in the session first. The file
    manager is saved only when the form's "action" field is "apply".

    :return: Previews of the cut files, as a JSON string.
    """
    manager = utility.load_file_manager()
    session_manager.cache_cutting_options()

    apply_requested = request.form["action"] == "apply"
    cut_previews = manager.cut_files(saving_changes=apply_requested)

    if apply_requested:
        utility.save_file_manager(manager)

    return json.dumps(cut_previews)
Example #45
0
def detect_active_docs() -> int:
    """Detect the number of active documents.

    This function can be called at the beginning of each tool.

    :return: number of active files; 0 when there are none or when the
    session is empty
    """
    # TODO: this function should probably be moved to file_manager.py
    if session:
        active_files = utility.load_file_manager().get_active_files()
        return len(active_files) if active_files else 0

    # NOTE(review): the redirect built here is never returned, so it has
    # no effect on the response - confirm whether that is intended.
    redirect(url_for('base.no_session'))
    return 0
Example #46
0
def cut():
    """Render the cut page.

    Collects the letter, word and line counts of every active file
    (with their maxima, 0 when nothing is active) and, on a GET request,
    renders 'cut.html' with those figures and the previews of the active
    files.

    :return: a response object (a render_template call) to flask and
    eventually to the browser; None for non-GET requests.
    """
    doc_count = detect_active_docs()
    manager = utility.load_file_manager()
    active_files = manager.get_active_files()

    # Per-file statistics; the lists are empty when nothing is active.
    char_counts = [active_file.num_letters() for active_file in active_files]
    word_counts = [active_file.num_words() for active_file in active_files]
    line_counts = [active_file.num_lines() for active_file in active_files]
    # default=0 covers the case with no active files.
    largest_char = max(char_counts, default=0)
    largest_word = max(word_counts, default=0)
    largest_line = max(line_counts, default=0)
    file_ids = [active_file.id for active_file in active_files]

    if request.method != "GET":
        return None

    # "GET" request occurs when the page is first loaded.
    if 'cuttingoptions' not in session:
        session['cuttingoptions'] = constants.DEFAULT_CUT_OPTIONS
    active_previews = manager.get_previews_of_active()
    return render_template(
        'cut.html',
        previews=active_previews,
        num_active_files=len(active_previews),
        numChar=char_counts,
        numWord=word_counts,
        numLine=line_counts,
        maxChar=largest_char,
        maxWord=largest_word,
        maxLine=largest_line,
        activeFileIDs=file_ids,
        itm="cut",
        numActiveDocs=doc_count)
Example #47
0
def do_scrubbing():
    """Scrub the files and send back a preview of each of them.

    :return: a json string whose "data" entry lists, for every preview,
    [id, file_name(label), class_label, html-escaped preview text]
    """
    file_manager = utility.load_file_manager()
    # Reached when the 'Preview Scrubbing' or 'Apply Scrubbing' button is
    # clicked on scrub.html: remember the submitted files and options.
    session_manager.cache_alteration_files()
    session_manager.cache_scrub_options()
    # Only the 'Apply Scrubbing' button makes the changes permanent
    apply_changes = request.form["formAction"] == "apply"
    scrubbed = file_manager.scrub_files(saving_changes=apply_changes)
    # Each item is (id, file_name(label), class_label, preview); only the
    # preview text at index 3 contains text that needs html escaping.
    escaped_previews = []
    for preview in scrubbed:
        escaped_text = general_functions.html_escape(preview[3])
        escaped_previews.append(
            [preview[0], preview[1], preview[2], escaped_text])
    if apply_changes:
        utility.save_file_manager(file_manager)
    return json.dumps({"data": escaped_previews})
Example #48
0
def scrape():
    """Scrape the submitted urls and generate a text file from each url.

    A "GET" request renders the scrape page. A "POST" request reads the
    "urls" string of the json payload, downloads every url in it and adds
    the text of each response to the file manager as "url<index>.txt".
    :return: the rendered page for "GET"; for "POST" a json string that
    indicates that the scraping has succeeded
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    if request.method == "GET":
        return render_template('scrape.html', numActiveDocs=num_active_docs)
    if request.method == "POST":
        import requests
        # Urls may be separated by commas and/or any whitespace. Blank
        # entries (empty input, a leading or a trailing comma) are dropped
        # so that requests.get() is never called with an empty url.
        raw_urls = request.json["urls"]
        urls = [url for url in re.split(r"[\s,]+", raw_urls) if url]
        file_manager = utility.load_file_manager()
        for i, url in enumerate(urls):
            # NOTE(review): the url comes straight from the client and is
            # fetched without a timeout or host restriction -- consider
            # adding both.
            r = requests.get(url)
            file_manager.add_upload_file(r.text, "url" + str(i) + ".txt")
        utility.save_file_manager(file_manager)
        response = "success"
        return json.dumps(response)
Example #49
0
def merge_documents():
    """Merge several documents into one new active document.

    The json payload is a list of [file ids, new name, source file,
    milestone]. The contents of the listed files are concatenated in the
    given order with the milestone string between consecutive documents.
    :return: json object with the new file's id and preview
    """
    print("Merging...")
    file_manager = utility.load_file_manager()
    file_manager.disable_all()
    file_ids = request.json[0]
    new_name = request.json[1]
    source_file = request.json[2]
    milestone = request.json[3]
    contents = [file_manager.files[int(file_id)].load_contents()
                for file_id in file_ids]
    # Joining puts the milestone only *between* documents. The milestone is
    # plain text here, so it may safely contain regex metacharacters and no
    # trailing milestone has to be stripped afterwards.
    new_file = milestone.join(contents)
    # The routine below is ugly, but it works
    file_id = file_manager.add_file(source_file, new_name, new_file)
    merged = file_manager.files[file_id]
    merged.name = new_name
    merged.label = new_name
    merged.active = True
    utility.save_file_manager(file_manager)
    # Returns a new fileID and some preview text
    return json.dumps([file_id, new_file[0:152] + '...'])
Example #50
0
def viz():
    """Handles the functionality on the alternate bubbleViz page.

    A "GET" request stores the default cloud and bubbleviz options in the
    session when they are missing and renders the empty page. A "POST"
    request sums the term counts of the selected (or all active) documents,
    keeps the most frequent terms and renders the page with them.

    Form fields read on "POST": 'segmentlist' (ids of the chosen documents),
    'minlength' (minimum term length, 0 when missing or empty) and
    'maxwords' (maximum number of terms, 100 when missing or empty).
    :return: a response object (often a render_template call) to flask and
    eventually to the browser.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    file_manager = utility.load_file_manager()
    labels = file_manager.get_active_labels_with_id()
    from collections import OrderedDict
    from natsort import natsorted
    labels = OrderedDict(natsorted(labels.items(), key=lambda x: x[1]))
    if request.method == "GET":
        # "GET" request occurs when the page is first loaded.
        if 'cloudoption' not in session:
            session['cloudoption'] = constants.DEFAULT_CLOUD_OPTIONS
        if 'bubblevisoption' not in session:
            session['bubblevisoption'] = constants.DEFAULT_BUBBLEVIZ_OPTIONS
        return render_template(
            'viz.html',
            JSONObj="",
            labels=labels,
            itm="bubbleviz",
            numActiveDocs=num_active_docs)
    if request.method == "POST":
        # "POST" request occur when html form is submitted
        # Get the file manager and the tokenization options
        file_manager = utility.load_file_manager()
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALYZE_OPTIONS
        token_type = session['analyoption']['tokenType']
        token_size = int(session['analyoption']['tokenSize'])
        # Limit docs to those selected or to active docs
        chosen_doc_ids = [int(x) for x in request.form.getlist('segmentlist')]
        if chosen_doc_ids:
            active_docs = list(chosen_doc_ids)
        else:
            active_docs = [l_file.id
                           for l_file in file_manager.files.values()
                           if l_file.active]
        # Get the contents of all selected/active docs; a chosen document
        # that is not active is left out.
        all_contents = [file_manager.files[file_id].load_contents()
                        for file_id in active_docs
                        if file_manager.files[file_id].active]
        # Generate a DTM
        dtm, vocab = utility.simple_vectorizer(
            all_contents, token_type, token_size)
        # Convert the DTM to a pandas dataframe with the terms as column
        # headers
        import pandas as pd
        df = pd.DataFrame(dtm, columns=vocab)
        # Get the Minimum Token Length and Maximum Term Settings. A missing
        # or empty field falls back to its default instead of crashing.
        minimum_length = int(request.form.get('minlength') or 0)
        max_num_words = int(request.form.get('maxwords') or 100)
        # Keep only the terms that meet the minimum length
        kept_terms = [term for term in vocab if len(term) >= minimum_length]
        df = df[kept_terms]
        # Extract a dictionary of term count sums
        sums_dict = df.sum(axis=0).to_dict()
        # Create a new dataframe of sums and sort it by counts, then terms
        # Warning!!! This is not natsort. Multiple terms at the edge of
        # the maximum number of words limit may be cut off in arbitrary
        # order. We need to implement natsort for dataframes.
        ranked = pd.DataFrame(
            list(sums_dict.items()), columns=['term', 'count'])
        ranked = ranked.sort_values(
            by=['count', 'term'], axis=0, ascending=[False, True])
        top = ranked.head(n=max_num_words)
        # Build the JSON object for d3.js
        children = [
            {"name": term, "size": str(count)}
            for term, count in zip(top['term'].tolist(),
                                   top['count'].tolist())]
        json_obj = {"name": "tokens", "children": children}
        # Turn the JSON object into a JSON string for the front end
        json_str = json.dumps(json_obj)
        session_manager.cache_cloud_option()
        session_manager.cache_bubble_viz_option()
        return render_template(
            'viz.html',
            JSONObj=json_str,
            labels=labels,
            itm="bubbleviz",
            numActiveDocs=num_active_docs)
Example #51
0
def content_analysis():
    """Handles the functionality on the contentanalysis page.

    A 'GET' request renders the page with the dictionary labels, active
    flags, toggle-all value and formula stored in the session (or their
    defaults). Any other request runs the analysis on the active documents
    with the active dictionaries found in the session's 'content_analysis/'
    folder.
    :return: the rendered page for 'GET'; otherwise a json string with the
    result tables, or the result of error(...) when a document or dictionary
    is missing or the analysis reports formula errors.
    """
    analysis = ContentAnalysisModel()
    path = os.path.join(constants.TMP_FOLDER,
                        constants.UPLOAD_FOLDER,
                        session['id'], 'content_analysis/')
    dictionary_names = os.listdir(path) if os.path.isdir(path) else []
    if request.method == 'GET':
        dict_labels = session.get('dictionary_labels', [])
        active_dicts = session.get('active_dictionaries',
                                   [True] * len(dict_labels))
        toggle_all_value = session.get('toggle_all_value', True)
        formula = session.get('formula', "")
        return render_template('contentanalysis.html',
                               dictionary_labels=dict_labels,
                               active_dictionaries=active_dicts,
                               toggle_all_value=toggle_all_value,
                               itm="content-analysis",
                               formula=formula)
    else:
        num_active_docs = detect_active_docs()
        # Read the options sent by the front end once and reuse them
        options = ContentAnalysisReceiver().options_from_front_end()
        active_dicts = options.active_dicts
        dict_labels = options.dict_labels
        session['formula'] = options.formula
        if len(dict_labels) == 0:
            # No labels were sent: label every dictionary file after its
            # file name (without extension) and treat all of them as active
            dict_labels = [os.path.splitext(dict_name)[0]
                           for dict_name in dictionary_names]
            active_dicts = [True] * len(dict_labels)
        num_active_dicts = active_dicts.count(True)
        if num_active_docs == 0 and num_active_dicts == 0:
            return error("At least 1 active document and 1 active "
                         "dictionary are required to perform a "
                         "content analysis.")
        elif num_active_docs == 0:
            return error("At least 1 active document is required to perform "
                         "a content analysis.")
        elif num_active_dicts == 0:
            return error("At least 1 active dictionary is required to perform"
                         " a content analysis.")
        file_manager = load_file_manager()
        for l_file in file_manager.get_active_files():
            analysis.add_file(file_name=l_file.name,
                              label=l_file.label,
                              content=l_file.load_contents())
        for dict_name, dict_label, active in zip(dictionary_names,
                                                 dict_labels,
                                                 active_dicts):
            if active:
                # The context manager closes the dictionary file again
                with open(os.path.join(path, dict_name), "r") as dict_file:
                    content = dict_file.read()
                analysis.add_dictionary(file_name=dict_name,
                                        label=dict_label,
                                        content=content)
        result_table, corpus_raw_counts_table, files_raw_counts_tables,\
            formula_errors = analysis.analyze()
        if len(formula_errors) != 0 or result_table is None:
            return error(formula_errors)
        data = {"result_table": result_table,
                "dictionary_labels": dict_labels,
                "active_dictionaries": active_dicts,
                "corpus_raw_counts_table": corpus_raw_counts_table,
                "files_raw_counts_tables": files_raw_counts_tables,
                "error": False}
        return json.dumps(data)
Example #52
0
def word_cloud():
    """Serve the single word cloud page (a prototype visualisation).

    A "GET" request renders the page without data. A "POST" request sums the
    term counts of the selected (or all active) documents and renders the
    page with the data for the cloud and for the word count table.
    :return: a response object (often a render_template call) to flask and
    eventually to the browser.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    file_manager = utility.load_file_manager()
    labels = file_manager.get_active_labels_with_id()
    from collections import OrderedDict
    label_pairs = natsorted(list(labels.items()), key=lambda x: x[1])
    labels = OrderedDict(label_pairs)

    if request.method == "GET":
        # First page load: only the cloud options need a default, there is
        # no separate wordcloud option to initialize.
        if 'cloudoption' not in session:
            session['cloudoption'] = constants.DEFAULT_CLOUD_OPTIONS
        return render_template(
            'wordcloud.html',
            itm="word-cloud",
            labels=labels,
            numActiveDocs=num_active_docs)

    if request.method == "POST":
        # The html form was submitted: fetch the file manager and the
        # tokenization options.
        file_manager = utility.load_file_manager()
        if 'analyoption' not in session:
            session['analyoption'] = constants.DEFAULT_ANALYZE_OPTIONS
        analyze_options = session['analyoption']
        token_type = analyze_options['tokenType']
        token_size = int(analyze_options['tokenSize'])

        # Use the documents chosen in the form, or else every active one
        chosen_doc_ids = [int(x) for x in request.form.getlist('segmentlist')]
        if chosen_doc_ids:
            doc_ids = list(chosen_doc_ids)
        else:
            doc_ids = [l_file.id
                       for l_file in file_manager.files.values()
                       if l_file.active]

        # Load the contents, leaving out documents that are not active
        selected = (file_manager.files[doc_id] for doc_id in doc_ids)
        all_contents = [l_file.load_contents()
                        for l_file in selected if l_file.active]

        # Generate a DTM and sum the counts of every term over all docs
        dtm, vocab = utility.simple_vectorizer(
            all_contents, token_type, token_size)
        import pandas as pd
        totals = pd.DataFrame(dtm).sum(axis=0)

        # Build the JSON object for d3.js
        children = [{"name": term, "size": str(totals[position])}
                    for position, term in enumerate(vocab)]
        json_obj = {"name": "tokens", "children": children}

        # Rows of the word count table, largest size first
        from operator import itemgetter
        ranked_terms = natsorted(
            children, key=itemgetter('size'), reverse=True)
        column_values = [[term["name"], term["size"]]
                         for term in ranked_terms]

        # The front end needs the JSON object as a string
        json_str = json.dumps(json_obj)
        session_manager.cache_cloud_option()
        return render_template(
            'wordcloud.html',
            labels=labels,
            JSONObj=json_str,
            columnValues=column_values,
            itm="word-cloud",
            numActiveDocs=num_active_docs)
Example #53
0
def manage():
    """Handles the functionality of the select page.

    Its primary role is to activate/deactivate specific files depending on the
    user's input. A "GET" request renders the page; every other request is
    dispatched on the presence of a custom request header ('previewTest',
    'toggleFile', 'toggliFy', 'setLabel', 'setClass', 'disableAll',
    'selectAll', 'applyClassLabel', 'deleteActive' or 'deleteRow').
    :return: a response object (often a render_template call) to flask
    and eventually to the browser: the rendered page for "GET", a json
    string with the preview for 'previewTest', otherwise an empty string.
    """
    # Detect the number of active documents.
    num_active_docs = detect_active_docs()
    # Usual loading of the FileManager
    file_manager = utility.load_file_manager()
    if request.method == "GET":
        rows = file_manager.get_previews_of_all()
        for row in rows:
            # Replace the truthy/falsy state by the string used in the page
            row["state"] = "selected" if row["state"] else ""
        return render_template(
            'manage.html',
            rows=rows,
            itm="manage",
            numActiveDocs=num_active_docs)
    # On the select page, POSTs come from JavaScript AJAX XHRequests.
    if 'previewTest' in request.headers:
        file_id = int(request.data)
        file_label = file_manager.files[file_id].label
        file_preview = file_manager.files[file_id].get_preview()
        preview_vals = {
            "id": file_id,
            "label": file_label,
            "previewText": file_preview}
        return json.dumps(preview_vals)
    if 'toggleFile' in request.headers:
        file_id = int(request.data)
        # Toggle the file from active to inactive or vice versa
        file_manager.toggle_file(file_id)
    elif 'toggliFy' in request.headers:
        # request.data holds bytes; decode it so that it can be split on
        # the str separator "," into the individual file ids.
        file_ids = request.data.decode("utf-8").split(",")
        file_manager.disable_all()
        # Activate exactly the listed files
        file_manager.enable_files(file_ids)
    elif 'setLabel' in request.headers:
        new_name = request.headers['setLabel']
        file_id = int(request.data)
        file_manager.files[file_id].set_name(new_name)
        file_manager.files[file_id].label = new_name
    elif 'setClass' in request.headers:
        new_class_label = request.headers['setClass']
        file_id = int(request.data)
        file_manager.files[file_id].set_class_label(new_class_label)
    elif 'disableAll' in request.headers:
        file_manager.disable_all()
    elif 'selectAll' in request.headers:
        file_manager.enable_all()
    elif 'applyClassLabel' in request.headers:
        file_manager.classify_active_files()
    elif 'deleteActive' in request.headers:
        file_manager.delete_active_files()
    elif 'deleteRow' in request.headers:
        # delete the file in request.form
        file_manager.delete_files(list(request.form.keys()))
    utility.save_file_manager(file_manager)
    return ''  # Return an empty string because you have to return something
Example #54
0
def download_scrub():
    """Zip the active files for download.

    :return: the result of zipping the active files as 'scrubbed.zip'.
    """
    return utility.load_file_manager().zip_active_files('scrubbed.zip')
Example #55
0
def do_multicloud():
    """Count the terms of every selected document for the multicloud page.

    The documents are those listed in the form's 'segmentlist' field or,
    when that list is empty, all documents; documents that are not active
    are always left out. The documents are returned sorted by label.
    :return: a json string of a two-item list: the objects expected by
    d3.js ({"name", "children"}, one per document) and the word counts
    ({"name", "word_counts", "words"}, one per document)
    """
    # Get the file manager and tokenization options
    file_manager = utility.load_file_manager()
    if 'analyoption' not in session:
        session['analyoption'] = constants.DEFAULT_ANALYZE_OPTIONS
    token_type = session['analyoption']['tokenType']
    token_size = int(session['analyoption']['tokenSize'])
    # Limit docs to those selected or to active docs
    chosen_doc_ids = [
        int(x) for x in request.form.getlist('segmentlist')
    ]
    if chosen_doc_ids:
        candidates = [file_manager.files[file_id]
                      for file_id in chosen_doc_ids]
    else:
        candidates = list(file_manager.files.values())
    # Keep the active docs only and order them by label. Labels and contents
    # are both taken from this one list, so that every label stays attached
    # to the counts of its own document.
    selected_files = sorted(
        (l_file for l_file in candidates if l_file.active),
        key=lambda l_file: l_file.label)
    labels = [l_file.label for l_file in selected_files]
    all_contents = [l_file.load_contents() for l_file in selected_files]
    # Generate a DTM
    dtm, vocab = utility.simple_vectorizer(all_contents,
                                           token_type,
                                           token_size)
    # Convert the DTM to a pandas dataframe with terms as column headers
    # (one row per document, columns in the order of vocab)
    df = pd.DataFrame(dtm, columns=vocab)
    terms = list(vocab)
    json_obj = []
    word_counts_array = []
    for label, (_, row) in zip(labels, df.iterrows()):
        # Pair every term with its own count first, then sort by term
        term_counts = sorted(zip(terms, row.tolist()),
                             key=lambda pair: pair[0])
        # Full json values expected by d3.js: {'text': u'a', 'size': '1'}
        children = [{"text": term, "size": str(count)}
                    for term, count in term_counts]
        json_obj.append({"name": label, "children": children})
        # Replaces client-side array generator
        word_counts = {item["text"]: item["size"] for item in children}
        word_counts_array.append(
            {"name": label, "word_counts": word_counts,
                "words": children})
    # The front end needs a string in the response
    response = json.dumps([json_obj, word_counts_array])
    session_manager.cache_cloud_option()
    session_manager.cache_multi_cloud_options()
    return response