Example #1
0
def test_add_dictionary():
    """Check what add_dictionary stores for a newly added dictionary.

    The dictionary is read back from index 0 of ``dictionaries``; its name
    and label must match the arguments, its content must be the one-element
    list ``["test"]`` and its ``active`` attribute must be truthy.
    """
    model = ContentAnalysisModel()
    model.add_dictionary(file_name="dict1", label="dict1", content="test")

    added = model.dictionaries[0]
    assert added.name == "dict1"
    assert added.label == "dict1"
    assert added.content == ["test"]
    assert added.active
Example #2
0
def test_join_active_dicts():
    """Check the label and content of the entries from join_active_dicts.

    Two single-phrase dictionaries are added; the joined result is expected
    to expose them, in insertion order, through ``dict_label`` and
    ``content``.
    """
    # (dictionary label, dictionary content) in insertion order.
    entries = (("dict1", "test1"), ("dict2", "test2"))

    model = ContentAnalysisModel()
    for label, content in entries:
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content=content)

    joined = model.join_active_dicts()
    for position, (label, content) in enumerate(entries):
        phrase = joined[position]
        assert phrase.dict_label == label
        assert phrase.content == content
Example #3
0
def test_dictionary_colors():
    """Check that every added dictionary gets a non-empty color string.

    ``dictionary_colors`` is indexed by dictionary label; each of the two
    labels added here must map to a ``str`` of length greater than zero.
    """
    test = ContentAnalysisModel()
    test.add_dictionary(file_name="dict1.txt", label="dict1", content="test")
    test.add_dictionary(file_name="dict2.txt", label="dict2", content="test2")
    colors = test.dictionary_colors

    # Validate each label against its own entry so that an empty color for
    # "dict2" cannot slip through by checking "dict1" twice.
    for label in ("dict1", "dict2"):
        assert isinstance(colors[label], str) and len(colors[label]) > 0
Example #4
0
def analyze():
    """Run the content analysis and return its results as a JSON response.

    Stores the formula received from the front end in the session, adds
    every active file and every entry listed under ``get_path()`` (as a
    dictionary) to a ``ContentAnalysisModel`` and serializes the outcome.

    :return: A ``jsonify`` response. When the analysis reports errors, or
        produces no corpus results, the response only carries an
        ``"error"`` entry. Otherwise it carries the overview and corpus
        tables (head, body and CSV), the per-document results and
        ``"error": False``.
    """

    path = get_path()
    analysis = ContentAnalysisModel()
    file_manager = load_file_manager()
    active_files = file_manager.get_active_files()

    # Set the formula
    session["formula"] = ContentAnalysisReceiver() \
        .options_from_front_end().formula

    # Add the files to analyze
    for file in active_files:
        analysis.add_file(file_name=file.name,
                          label=file.label,
                          content=file.load_contents())

    # Add the dictionaries
    for name in os.listdir(path):
        # Read inside a context manager so the handle is closed immediately
        # instead of lingering until garbage collection.
        with open(os.path.join(path, name), 'r') as dictionary_file:
            dictionary_content = dictionary_file.read()
        analysis.add_dictionary(file_name=name,
                                label=name,
                                content=dictionary_content)

    # Analyze
    overview_results, overview_csv, corpus_results, corpus_csv, \
        document_results, errors = analysis.analyze()

    # Return the results
    if len(errors):
        return jsonify({"error": errors})

    if not len(corpus_results):
        return jsonify({"error": "Failed to perform the analysis."})

    return jsonify({
        # The first overview row is used as the header, the rest as the body.
        "overview-table-head": overview_results[0],
        "overview-table-body": overview_results[1:],
        "overview-table-csv": overview_csv,
        "corpus-table-head": ["Dictionary", "Phrase", "Count"],
        "corpus-table-body": corpus_results,
        "corpus-table-csv": corpus_csv,
        "documents": document_results,
        "error": False
    })
Example #5
0
def test_count_words():
    """Check the value of counters[0][0] after count() for several inputs.

    Every case builds a fresh model with one file and one dictionary, runs
    ``count()`` and compares ``counters[0][0]`` with the expected number.
    """
    # (file content, dictionary content, expected counters[0][0])
    cases = (
        ('test', "test", 1),
        ('test test test', "test", 3),
        ('a test', "test, a", 2),
        ('a test', "test, a, a test", 1),
        ('a test test', "test, a, a test", 2),
        ('a test test a', "test, a, a test", 3),
    )

    for file_content, dict_content, expected in cases:
        model = ContentAnalysisModel()
        model.add_file(file_name="file1", label='file1',
                       content=file_content)
        model.add_dictionary(file_name="dict1.txt", label="dict1",
                             content=dict_content)
        model.count()
        assert model.counters[0][0] == expected
Example #6
0
def test_generate_scores():
    """Check scores[0] produced by generate_scores() for several formulas.

    The first model starts with an empty formula and is then re-scored with
    ``"[dict1]"``; the remaining cases each use a fresh model with a single
    file, a single dictionary and one formula.
    """
    model = ContentAnalysisModel(TestOptions(formula=""))
    model.add_file(file_name="file1", label='file1', content='test')
    model.add_dictionary(file_name="dict1.txt", label="dict1",
                         content="test")
    model.count()
    model.generate_scores()
    assert model.scores[0] == 0.0

    # Same model, now scored with a formula that references the dictionary.
    model.test_option = TestOptions(formula="[dict1]")
    model.save_formula()
    model.generate_scores()
    assert model.scores[0] == 1

    # (file content, formula, expected scores[0])
    cases = (
        ('test a', "[dict1]", 0.5),
        ('a test', "[dict1]*2", 1),
        ('a test a', "[dict1]", round(1 / 3, 3)),
    )
    for file_content, formula, expected in cases:
        model = ContentAnalysisModel()
        model.add_file(file_name="file1", label='file1',
                       content=file_content)
        model.add_dictionary(file_name="dict1.txt", label="dict1",
                             content="test")
        model.count()
        model.test_option = TestOptions(formula=formula)
        model.save_formula()
        model.generate_scores()
        assert model.scores[0] == expected
Example #7
0
def test_analyze():
    """Check the outcome of analyze() when the formula is ``"[]"``.

    ``analyze()`` is unpacked into six values; the overview results must be
    an empty string and the errors value must be a ``str``.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()

    model.add_file(file_name="file1", label='file1', content='test')
    for label, content in (("dict1", "test"), ("dict2", "test2")):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content=content)

    # Only the first and last of the six returned values are inspected.
    overview, _overview_csv, _corpus, _corpus_csv, _documents, errors = \
        model.analyze()
    assert overview == ""
    assert isinstance(errors, str)
Example #8
0
def test_get_active_dicts():
    """Check that get_active_dicts returns both newly added dictionaries."""
    model = ContentAnalysisModel()
    for label in ("dict1", "dict2"):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content="test")

    active_dicts = model.get_active_dicts()
    assert len(active_dicts) == 2
Example #9
0
def test_generate_corpus_counts_table():
    """Check the first column of the first and last corpus result rows.

    The rows come from ``generate_corpus_results`` applied to the value
    returned by ``count()``; the first row must start with ``"dict1"`` and
    the last row with ``"dict2"``.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()

    model.add_file(file_name="file1", label='file1', content='test')
    for label, content in (("dict1", "test"), ("dict2", "test2")):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content=content)

    counts = model.count()
    rows = model.generate_corpus_results(counts)
    first_row, last_row = rows[0], rows[-1]
    assert first_row[0] == "dict1"
    assert last_row[0] == "dict2"
Example #10
0
def test_generate_files_raw_counts_tables():
    """Check that every document result is named after the only file.

    The results come from ``generate_document_results`` applied to the
    value returned by ``count()``; each one is indexed with ``"name"``.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()

    model.add_file(file_name="file1", label='file1', content='test')
    for label, content in (("dict1", "test"), ("dict2", "test2")):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content=content)

    counts = model.count()
    for document in model.generate_document_results(counts):
        assert document["name"] == "file1"
Example #11
0
def test_check_formula():
    """Check the messages returned by check_formula() for bad formulas.

    A formula accepted by the checker yields an empty string. An unbalanced
    parenthesis and each of ``sin``, ``cos``, ``tan`` and ``log`` called
    without an argument yield a message prefixed by ``"Formula errors:<br>"``.
    """
    header = "Formula errors:<br>"

    model = ContentAnalysisModel(TestOptions(formula="()sin(1)"))
    assert model.check_formula() == ""

    model.test_option = TestOptions(formula="(")
    model.save_formula()
    assert model.check_formula() == header + "Mismatched parenthesis<br>"

    # The four functions share the same error wording when given no argument.
    for function_name in ("sin", "cos", "tan", "log"):
        model.test_option = TestOptions(formula=function_name + "()")
        model.save_formula()
        expected = (header + function_name +
                    " takes exactly one argument (0 given)" + "<br>")
        assert model.check_formula() == expected
def test_generate_files_raw_counts_tables():
    """Check that every per-file raw counts table is a complete HTML table.

    Each string returned by ``generate_files_raw_counts_tables`` must start
    with ``"<table"`` and end with ``"</table>"``.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()

    model.add_file(file_name="file1", label='file1', content='test')
    for label, content in (("dict1", "test"), ("dict2", "test2")):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content=content)

    # Evaluate the counts before the colors, as the call arguments would be.
    counts = model.count()
    colors = model.dictionary_colors
    tables = model.generate_files_raw_counts_tables(counts, colors)
    for table in tables:
        assert table.startswith("<table")
        assert table.endswith("</table>")
Example #13
0
def content_analysis():
    """Handles the functionality on the contentanalysis page.

    A GET request renders ``contentanalysis.html`` from the values stored in
    the session, falling back to defaults for missing ones. Any other
    request stores the submitted formula in the session, checks that at
    least one document and one dictionary are active, runs the analysis on
    the active files and active dictionaries and returns the tables as a
    JSON string.

    :return: a response object (often a render_template call) to flask and
    eventually to the browser.
    """
    analysis = ContentAnalysisModel()
    path = os.path.join(constants.TMP_FOLDER,
                        constants.UPLOAD_FOLDER,
                        session['id'], 'content_analysis/')
    # Names of the uploaded dictionary files; none when the folder is absent.
    dictionary_names = os.listdir(path) if os.path.isdir(path) else []

    if request.method == 'GET':
        dict_labels = session.get('dictionary_labels', [])
        if 'active_dictionaries' in session:
            active_dicts = session['active_dictionaries']
        else:
            # Without a stored selection every known dictionary is active.
            active_dicts = [True] * len(dict_labels)
        return render_template('contentanalysis.html',
                               dictionary_labels=dict_labels,
                               active_dictionaries=active_dicts,
                               toggle_all_value=session.get(
                                   'toggle_all_value', True),
                               itm="content-analysis",
                               formula=session.get('formula', ""))

    num_active_docs = detect_active_docs()

    # Parse the front-end options once and reuse them for all three values.
    # NOTE(review): assumes options_from_front_end() has no side effects and
    # returns the same values on every call within a request -- confirm.
    options = ContentAnalysisReceiver().options_from_front_end()
    active_dicts = options.active_dicts
    dict_labels = options.dict_labels
    session['formula'] = options.formula

    if len(dict_labels) == 0:
        # No labels were submitted: derive them from the file names on disk
        # and treat every dictionary as active.
        dict_labels = [os.path.splitext(dict_name)[0]
                       for dict_name in dictionary_names]
        active_dicts = [True] * len(dict_labels)
    num_active_dicts = active_dicts.count(True)

    if num_active_docs == 0 and num_active_dicts == 0:
        return error("At least 1 active document and 1 active "
                     "dictionary are required to perform a "
                     "content analysis.")
    elif num_active_docs == 0:
        return error("At least 1 active document is required to perform "
                     "a content analysis.")
    elif num_active_dicts == 0:
        return error("At least 1 active dictionary is required to perform"
                     " a content analysis.")

    file_manager = load_file_manager()
    active_files = file_manager.get_active_files()
    for file in active_files:
        analysis.add_file(file_name=file.name,
                          label=file.label,
                          content=file.load_contents())

    for dict_name, dict_label, active in zip(dictionary_names,
                                             dict_labels,
                                             active_dicts):
        if active:
            # The context manager closes the handle as soon as it is read.
            with open(os.path.join(path, dict_name), "r") as dict_file:
                content = dict_file.read()
            analysis.add_dictionary(file_name=dict_name,
                                    label=dict_label,
                                    content=content)

    result_table, corpus_raw_counts_table, files_raw_counts_tables,\
        formula_errors = analysis.analyze()
    if len(formula_errors) != 0 or result_table is None:
        return error(formula_errors)

    data = {"result_table": result_table,
            "dictionary_labels": dict_labels,
            "active_dictionaries": active_dicts,
            "corpus_raw_counts_table": corpus_raw_counts_table,
            "files_raw_counts_tables": files_raw_counts_tables,
            "error": False}
    return json.dumps(data)
Example #14
0
def test_save_formula():
    """Check the ``_formula`` value stored by save_formula().

    The formula ``"√([dict1])^([dict2])"`` must be saved as
    ``"sqrt([dict1])**([dict2])"``.
    """
    options = TestOptions(formula="√([dict1])^([dict2])")
    model = ContentAnalysisModel(options)
    model.save_formula()

    expected_formula = "sqrt([dict1])**([dict2])"
    assert model._formula == expected_formula
def test_analyze():
    """Check analyze() with the formula ``"[]"`` and then ``"[dict1]"``.

    ``analyze()`` is unpacked into four values. With ``"[]"`` the result
    table must be an empty string and the formula errors a ``str``; with
    ``"[dict1]"`` the result table must equal ``to_html()`` and the formula
    errors must be empty.
    """
    model = ContentAnalysisModel()
    model.test_option = TestOptions(formula="[]")
    model.save_formula()

    model.add_file(file_name="file1", label='file1', content='test')
    for label, content in (("dict1", "test"), ("dict2", "test2")):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content=content)

    # Only the first and last of the four returned values are inspected.
    table, _corpus_counts, _file_counts, problems = model.analyze()
    assert table == ""
    assert isinstance(problems, str)

    model.test_option = TestOptions(formula="[dict1]")
    model.save_formula()
    table, _corpus_counts, _file_counts, problems = model.analyze()
    assert table == model.to_html()
    assert problems == ""
Example #16
0
def test_is_secure():
    """Check which formulas is_secure() accepts and which it rejects.

    An empty formula, dictionary references and a string of digits,
    operators, parentheses and function names must be accepted, while
    ``"os.system()"`` must make ``is_secure()`` return ``False``.
    """
    model = ContentAnalysisModel()
    for label in ("dict1", "dict2"):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content="test")

    accepted_formulas = (
        "",
        "[dict1][dict2]",
        "0123456789 +-*/ () sin cos tan log sqrt",
    )
    for formula in accepted_formulas:
        model.test_option = TestOptions(formula=formula)
        model.save_formula()
        assert model.is_secure()

    model.test_option = TestOptions(formula="os.system()")
    model.save_formula()
    assert model.is_secure() is False
Example #17
0
def test_to_data_frame():
    """Check that to_data_frame() returns a pandas DataFrame.

    The model holds two files and two dictionaries and is counted, scored
    with an empty formula and averaged before the conversion.
    """
    model = ContentAnalysisModel()
    for name, content in (("file1", 'test'), ("file2", 'other file')):
        model.add_file(file_name=name, label=name, content=content)
    for label in ("dict1", "dict2"):
        model.add_dictionary(file_name=label + ".txt",
                             label=label,
                             content="test")

    model.count()
    model.test_option = TestOptions(formula="")
    model.save_formula()
    model.generate_scores()
    model.generate_averages()

    frame = model.to_data_frame()
    assert isinstance(frame, pd.DataFrame)
Example #18
0
def test_generate_averages():
    """Check the ``averages`` list produced by generate_averages().

    Three scenarios are covered: one file with the formula ``"0"``, two
    files with the formula ``"0"``, and the same two-file model re-scored
    with ``"4*[dict1]**2"``.
    """

    def rescore(model, formula):
        # Count, apply the formula, then score and average, in that order.
        model.count()
        model.test_option = TestOptions(formula=formula)
        model.save_formula()
        model.generate_scores()
        model.generate_averages()

    one_file = ContentAnalysisModel()
    one_file.add_file(file_name="file1", label='file1', content='test')
    one_file.add_dictionary(file_name="dict1.txt", label="dict1",
                            content="test")
    rescore(one_file, "0")
    assert one_file.averages == [1.0, 0.0, 1.0, 0.0]

    two_files = ContentAnalysisModel()
    two_files.add_file(file_name="file1", label='file1', content='test')
    two_files.add_file(file_name="file2", label='file2',
                       content='other file')
    two_files.add_dictionary(file_name="dict1.txt", label="dict1",
                             content="test")
    rescore(two_files, "0")
    assert two_files.averages == [0.5, 0.0, 1.5, 0.0]

    # The two-file model is reused, only the formula changes.
    rescore(two_files, "4*[dict1]**2")
    assert two_files.averages == [0.5, 2.0, 1.5, 2.0]
Example #19
0
def test_add_corpus():
    """Check what add_file stores for a newly added file.

    The file is read back from index 0 of ``corpus``; its name, label and
    content must match the arguments passed to ``add_file``.
    """
    model = ContentAnalysisModel()
    model.add_file(file_name="file1", label='file1', content='test')

    stored = model.corpus[0]
    assert stored.name == "file1"
    assert stored.label == "file1"
    assert stored.content == "test"