Ejemplo n.º 1
0
def test_cpp_lf_line_length_measures_returns_correct_measures():
    """Check ``line_length_measures`` for ``SOURCE_CODE_1``.

    The method is expected to return two floating-point values. They are
    compared against the reference values with a small absolute tolerance
    rather than ``==`` so that last-bit rounding differences between
    platforms or library versions do not cause spurious failures.
    """
    unigrams = _create_unigrams([SOURCE_CODE_1])
    cpp_lf = CppLexicalFeatures([SOURCE_CODE_1], [], unigrams)

    line_length_measures = cpp_lf.line_length_measures(SOURCE_CODE_1)

    # NOTE(review): presumably mean and standard deviation of line lengths;
    # confirm against the CppLexicalFeatures implementation.
    expected = [29.928571428571427, 13.071038245538359]
    tolerance = 1e-9

    # zip() would silently truncate, so the length is checked explicitly.
    assert len(line_length_measures) == len(expected)
    for actual, wanted in zip(line_length_measures, expected):
        assert abs(actual - wanted) <= tolerance
Ejemplo n.º 2
0
def test_cpp_lf_function_parameters_measures_returns_correct_measures():
    """Check ``function_parameters_measures`` yields ``[2, 0]`` for ``SOURCE_CODE_9``."""
    features = CppLexicalFeatures(
        [SOURCE_CODE_9], [], _create_unigrams([SOURCE_CODE_9])
    )

    measures = features.function_parameters_measures(SOURCE_CODE_9)

    assert measures == [2, 0]
Ejemplo n.º 3
0
def test_cpp_lf_tokens_returns_correct_number_of_tokens():
    """Check ``tokens`` returns ``log(6 / len(SOURCE_CODE_5))``."""
    features = CppLexicalFeatures(
        [SOURCE_CODE_5], [], _create_unigrams([SOURCE_CODE_5])
    )

    actual = features.tokens(SOURCE_CODE_5)
    expected = math.log(6.0 / len(SOURCE_CODE_5))

    assert actual == expected
Ejemplo n.º 4
0
def test_cpp_lf_macros_returns_correct_number_of_macros():
    """Check ``macros`` returns ``log(3 / len(SOURCE_CODE_8))``."""
    source_unigrams = _create_unigrams([SOURCE_CODE_8])
    extractor = CppLexicalFeatures([SOURCE_CODE_8], [], source_unigrams)

    result = extractor.macros(SOURCE_CODE_8)
    expected = math.log(3.0 / len(SOURCE_CODE_8))

    assert result == expected
Ejemplo n.º 5
0
def test_cpp_lf_literals_returns_tabscorrect_number_of_comments():
    """Check ``literals`` returns ``log(2 / len(SOURCE_CODE_8))``.

    Despite the "comments" wording in the test name, the method exercised
    here is ``literals``.
    """
    features = CppLexicalFeatures(
        [SOURCE_CODE_8], [], _create_unigrams([SOURCE_CODE_8])
    )

    measured = features.literals(SOURCE_CODE_8)
    expected = math.log(2.0 / len(SOURCE_CODE_8))

    assert measured == expected
Ejemplo n.º 6
0
def test_cpp_lf_functions_returns_correct_number_of_functions():
    """Check ``functions`` returns ``log(3 / len(SOURCE_CODE_9))``."""
    source_unigrams = _create_unigrams([SOURCE_CODE_9])
    extractor = CppLexicalFeatures([SOURCE_CODE_9], [], source_unigrams)

    observed = extractor.functions(SOURCE_CODE_9)
    expected = math.log(3.0 / len(SOURCE_CODE_9))

    assert observed == expected
Ejemplo n.º 7
0
def test_cpp_lf_comments_returns_correct_number_of_comments():
    """Check ``comments`` returns ``log(2 / len(SOURCE_CODE_7))``."""
    features = CppLexicalFeatures(
        [SOURCE_CODE_7], [], _create_unigrams([SOURCE_CODE_7])
    )

    result = features.comments(SOURCE_CODE_7)
    expected = math.log(2.0 / len(SOURCE_CODE_7))

    assert result == expected
Ejemplo n.º 8
0
def test_cpp_lf_ternary_operator_number_returns_correct_number_of_ternary_op():
    """Check ``ternary_operators`` returns ``log(2 / len(SOURCE_CODE_6))``."""
    source_unigrams = _create_unigrams([SOURCE_CODE_6])
    extractor = CppLexicalFeatures([SOURCE_CODE_6], [], source_unigrams)

    measured = extractor.ternary_operators(SOURCE_CODE_6)
    expected = math.log(2.0 / len(SOURCE_CODE_6))

    assert measured == expected
Ejemplo n.º 9
0
def test_cpp_lf_keyword_returns_number_of_keyword_occured():
    """Check ``keywords`` returns ``log(8 / len(SOURCE_CODE_3))``."""
    features = CppLexicalFeatures(
        [SOURCE_CODE_3], [], _create_unigrams([SOURCE_CODE_3])
    )

    observed = features.keywords(SOURCE_CODE_3)
    expected = math.log(8.0 / len(SOURCE_CODE_3))

    assert observed == expected
Ejemplo n.º 10
0
def test_cpp_lf_unigram_features_returns_correct_freq_of_unigrams():
    """Check ``unigram_features`` returns the expected list for ``SOURCE_CODE_2``."""
    source_unigrams = _create_unigrams([SOURCE_CODE_2])
    extractor = CppLexicalFeatures([SOURCE_CODE_2], [], source_unigrams)

    unigram_counts = extractor.unigram_features(SOURCE_CODE_2)

    # Eleven entries; only the fifth one is expected to be 2.
    expected = [1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1]
    assert unigram_counts == expected
Ejemplo n.º 11
0
def test_cpp_lf_variable_names_returns_correct_freq_of_variable_names():
    """Check ``variable_freq`` returns ``[4, 1, 1, 2, 2]`` for ``SOURCE_CODE_13``.

    The extractor is built with the names gathered by ``_get_variable_names``
    passed through the ``variable_names`` keyword argument.
    """
    names = _get_variable_names([SOURCE_CODE_13])
    extractor = CppLexicalFeatures(
        [SOURCE_CODE_13], [], variable_names=names
    )

    frequencies = extractor.variable_freq(SOURCE_CODE_13)

    expected = [4, 1, 1, 2, 2]
    assert frequencies == expected
Ejemplo n.º 12
0
def test_cpp_lf_keywords_freq_returns_correct_freq():
    """Check ``keyword_freq`` returns seven equal entries for ``SOURCE_CODE_1``.

    Every entry is expected to be ``log(1 / len(SOURCE_CODE_1))``.
    """
    features = CppLexicalFeatures(
        [SOURCE_CODE_1], [], _create_unigrams([SOURCE_CODE_1])
    )

    observed = features.keyword_freq(SOURCE_CODE_1)

    single_occurrence = math.log(1.0 / len(SOURCE_CODE_1))
    expected = [single_occurrence] * 7
    assert observed == expected
Ejemplo n.º 13
0
def test_cpp_lf_operators_returns_correct_number_of_operators():
    """Check ``operators`` returns ``log(4 / len(SOURCE_CODE_14))``.

    The extractor is built without unigrams here; only the training sources
    and an empty second argument are supplied.
    """
    cpp_lf = CppLexicalFeatures([SOURCE_CODE_14], [])

    operators = cpp_lf.operators(SOURCE_CODE_14)

    # A leftover debug ``print operators`` statement was removed: it is a
    # SyntaxError on Python 3, and pytest already reports both operands
    # when the assertion fails.
    assert operators == math.log(4. / len(SOURCE_CODE_14))



# def test_cpp_sf_average_node_depth_returns_correct_avg_depths():
#     cpp_sf = CppSyntacticFeatures([], [])

#     avg_depths = cpp_sf.average_node_depth(AST_NODES_1)

#     assert avg_depths == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 3, 0, 0, 0, 
#                           0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 4, 0, 0, 0, 0, 2]


# def test_cpp_sf_max_node_depth_returns_correct_depth():
#     cpp_sf = CppSyntacticFeatures([], [])

#     max_depth = cpp_sf.maximum_node_depth(AST_NODES_1)

#     assert max_depth == 4


# def test_cpp_sf_keywords_returns_correct_keywords_freq():
#     cpp_sf = CppSyntacticFeatures([], [])

#     freq = cpp_sf.keywords(AST_NODES_2)

#     assert freq == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
#                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
#                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


# def test_cpp_sf_leaf_values_freq_returns_correct_freq():
#     lv = get_leaf_values([AST_NODES_1])
#     cpp_sf = CppSyntacticFeatures([AST_NODES_1], [], lv)

#     freq = cpp_sf.leaf_values_freq(AST_NODES_1)

#     assert freq == [1, 2, 1, 1, 1, 1, 1]


# def test_cpp_sf_leaf_values_depths_returns_correct_avg_depth():
#     lv = get_leaf_values([AST_NODES_1])
#     cpp_sf = CppSyntacticFeatures([AST_NODES_1], [], lv)

#     avg_depths = cpp_sf.leaf_values_avg_depth(AST_NODES_1)

#     assert avg_depths == [4.0, 5.0, 5.0, 5.0, 2.0, 5.0, 5.0]


# def test_cpp_sf_inverse_leaf_values_returns_correct_tfidf():
#     lv = get_leaf_values([AST_NODES_1, AST_NODES_2])
#     aplv = authors_per_leaf_node([AST_NODES_1, AST_NODES_2])
#     cpp_sf = CppSyntacticFeatures([AST_NODES_1, AST_NODES_2], [], lv, aplv)

#     inverse = cpp_sf.inverse_leaf_values_freq(AST_NODES_1)

#     assert inverse == [2, 2, 1, 1, 2, 1, 2]


# def test_cpp_sf_node_type_freq_returns_correct_tf():
#     cpp_sf = CppSyntacticFeatures([AST_NODES_1], [])

#     node_type_freq = cpp_sf.node_type_freq(AST_NODES_1)

#     assert node_type_freq == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
#                               0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0]

# def test_cpp_sf_node_type_freq_inverse_returns_correct_tfidf():
#     cpp_sf = CppSyntacticFeatures([AST_NODES_1], [])

#     node_type_freq_inv = cpp_sf.node_type_freq_inverse(AST_NODES_1)

#     assert node_type_freq_inv == [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 
#                                   0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0]