コード例 #1
0
ファイル: test_graph.py プロジェクト: task4233/malgrapher
def test_get_graph_embeddings():
    """Check that two DOT-derived graphs produce a Doc2Vec model."""
    samples = (('ls', './out/ls.dot'), ('cat', './out/cat.dot'))

    graphs = []
    for sample_name, dot_path in samples:
        wrapped = Graph(sample_name, convert_dot_to_networkx(dot_path))
        # Each wrapper must carry a converted graph before embedding.
        assert wrapped.graph is not None
        graphs.append(wrapped)

    model = get_graph_embeddings(graphs)
    assert type(model) is Doc2Vec
コード例 #2
0
ファイル: get_diff.py プロジェクト: task4233/malgrapher
if __name__ == "__main__":
    # Datasets come from get_datasets pointed at /bin/.
    datasets = get_datasets(dir='/bin/')

    graphs = []
    for dataset in datasets:
        dot_path = generate_dot_by_radare2(dataset.bin_file_path,
                                           dataset.sample_name)
        dataset.set_dot_file_path(dot_path)

        if os.path.exists(dataset.dot_file_path):
            nx_graph = convert_dot_to_networkx(dataset.dot_file_path)
            graphs.append(Graph(dataset.sample_name, nx_graph))
        else:
            # No DOT file on disk for this sample: report it and move on.
            print(f'{dataset.dot_file_path} is not created.')
            print('Please check the reason manually...')

    model = get_graph_embeddings(graphs)

    # Empty frame with one (file, score) column pair per rank.
    # NOTE(review): nothing fills this frame in the visible part of the
    # script -- confirm whether it is used further down.
    result_columns = [
        '1st Similar file', '1st Similarity Score',
        '2nd Similar file', '2nd Similarity Score',
        '3rd Similar file', '3rd Similarity Score',
    ]
    results = pd.DataFrame({}, columns=result_columns)

    for graph in graphs:
        print(f'get files which are similar to {graph.name}')
        # Graph vectors are looked up under the "g_<name>" key.
        graph_tag = f'g_{graph.name}'
        sim_files = model.dv.most_similar(graph_tag)
        print(sim_files)
        print('-----------------------------------------------')
コード例 #3
0
ファイル: test_graph.py プロジェクト: task4233/malgrapher
def test_get_graph_embeddings_with_single_graph():
    """Passing one converted graph directly must raise AssertionError."""
    single_graph = convert_dot_to_networkx(test_file_path)
    assert single_graph is not None

    # The converted graph is handed over as-is, not wrapped in a list.
    with pytest.raises(AssertionError):
        get_graph_embeddings(single_graph)
コード例 #4
0
def test_convert_to_networkx():
    """Converting ./out/ls.dot must yield a non-None result."""
    converted = convert_dot_to_networkx('./out/ls.dot')
    assert converted is not None
コード例 #5
0
ファイル: test_edge.py プロジェクト: task4233/malgrapher
def test_get_edge_embeddings():
    """Edge embeddings of the test graph must be a HadamardEmbedder."""
    source_graph = convert_dot_to_networkx(test_file_path)
    assert source_graph is not None

    edge_vectors = get_edge_embeddings(source_graph)
    assert type(edge_vectors) is HadamardEmbedder
コード例 #6
0
def test_convert_to_networkx_with_empty_saved_path():
    """Convert ./out/ls.dot while also passing a second path argument.

    NOTE(review): the test name says "empty saved path", yet a non-empty
    PNG path is passed -- confirm which of the two is intended.
    """
    converted = convert_dot_to_networkx('./out/ls.dot', './out/ls_cfg.png')
    assert converted is not None
コード例 #7
0
def test_convert_to_networkx_with_empty_dotfile_path():
    """An empty DOT path must raise FileNotFoundError."""
    empty_path = ''
    with pytest.raises(FileNotFoundError):
        convert_dot_to_networkx(empty_path)
コード例 #8
0

def get_edge_embeddings(G: Union[MultiDiGraph, MultiGraph],
                        debug: bool = False) -> HadamardEmbedder:
    """Build a Hadamard edge embedder from the node embeddings of ``G``.

    Args:
        G: Graph passed to ``get_node_embedings``; the resulting node
            vectors seed the edge embedder.
        debug: When true, print the ``most_similar`` result for every pair
            of integer indices ``(i, j)`` with ``j < i < len(G.nodes())``.

    Returns:
        The ``HadamardEmbedder`` built from the node embeddings.

    Raises:
        TypeError: If ``G`` is ``None``.
    """
    if G is None:
        raise TypeError(' A type of G must be MultiDiGraph or MultiGraph')

    # Errors raised while embedding propagate to the caller unchanged.
    wv = HadamardEmbedder(keyed_vectors=get_node_embedings(G))

    if debug:
        node_count = len(G.nodes())
        # NOTE(review): the pairs are integer positions, not node labels
        # taken from G -- confirm the embedder is keyed this way and that
        # it exposes most_similar.
        for idxI in range(node_count):
            for idxJ in range(idxI):
                pair = (idxI, idxJ)
                print('similar vector: ', pair)
                print('similar_edge', wv.most_similar(pair))
                print('')

    return wv


if __name__ == '__main__':
    # Manual run: embed the edges of ./out/ls.dot and print the traceback
    # of any failure instead of letting it escape.
    try:
        dot_file = './out/ls.dot'
        G = convert_dot_to_networkx(dot_file)
        embeds = get_edge_embeddings(G)
    except Exception:
        failure_report = traceback.format_exc()
        print(failure_report)
コード例 #9
0
                    min_count=min_count)
    return model


def save_embeddings(output_path: str, model: Doc2Vec,
                    graphs: List[TaggedDocument], dimensions: int):
    """Write one embedding row per graph to a CSV file.

    The CSV has a ``types`` column holding each graph's name, followed by
    ``x_0`` .. ``x_{dimensions - 1}`` holding the components of
    ``model.dv["g_<name>"]``. The index column is not written.

    Args:
        output_path: Destination CSV path.
        model: Object whose ``dv`` supports lookup by ``"g_<name>"`` and
            returns an iterable of vector components.
        graphs: Items exposing a ``name`` attribute.
            NOTE(review): the annotation says ``TaggedDocument`` but only
            ``.name`` is read here -- confirm the intended element type.
        dimensions: Number of ``x_*`` columns; it has to match the length
            of every vector or building the frame fails.

    Raises:
        OSError: If ``output_path`` cannot be written.
    """
    # Build everything in memory first so that a failed lookup or a
    # dimension mismatch does not leave an empty file behind.
    rows = [[graph.name, *model.dv["g_" + graph.name]] for graph in graphs]
    column_names = ["types"] + [f"x_{dim}" for dim in range(dimensions)]
    frame = pd.DataFrame(rows, columns=column_names)

    # to_csv creates (or overwrites) the file itself.
    frame.to_csv(output_path, index=False)


if __name__ == "__main__":
    dimensions = 128

    # Manual run: embed the two sample graphs and save their vectors; any
    # failure is printed as a traceback instead of escaping.
    try:
        ls_graph = convert_dot_to_networkx('./out/ls.dot')
        g1 = Graph('ls', ls_graph)
        cat_graph = convert_dot_to_networkx('./out/cat.dot')
        g2 = Graph('cat', cat_graph)
        graphs = [g1, g2]

        model = get_graph_embeddings(graphs, dimensions=dimensions)
        save_embeddings('./out/ls.model', model, graphs, dimensions)
    except Exception:
        failure_report = traceback.format_exc()
        print(failure_report)
コード例 #10
0
def test_get_node_embeddings():
    """Node embeddings of the test graph must be a KeyedVectors."""
    source_graph = convert_dot_to_networkx(test_file_path)
    assert source_graph is not None

    node_vectors = get_node_embedings(source_graph)
    assert type(node_vectors) is KeyedVectors