def call_gspan_from_library(file_path):
    """Run gSpan on ``file_path`` and save everything it prints to a file.

    gSpan is invoked with the command-line options ``-l 3 -s 5000``.  While
    the options are parsed and the miner runs, ``sys.stdout`` is swapped for
    an in-memory buffer.  The captured text is then written below
    ``SUGRAPHS_PATH`` using the input's base name with ``.txt`` replaced by
    ``_frequent_subgraphs.txt``.

    Args:
        file_path: Path of the graph database file handed to gSpan.  The
            part after the last ``/`` names the output file.
    """
    from gspan_mining.config import parser
    from gspan_mining.gspan import gSpan

    old_stdout = sys.stdout
    redirected_output = sys.stdout = StringIO()
    try:
        # Pass the arguments as a list instead of splitting one string, so a
        # path containing whitespace reaches the parser as a single token.
        FLAGS, _ = parser.parse_known_args(
            args=[file_path, '-l', '3', '-s', '5000'])
        gs = gSpan(
            database_file_name=FLAGS.database_file_name,
            min_support=FLAGS.min_support,
            min_num_vertices=FLAGS.lower_bound_of_num_vertices,
            max_num_vertices=FLAGS.upper_bound_of_num_vertices,
            max_ngraphs=FLAGS.num_graphs,
            is_undirected=(not FLAGS.directed),
            verbose=FLAGS.verbose,
            visualize=FLAGS.plot,
            where=FLAGS.where
        )
        gs.run()
    finally:
        # Always give the real stdout back, even when mining fails;
        # otherwise every later print in the process would vanish into
        # the buffer.
        sys.stdout = old_stdout

    name = file_path.split('/')[-1]
    out_name = name.replace('.txt', '_frequent_subgraphs.txt')
    with open(SUGRAPHS_PATH + out_name, 'w') as f:
        f.write(redirected_output.getvalue())
Beispiel #2
0
def test(ds_name, minSup, params='', random=False, graphlet=False, cork=False):
    """Mine frequent subgraphs of a TU dataset with gSpan and run the tests.

    Loads the dataset, runs gSpan on ``graphs/<ds_name>.data.txt`` below the
    current working directory, logs how many frequent subgraphs were found
    and their average size, and hands the results to ``tests.Tests``.

    Args:
        ds_name: Name of the ``TUDataset``; also used for its root directory
            ``./tmp/<ds_name>`` and for the gSpan input file name.
        minSup: Minimum support as a fraction of the dataset size; it is
            multiplied by the number of graphs and truncated to an int.
        params: Extra whitespace-separated command-line options for gSpan.
        random: Forwarded to ``tests.Tests.run``.
        graphlet: Forwarded to ``tests.Tests.run``.
        cork: Forwarded to ``tests.Tests.run``.
    """
    # Record which test kinds are enabled, but only if nothing was
    # recorded before.
    if not helper.tests_run:
        if random:
            helper.tests_run += "random"
        if graphlet:
            helper.tests_run += "graphlet"
        if cork:
            helper.tests_run += "cork"

    print_info('\n-----[BEGIN]-----\n')
    dataset = TUDataset(root='./tmp/{}'.format(ds_name), name=ds_name)
    print_info('Starting Tests with dataset: {}, containing {} Graphs'.format(
        ds_name, len(dataset)))

    is_directed = dataset[0].is_directed()
    # create the run arguments for gSpan-python
    f_name = '{}.data.txt'.format(ds_name)
    f_path = path.join(os.getcwd(), 'graphs', f_name)
    option_str = ('--min_support {min} --directed {directed} {params} '
                  '--verbose FALSE --where TRUE').format(
        min=int(len(dataset) * minSup),
        directed=is_directed,
        params=params)
    args_str = option_str + ' ' + f_path
    print_info('Running with params: {}'.format(args_str))
    # Only the options are split on whitespace; the data file path is
    # appended as one token so a working directory containing spaces does
    # not get torn into several arguments.
    FLAGS, _ = parser.parse_known_args(args=option_str.split() + [f_path])

    # mine with gSpan
    print_info(
        "Starting mining with gSpan-Algorithm and minSup sigma = {}%".format(
            minSup * 100))
    gs = main(FLAGS)
    _report = gs._report_df

    print_info("\nFinished mining. Found {} freq. subgraphs.".format(
        len(_report)))

    _freq = list(_report['dfs'])
    sum_edges = sum(len(dfs) for dfs in _freq)
    sum_vertices = sum(dfs.get_num_vertices() for dfs in _freq)
    # Guard the averages: with no frequent subgraph the division would
    # raise ZeroDivisionError.
    n_freq = len(_freq)
    avg_vertices = sum_vertices / n_freq if n_freq else 0.0
    avg_edges = sum_edges / n_freq if n_freq else 0.0
    print_info("Durchschnitt Knoten: {}, Durchschnitt Kanten: {}\n".format(
        avg_vertices, avg_edges))

    _desc = list(_report['description'])

    # get info needed for testing
    ds_graph_classes = dataset.data.y.tolist()  # graph classes
    isomorph_graphs = list(_report['isomorph_graphs'])

    # perform test
    _tests = tests.Tests(_freq, isomorph_graphs, ds_graph_classes, _desc)
    _tests.run(random, graphlet, cork)

    print_info('\n-----[END]-----\n')
def generate_subgraphs_one_core(gspan_file_name,
                                s,
                                gspan_data_folder,
                                l=3,
                                plot=False):
    """Run gSpan on one graph file and return its report data frame.

    The input file is ``gspan_data_folder + gspan_file_name + '.txt'``
    (plain string concatenation, so the folder must already end with a path
    separator).  gSpan is invoked with the options
    ``-s <s> -l <l> -u 4 -v False -p <plot>``.

    Args:
        gspan_file_name: Name of the graph file without the ``.txt`` suffix.
        s: Value passed to gSpan's ``-s`` option.
        gspan_data_folder: Folder prefix prepended to the file name.
        l: Value passed to gSpan's ``-l`` option.
        plot: Value passed (as text) to gSpan's ``-p`` option.

    Returns:
        The ``_report_df`` attribute of the gSpan instance after the run.
    """
    filepath = gspan_data_folder + gspan_file_name + '.txt'
    # Build the argument list directly instead of splitting a string, so a
    # folder or file name containing whitespace stays a single argument.
    args = [
        '-s', str(s),
        '-l', str(l),
        '-u', '4',
        '-v', 'False',
        '-p', str(plot),
        filepath,
    ]
    FLAGS, _ = parser.parse_known_args(args=args)
    gs = gSpan(database_file_name=FLAGS.database_file_name,
               min_support=FLAGS.min_support,
               min_num_vertices=FLAGS.lower_bound_of_num_vertices,
               max_num_vertices=FLAGS.upper_bound_of_num_vertices,
               max_ngraphs=FLAGS.num_graphs,
               is_undirected=(not FLAGS.directed),
               verbose=FLAGS.verbose,
               visualize=FLAGS.plot,
               where=FLAGS.where)

    gs.run()
    return gs._report_df
Beispiel #4
0
		else:
			print("ENABLEMENT:")
			print(pm,'\n')	
		
		l_sol = solutions(l_sol,pm)

		stc = stc + 1
		s = s + 1

	# the file is closed outside the function to avoid overwriting the input data
	sub_graph.write("t # -1")	
	sub_graph.write("\n")		
	sub_graph.close()

	# invoke gSpan and plot the graphs, for each enablement of the document under consideration
	args_str = '-d True /Users/filippo/Desktop/UNIMI/Tesi/inputgSpan_sentence.data'
	FLAGS, _ = parser.parse_known_args(args=args_str.split())
	gs = main(FLAGS)

	for g in gs.graphs.values():
		g.plot()

	print('\n')

	match_caus(l_sol)