Example #1
0
def remove_best_graph(ds_name,
                      mpg_options,
                      kernel_options,
                      ged_options,
                      mge_options,
                      save_results=True,
                      save_medians=True,
                      plot_medians=True,
                      load_gm='auto',
                      dir_save='',
                      irrelevant_labels=None,
                      edge_required=False,
                      cut_range=None):
    """Remove, per class, the graph closest to the kernel-space median (the
    "best" graph) from the median set, then try to regenerate it as a median
    preimage from the remaining graphs.

    For every target class the function: finds the best graph w.r.t. distance
    in kernel space, removes it (and its row/column in the Gram matrix), runs
    a MedianPreimageGenerator on the reduced set, and records how the
    set/generated medians compare to the removed graph and to each other.

    Parameters
    ----------
    ds_name : str
        Name of a predefined dataset understood by
        ``Dataset.load_predefined_dataset``.
    mpg_options, kernel_options, ged_options, mge_options : dict
        Option dictionaries forwarded (copied) to the
        ``MedianPreimageGenerator``, the graph kernel, the GED computation
        and the median graph estimator respectively. ``mpg_options`` is
        mutated here: 'gram_matrix_unnorm' and 'runtime_precompute_gm' are
        (re)assigned per class.
    save_results : bool
        If True, append one detail row and one summary row per class (plus a
        final 'all' summary row) to CSV files created in ``dir_save``.
    save_medians : bool
        If True, write the set median, generated median, best-in-dataset and
        best-in-median-set graphs as GXL files under ``dir_save + 'medians/'``.
    plot_medians : bool
        If True (and ``save_medians`` is True), additionally draw the medians
        for LETTER-type edit costs / Letter datasets.
    load_gm : 'auto' or bool
        Forwarded to ``__get_gram_matrix``; controls whether Gram matrices
        are loaded from disk or recomputed.
    dir_save : str
        Output directory prefix; expected to end with a path separator since
        filenames are appended by string concatenation.
    irrelevant_labels : dict or None
        If given, keyword arguments for ``Dataset.remove_labels`` to strip
        labels before the experiment.
    edge_required : bool
        Whether graphs without edges are dropped when trimming the dataset.
    cut_range : sequence or None
        If given, restrict the dataset to this range of graphs.

    Returns
    -------
    None. Progress and aggregate distances are printed; results are
    optionally written to CSV/GXL files.
    """
    # 1. get dataset.
    print('1. getting dataset...')
    dataset_all = Dataset()
    dataset_all.load_predefined_dataset(ds_name)
    dataset_all.trim_dataset(edge_required=edge_required)
    if irrelevant_labels is not None:
        dataset_all.remove_labels(**irrelevant_labels)
    if cut_range is not None:
        dataset_all.cut_graphs(cut_range)
    # One sub-dataset per target class; the whole experiment runs per class.
    datasets = split_dataset_by_target(dataset_all)

    if save_results:
        # create result files.
        print('creating output files...')
        fn_output_detail, fn_output_summary = __init_output_file(
            ds_name, kernel_options['name'], mpg_options['fit_method'],
            dir_save)
    else:
        fn_output_detail, fn_output_summary = None, None

    # 2. compute/load Gram matrix a priori.
    print('2. computing/loading Gram matrix...')
    # One unnormalized Gram matrix (and its precompute runtime) per class,
    # aligned with `datasets`.
    gram_matrix_unnorm_list, time_precompute_gm_list = __get_gram_matrix(
        load_gm, dir_save, ds_name, kernel_options, datasets)

    # Per-class accumulators for the final 'all' summary row.
    sod_sm_list = []
    sod_gm_list = []
    dis_k_sm_list = []
    dis_k_gm_list = []
    dis_k_gi_min_list = []
    time_optimize_ec_list = []
    time_generate_list = []
    time_total_list = []
    itrs_list = []
    converged_list = []
    num_updates_ecc_list = []
    mge_decrease_order_list = []
    mge_increase_order_list = []
    mge_converged_order_list = []
    # Each is a [better, equal, worse] counter for one pairwise comparison
    # (SM = set median, GM = generated median, gi = best graph in dataset).
    nb_sod_sm2gm = [0, 0, 0]
    nb_dis_k_sm2gm = [0, 0, 0]
    nb_dis_k_gi2sm = [0, 0, 0]
    nb_dis_k_gi2gm = [0, 0, 0]
    dis_k_max_list = []
    dis_k_min_list = []
    dis_k_mean_list = []
    best_dis_list = []
    print('starting experiment for each class of target...')
    # Counts classes skipped for having < 2 graphs, so that the Gram-matrix
    # lists (which only contain entries for non-skipped classes) stay aligned
    # via index (idx - idx_offset).
    idx_offset = 0
    for idx, dataset in enumerate(datasets):
        target = dataset.targets[0]
        print('\ntarget =', target, '\n')
        #		if target != 1:
        # 			continue

        num_graphs = len(dataset.graphs)
        if num_graphs < 2:
            print('\nnumber of graphs = ', num_graphs, ', skip.\n')
            idx_offset += 1
            continue

        # 3. get the best graph and remove it from median set.
        print('3. getting and removing the best graph...')
        gram_matrix_unnorm = gram_matrix_unnorm_list[idx - idx_offset]
        # Best = graph minimizing distance to the set in normalized kernel
        # space; copies are passed so the originals stay untouched.
        best_index, best_dis, best_graph = __get_best_graph(
            [g.copy() for g in dataset.graphs],
            normalize_gram_matrix(gram_matrix_unnorm.copy()))
        median_set_new = [
            dataset.graphs[i] for i in range(len(dataset.graphs))
            if i != best_index
        ]
        num_graphs -= 1
        # If removal leaves a single graph there is nothing to generate from;
        # skip. (idx_offset is intentionally not bumped here: this class still
        # has an entry in the Gram-matrix lists, so later indexing stays
        # correct. best_dis is not recorded for such classes.)
        if num_graphs == 1:
            continue
        best_dis_list.append(best_dis)

        dataset.load_graphs(median_set_new, targets=None)
        # Drop the best graph's row and column from the unnormalized Gram
        # matrix so it matches the reduced median set.
        gram_matrix_unnorm_new = np.delete(gram_matrix_unnorm,
                                           best_index,
                                           axis=0)
        gram_matrix_unnorm_new = np.delete(gram_matrix_unnorm_new,
                                           best_index,
                                           axis=1)

        # 4. set parameters.
        print('4. initializing mpg and setting parameters...')
        mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_new
        mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[
            idx - idx_offset]
        mpg = MedianPreimageGenerator()
        mpg.dataset = dataset
        # Copies so the generator cannot mutate the caller's option dicts.
        mpg.set_options(**mpg_options.copy())
        mpg.kernel_options = kernel_options.copy()
        mpg.ged_options = ged_options.copy()
        mpg.mge_options = mge_options.copy()

        # 5. compute median preimage.
        print('5. computing median preimage...')
        mpg.run()
        results = mpg.get_results()

        # 6. compute pairwise kernel distances.
        print('6. computing pairwise kernel distances...')
        _, dis_k_max, dis_k_min, dis_k_mean = mpg.graph_kernel.compute_distance_matrix(
        )
        dis_k_max_list.append(dis_k_max)
        dis_k_min_list.append(dis_k_min)
        dis_k_mean_list.append(dis_k_mean)

        # 7. save results (and median graphs).
        print('7. saving results (and median graphs)...')
        # write result detail.
        if save_results:
            print('writing results to files...')
            # get_relations maps the sign of the difference to a textual
            # relation (better / equal / worse).
            sod_sm2gm = get_relations(
                np.sign(results['sod_gen_median'] - results['sod_set_median']))
            dis_k_sm2gm = get_relations(
                np.sign(results['k_dis_gen_median'] -
                        results['k_dis_set_median']))
            dis_k_gi2sm = get_relations(
                np.sign(results['k_dis_set_median'] -
                        results['k_dis_dataset']))
            dis_k_gi2gm = get_relations(
                np.sign(results['k_dis_gen_median'] -
                        results['k_dis_dataset']))

            # Append one detail row for this class.
            f_detail = open(dir_save + fn_output_detail, 'a')
            csv.writer(f_detail).writerow([
                ds_name,
                kernel_options['name'],
                ged_options['edit_cost'],
                ged_options['method'],
                ged_options['attr_distance'],
                mpg_options['fit_method'],
                num_graphs,
                target,
                1,  # repeat index (single repeat in this experiment)
                results['sod_set_median'],
                results['sod_gen_median'],
                results['k_dis_set_median'],
                results['k_dis_gen_median'],
                results['k_dis_dataset'],
                best_dis,
                best_index,
                sod_sm2gm,
                dis_k_sm2gm,
                dis_k_gi2sm,
                dis_k_gi2gm,
                results['edit_cost_constants'],
                results['runtime_precompute_gm'],
                results['runtime_optimize_ec'],
                results['runtime_generate_preimage'],
                results['runtime_total'],
                results['itrs'],
                results['converged'],
                results['num_updates_ecc'],
                results['mge']['num_decrease_order'] >
                0,  # @todo: not suitable for multi-start mge
                results['mge']['num_increase_order'] > 0,
                results['mge']['num_converged_descents'] > 0
            ])
            f_detail.close()

            # compute result summary.
            sod_sm_list.append(results['sod_set_median'])
            sod_gm_list.append(results['sod_gen_median'])
            dis_k_sm_list.append(results['k_dis_set_median'])
            dis_k_gm_list.append(results['k_dis_gen_median'])
            dis_k_gi_min_list.append(results['k_dis_dataset'])
            # NOTE(review): this appends onto the same list that is indexed by
            # (idx - idx_offset) in step 4. Indexing stays valid because
            # appended entries land past the original per-class entries, but
            # the mean computed at the end mixes loaded and re-read values —
            # confirm this is intended.
            time_precompute_gm_list.append(results['runtime_precompute_gm'])
            time_optimize_ec_list.append(results['runtime_optimize_ec'])
            time_generate_list.append(results['runtime_generate_preimage'])
            time_total_list.append(results['runtime_total'])
            itrs_list.append(results['itrs'])
            converged_list.append(results['converged'])
            num_updates_ecc_list.append(results['num_updates_ecc'])
            mge_decrease_order_list.append(
                results['mge']['num_decrease_order'] > 0)
            mge_increase_order_list.append(
                results['mge']['num_increase_order'] > 0)
            mge_converged_order_list.append(
                results['mge']['num_converged_descents'] > 0)
            # # SOD SM -> GM
            if results['sod_set_median'] > results['sod_gen_median']:
                nb_sod_sm2gm[0] += 1
    #			repeats_better_sod_sm2gm.append(1)
            elif results['sod_set_median'] == results['sod_gen_median']:
                nb_sod_sm2gm[1] += 1
            elif results['sod_set_median'] < results['sod_gen_median']:
                nb_sod_sm2gm[2] += 1
            # # dis_k SM -> GM
            if results['k_dis_set_median'] > results['k_dis_gen_median']:
                nb_dis_k_sm2gm[0] += 1
    #			repeats_better_dis_k_sm2gm.append(1)
            elif results['k_dis_set_median'] == results['k_dis_gen_median']:
                nb_dis_k_sm2gm[1] += 1
            elif results['k_dis_set_median'] < results['k_dis_gen_median']:
                nb_dis_k_sm2gm[2] += 1
            # # dis_k gi -> SM
            if results['k_dis_dataset'] > results['k_dis_set_median']:
                nb_dis_k_gi2sm[0] += 1
    #			repeats_better_dis_k_gi2sm.append(1)
            elif results['k_dis_dataset'] == results['k_dis_set_median']:
                nb_dis_k_gi2sm[1] += 1
            elif results['k_dis_dataset'] < results['k_dis_set_median']:
                nb_dis_k_gi2sm[2] += 1
            # # dis_k gi -> GM
            if results['k_dis_dataset'] > results['k_dis_gen_median']:
                nb_dis_k_gi2gm[0] += 1
    #			repeats_better_dis_k_gi2gm.append(1)
            elif results['k_dis_dataset'] == results['k_dis_gen_median']:
                nb_dis_k_gi2gm[1] += 1
            elif results['k_dis_dataset'] < results['k_dis_gen_median']:
                nb_dis_k_gi2gm[2] += 1

            # write result summary for each letter.
            f_summary = open(dir_save + fn_output_summary, 'a')
            csv.writer(f_summary).writerow([
                ds_name,
                kernel_options['name'],
                ged_options['edit_cost'],
                ged_options['method'],
                ged_options['attr_distance'],
                mpg_options['fit_method'],
                num_graphs,
                target,
                results['sod_set_median'],
                results['sod_gen_median'],
                results['k_dis_set_median'],
                results['k_dis_gen_median'],
                results['k_dis_dataset'],
                best_dis,
                best_index,
                sod_sm2gm,
                dis_k_sm2gm,
                dis_k_gi2sm,
                dis_k_gi2gm,
                results['runtime_precompute_gm'],
                results['runtime_optimize_ec'],
                results['runtime_generate_preimage'],
                results['runtime_total'],
                results['itrs'],
                results['converged'],
                results['num_updates_ecc'],
                results['mge']['num_decrease_order'] >
                0,  # @todo: not suitable for multi-start mge
                results['mge']['num_increase_order'] > 0,
                results['mge']['num_converged_descents'] > 0,
                nb_sod_sm2gm,
                nb_dis_k_sm2gm,
                nb_dis_k_gi2sm,
                nb_dis_k_gi2gm
            ])
            f_summary.close()

        # save median graphs.
        if save_medians:
            if not os.path.exists(dir_save + 'medians/'):
                os.makedirs(dir_save + 'medians/')
            print('Saving median graphs to files...')
            fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(mpg.set_median,
                    fn_pre_sm + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)
            fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(mpg.gen_median,
                    fn_pre_gm + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)
            # The graph removed from the median set (best in full dataset).
            fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(best_graph,
                    fn_best_dataset + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)
            # The best graph found within the reduced median set.
            fn_best_median_set = dir_save + 'medians/g_best_median_set.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(mpg.best_from_dataset,
                    fn_best_median_set + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)

        # plot median graphs.
        if plot_medians and save_medians:
            # Drawing only supported for Letter-style 2D-coordinate graphs.
            if ged_options['edit_cost'] == 'LETTER2' or ged_options[
                    'edit_cost'] == 'LETTER' or ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low':
                draw_Letter_graph(mpg.set_median, fn_pre_sm)
                draw_Letter_graph(mpg.gen_median, fn_pre_gm)
                draw_Letter_graph(mpg.best_from_dataset, fn_best_dataset)

    # write result summary for each letter.
    if save_results:
        sod_sm_mean = np.mean(sod_sm_list)
        sod_gm_mean = np.mean(sod_gm_list)
        dis_k_sm_mean = np.mean(dis_k_sm_list)
        dis_k_gm_mean = np.mean(dis_k_gm_list)
        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
        best_dis_mean = np.mean(best_dis_list)
        time_precompute_gm_mean = np.mean(time_precompute_gm_list)
        time_optimize_ec_mean = np.mean(time_optimize_ec_list)
        time_generate_mean = np.mean(time_generate_list)
        time_total_mean = np.mean(time_total_list)
        itrs_mean = np.mean(itrs_list)
        num_converged = np.sum(converged_list)
        num_updates_ecc_mean = np.mean(num_updates_ecc_list)
        num_mge_decrease_order = np.sum(mge_decrease_order_list)
        num_mge_increase_order = np.sum(mge_increase_order_list)
        num_mge_converged = np.sum(mge_converged_order_list)
        sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean))
        dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean -
                                                 dis_k_sm_mean))
        dis_k_gi2sm_mean = get_relations(
            np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
        dis_k_gi2gm_mean = get_relations(
            np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
        # NOTE(review): `num_graphs` here is the count from the last processed
        # class, not a dataset-wide total — confirm intended for the 'all' row.
        f_summary = open(dir_save + fn_output_summary, 'a')
        csv.writer(f_summary).writerow([
            ds_name, kernel_options['name'], ged_options['edit_cost'],
            ged_options['method'], ged_options['attr_distance'],
            mpg_options['fit_method'], num_graphs, 'all', sod_sm_mean,
            sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean, dis_k_gi_min_mean,
            best_dis_mean, '-', sod_sm2gm_mean, dis_k_sm2gm_mean,
            dis_k_gi2sm_mean, dis_k_gi2gm_mean, time_precompute_gm_mean,
            time_optimize_ec_mean, time_generate_mean, time_total_mean,
            itrs_mean, num_converged, num_updates_ecc_mean,
            num_mge_decrease_order, num_mge_increase_order, num_mge_converged
        ])
        f_summary.close()

    # save total pairwise kernel distances.
    # NOTE(review): raises if every class was skipped (empty lists) — confirm
    # acceptable for the datasets this is run on.
    dis_k_max = np.max(dis_k_max_list)
    dis_k_min = np.min(dis_k_min_list)
    dis_k_mean = np.mean(dis_k_mean_list)
    print('The maximum pairwise distance in kernel space:', dis_k_max)
    print('The minimum pairwise distance in kernel space:', dis_k_min)
    print('The average pairwise distance in kernel space:', dis_k_mean)

    print('\ncomplete.\n')
Example #2
0
def generate_median_preimages_by_class(ds_name,
                                       mpg_options,
                                       kernel_options,
                                       ged_options,
                                       mge_options,
                                       save_results=True,
                                       save_medians=True,
                                       plot_medians=True,
                                       load_gm='auto',
                                       dir_save='',
                                       irrelevant_labels=None,
                                       edge_required=False,
                                       cut_range=None):
    """Generate a median preimage for each target class of a dataset.

    For every class with at least two graphs, runs a
    ``MedianPreimageGenerator`` on the class's graphs and records per-class
    detail/summary results, optionally saving/plotting the resulting median
    graphs and caching Gram matrices to disk.

    Parameters
    ----------
    ds_name : str
        Name of a predefined dataset understood by
        ``Dataset.load_predefined_dataset``.
    mpg_options, kernel_options, ged_options, mge_options : dict
        Option dictionaries forwarded (copied) to the
        ``MedianPreimageGenerator``, the graph kernel, the GED computation
        and the median graph estimator respectively. ``mpg_options`` may be
        mutated here: 'gram_matrix_unnorm' and 'runtime_precompute_gm' are
        assigned per class when a cached Gram matrix is used.
    save_results : bool
        If True, append one detail row and one summary row per class (plus a
        final 'all' summary row) to CSV files created in ``dir_save``.
    save_medians : bool
        If True, write set median, generated median and best-in-dataset
        graphs as GXL files under ``dir_save + 'medians/'``.
    plot_medians : bool
        If True (and ``save_medians`` is True), additionally draw the medians
        for LETTER-type edit costs / Letter datasets.
    load_gm : 'auto' or bool
        'auto': load the cached Gram-matrix file if present, else compute and
        save it afterwards. Falsy: always recompute and save. Other truthy
        values: load the file unconditionally (see NOTE below).
    dir_save : str
        Output directory prefix; expected to end with a path separator since
        filenames are appended by string concatenation.
    irrelevant_labels : dict or None
        If given, keyword arguments for ``Dataset.remove_labels`` to strip
        labels before the experiment.
    edge_required : bool
        Whether graphs without edges are dropped when trimming the dataset.
    cut_range : sequence or None
        If given, restrict the dataset to this range of graphs.

    Returns
    -------
    None. Progress and aggregate distances are printed; results are
    optionally written to CSV/GXL/npz files.
    """
    import os.path
    from gklearn.preimage import MedianPreimageGenerator
    from gklearn.utils import split_dataset_by_target
    from gklearn.utils.graphfiles import saveGXL

    # 1. get dataset.
    print('1. getting dataset...')
    dataset_all = Dataset()
    dataset_all.load_predefined_dataset(ds_name)
    dataset_all.trim_dataset(edge_required=edge_required)
    if irrelevant_labels is not None:
        dataset_all.remove_labels(**irrelevant_labels)
    if cut_range is not None:
        dataset_all.cut_graphs(cut_range)
    # One sub-dataset per target class; the whole experiment runs per class.
    datasets = split_dataset_by_target(dataset_all)

    if save_results:
        # create result files.
        print('creating output files...')
        fn_output_detail, fn_output_summary = _init_output_file_preimage(
            ds_name, kernel_options['name'], mpg_options['fit_method'],
            dir_save)

    # Per-class accumulators for the final 'all' summary row.
    sod_sm_list = []
    sod_gm_list = []
    dis_k_sm_list = []
    dis_k_gm_list = []
    dis_k_gi_min_list = []
    time_optimize_ec_list = []
    time_generate_list = []
    time_total_list = []
    itrs_list = []
    converged_list = []
    num_updates_ecc_list = []
    mge_decrease_order_list = []
    mge_increase_order_list = []
    mge_converged_order_list = []
    # Each is a [better, equal, worse] counter for one pairwise comparison
    # (SM = set median, GM = generated median, gi = best graph in dataset).
    nb_sod_sm2gm = [0, 0, 0]
    nb_dis_k_sm2gm = [0, 0, 0]
    nb_dis_k_gi2sm = [0, 0, 0]
    nb_dis_k_gi2gm = [0, 0, 0]
    dis_k_max_list = []
    dis_k_min_list = []
    dis_k_mean_list = []
    # Load cached Gram matrices if requested/available; otherwise start with
    # empty lists and compute per class.
    if load_gm == 'auto':
        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options[
            'name'] + '.gm.npz'
        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
        if gmfile_exist:
            gmfile = np.load(gm_fname,
                             allow_pickle=True)  # @todo: may not be safe.
            gram_matrix_unnorm_list = [
                item for item in gmfile['gram_matrix_unnorm_list']
            ]
            time_precompute_gm_list = gmfile['run_time_list'].tolist()
        else:
            gram_matrix_unnorm_list = []
            time_precompute_gm_list = []
    elif not load_gm:
        gram_matrix_unnorm_list = []
        time_precompute_gm_list = []
    else:
        # NOTE(review): this branch (load_gm truthy but not 'auto') never
        # defines `gmfile_exist`, which is read later at "if load_gm: if
        # gmfile_exist:" and in the save conditions — this path looks like it
        # would raise NameError; confirm whether it is ever exercised.
        gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options[
            'name'] + '.gm.npz'
        gmfile = np.load(gm_fname,
                         allow_pickle=True)  # @todo: may not be safe.
        gram_matrix_unnorm_list = [
            item for item in gmfile['gram_matrix_unnorm_list']
        ]
        time_precompute_gm_list = gmfile['run_time_list'].tolist()


#	repeats_better_sod_sm2gm = []
#	repeats_better_dis_k_sm2gm = []
#	repeats_better_dis_k_gi2sm = []
#	repeats_better_dis_k_gi2gm = []

    print('starting generating preimage for each class of target...')
    # Counts classes skipped for having < 2 graphs, so that the cached
    # Gram-matrix lists stay aligned via index (idx - idx_offset).
    idx_offset = 0
    for idx, dataset in enumerate(datasets):
        target = dataset.targets[0]
        print('\ntarget =', target, '\n')
        #		if target != 1:
        # 			continue

        num_graphs = len(dataset.graphs)
        if num_graphs < 2:
            print('\nnumber of graphs = ', num_graphs, ', skip.\n')
            idx_offset += 1
            continue

        # 2. set parameters.
        print('2. initializing mpg and setting parameters...')
        if load_gm:
            if gmfile_exist:
                mpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[
                    idx - idx_offset]
                mpg_options['runtime_precompute_gm'] = time_precompute_gm_list[
                    idx - idx_offset]
        mpg = MedianPreimageGenerator()
        mpg.dataset = dataset
        # Copies so the generator cannot mutate the caller's option dicts.
        mpg.set_options(**mpg_options.copy())
        mpg.kernel_options = kernel_options.copy()
        mpg.ged_options = ged_options.copy()
        mpg.mge_options = mge_options.copy()

        # 3. compute median preimage.
        print('3. computing median preimage...')
        mpg.run()
        results = mpg.get_results()

        # 4. compute pairwise kernel distances.
        print('4. computing pairwise kernel distances...')
        _, dis_k_max, dis_k_min, dis_k_mean = mpg.graph_kernel.compute_distance_matrix(
        )
        dis_k_max_list.append(dis_k_max)
        dis_k_min_list.append(dis_k_min)
        dis_k_mean_list.append(dis_k_mean)

        # 5. save results (and median graphs).
        print('5. saving results (and median graphs)...')
        # write result detail.
        if save_results:
            print('writing results to files...')
            # get_relations maps the sign of the difference to a textual
            # relation (better / equal / worse).
            sod_sm2gm = get_relations(
                np.sign(results['sod_gen_median'] - results['sod_set_median']))
            dis_k_sm2gm = get_relations(
                np.sign(results['k_dis_gen_median'] -
                        results['k_dis_set_median']))
            dis_k_gi2sm = get_relations(
                np.sign(results['k_dis_set_median'] -
                        results['k_dis_dataset']))
            dis_k_gi2gm = get_relations(
                np.sign(results['k_dis_gen_median'] -
                        results['k_dis_dataset']))

            # Append one detail row for this class.
            f_detail = open(dir_save + fn_output_detail, 'a')
            csv.writer(f_detail).writerow([
                ds_name,
                kernel_options['name'],
                ged_options['edit_cost'],
                ged_options['method'],
                ged_options['attr_distance'],
                mpg_options['fit_method'],
                num_graphs,
                target,
                1,  # repeat index (single repeat in this experiment)
                results['sod_set_median'],
                results['sod_gen_median'],
                results['k_dis_set_median'],
                results['k_dis_gen_median'],
                results['k_dis_dataset'],
                sod_sm2gm,
                dis_k_sm2gm,
                dis_k_gi2sm,
                dis_k_gi2gm,
                results['edit_cost_constants'],
                results['runtime_precompute_gm'],
                results['runtime_optimize_ec'],
                results['runtime_generate_preimage'],
                results['runtime_total'],
                results['itrs'],
                results['converged'],
                results['num_updates_ecc'],
                results['mge']['num_decrease_order'] >
                0,  # @todo: not suitable for multi-start mge
                results['mge']['num_increase_order'] > 0,
                results['mge']['num_converged_descents'] > 0
            ])
            f_detail.close()

            # compute result summary.
            sod_sm_list.append(results['sod_set_median'])
            sod_gm_list.append(results['sod_gen_median'])
            dis_k_sm_list.append(results['k_dis_set_median'])
            dis_k_gm_list.append(results['k_dis_gen_median'])
            dis_k_gi_min_list.append(results['k_dis_dataset'])
            # NOTE(review): this appends onto the same list that is indexed by
            # (idx - idx_offset) in step 2 and that is saved to the npz cache
            # at the end; appended runtimes end up in the saved
            # 'run_time_list' — confirm this is intended.
            time_precompute_gm_list.append(results['runtime_precompute_gm'])
            time_optimize_ec_list.append(results['runtime_optimize_ec'])
            time_generate_list.append(results['runtime_generate_preimage'])
            time_total_list.append(results['runtime_total'])
            itrs_list.append(results['itrs'])
            converged_list.append(results['converged'])
            num_updates_ecc_list.append(results['num_updates_ecc'])
            mge_decrease_order_list.append(
                results['mge']['num_decrease_order'] > 0)
            mge_increase_order_list.append(
                results['mge']['num_increase_order'] > 0)
            mge_converged_order_list.append(
                results['mge']['num_converged_descents'] > 0)
            # # SOD SM -> GM
            if results['sod_set_median'] > results['sod_gen_median']:
                nb_sod_sm2gm[0] += 1
    #			repeats_better_sod_sm2gm.append(1)
            elif results['sod_set_median'] == results['sod_gen_median']:
                nb_sod_sm2gm[1] += 1
            elif results['sod_set_median'] < results['sod_gen_median']:
                nb_sod_sm2gm[2] += 1
            # # dis_k SM -> GM
            if results['k_dis_set_median'] > results['k_dis_gen_median']:
                nb_dis_k_sm2gm[0] += 1
    #			repeats_better_dis_k_sm2gm.append(1)
            elif results['k_dis_set_median'] == results['k_dis_gen_median']:
                nb_dis_k_sm2gm[1] += 1
            elif results['k_dis_set_median'] < results['k_dis_gen_median']:
                nb_dis_k_sm2gm[2] += 1
            # # dis_k gi -> SM
            if results['k_dis_dataset'] > results['k_dis_set_median']:
                nb_dis_k_gi2sm[0] += 1
    #			repeats_better_dis_k_gi2sm.append(1)
            elif results['k_dis_dataset'] == results['k_dis_set_median']:
                nb_dis_k_gi2sm[1] += 1
            elif results['k_dis_dataset'] < results['k_dis_set_median']:
                nb_dis_k_gi2sm[2] += 1
            # # dis_k gi -> GM
            if results['k_dis_dataset'] > results['k_dis_gen_median']:
                nb_dis_k_gi2gm[0] += 1
    #			repeats_better_dis_k_gi2gm.append(1)
            elif results['k_dis_dataset'] == results['k_dis_gen_median']:
                nb_dis_k_gi2gm[1] += 1
            elif results['k_dis_dataset'] < results['k_dis_gen_median']:
                nb_dis_k_gi2gm[2] += 1

            # write result summary for each letter.
            f_summary = open(dir_save + fn_output_summary, 'a')
            csv.writer(f_summary).writerow([
                ds_name,
                kernel_options['name'],
                ged_options['edit_cost'],
                ged_options['method'],
                ged_options['attr_distance'],
                mpg_options['fit_method'],
                num_graphs,
                target,
                results['sod_set_median'],
                results['sod_gen_median'],
                results['k_dis_set_median'],
                results['k_dis_gen_median'],
                results['k_dis_dataset'],
                sod_sm2gm,
                dis_k_sm2gm,
                dis_k_gi2sm,
                dis_k_gi2gm,
                results['runtime_precompute_gm'],
                results['runtime_optimize_ec'],
                results['runtime_generate_preimage'],
                results['runtime_total'],
                results['itrs'],
                results['converged'],
                results['num_updates_ecc'],
                results['mge']['num_decrease_order'] >
                0,  # @todo: not suitable for multi-start mge
                results['mge']['num_increase_order'] > 0,
                results['mge']['num_converged_descents'] > 0,
                nb_sod_sm2gm,
                nb_dis_k_sm2gm,
                nb_dis_k_gi2sm,
                nb_dis_k_gi2gm
            ])
            f_summary.close()

        # save median graphs.
        if save_medians:
            os.makedirs(dir_save + 'medians/', exist_ok=True)
            print('Saving median graphs to files...')
            fn_pre_sm = dir_save + 'medians/set_median.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(mpg.set_median,
                    fn_pre_sm + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)
            fn_pre_gm = dir_save + 'medians/gen_median.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(mpg.gen_median,
                    fn_pre_gm + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)
            # The dataset graph closest to the median in kernel space.
            fn_best_dataset = dir_save + 'medians/g_best_dataset.' + mpg_options[
                'fit_method'] + '.nbg' + str(num_graphs) + '.y' + str(
                    target) + '.repeat' + str(1)
            saveGXL(mpg.best_from_dataset,
                    fn_best_dataset + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)

        # plot median graphs.
        if plot_medians and save_medians:
            # Drawing only supported for Letter-style 2D-coordinate graphs.
            if ged_options['edit_cost'] == 'LETTER2' or ged_options[
                    'edit_cost'] == 'LETTER' or ds_name == 'Letter-high' or ds_name == 'Letter-med' or ds_name == 'Letter-low':
                draw_Letter_graph(mpg.set_median, fn_pre_sm)
                draw_Letter_graph(mpg.gen_median, fn_pre_gm)
                draw_Letter_graph(mpg.best_from_dataset, fn_best_dataset)

        # Collect freshly computed Gram matrices for the cache written below.
        if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
            gram_matrix_unnorm_list.append(mpg.gram_matrix_unnorm)

    # write result summary for each class.
    if save_results:
        sod_sm_mean = np.mean(sod_sm_list)
        sod_gm_mean = np.mean(sod_gm_list)
        dis_k_sm_mean = np.mean(dis_k_sm_list)
        dis_k_gm_mean = np.mean(dis_k_gm_list)
        dis_k_gi_min_mean = np.mean(dis_k_gi_min_list)
        time_precompute_gm_mean = np.mean(time_precompute_gm_list)
        time_optimize_ec_mean = np.mean(time_optimize_ec_list)
        time_generate_mean = np.mean(time_generate_list)
        time_total_mean = np.mean(time_total_list)
        itrs_mean = np.mean(itrs_list)
        num_converged = np.sum(converged_list)
        num_updates_ecc_mean = np.mean(num_updates_ecc_list)
        num_mge_decrease_order = np.sum(mge_decrease_order_list)
        num_mge_increase_order = np.sum(mge_increase_order_list)
        num_mge_converged = np.sum(mge_converged_order_list)
        sod_sm2gm_mean = get_relations(np.sign(sod_gm_mean - sod_sm_mean))
        dis_k_sm2gm_mean = get_relations(np.sign(dis_k_gm_mean -
                                                 dis_k_sm_mean))
        dis_k_gi2sm_mean = get_relations(
            np.sign(dis_k_sm_mean - dis_k_gi_min_mean))
        dis_k_gi2gm_mean = get_relations(
            np.sign(dis_k_gm_mean - dis_k_gi_min_mean))
        # NOTE(review): `num_graphs` here is the count from the last processed
        # class, not a dataset-wide total — confirm intended for the 'all' row.
        f_summary = open(dir_save + fn_output_summary, 'a')
        csv.writer(f_summary).writerow([
            ds_name, kernel_options['name'], ged_options['edit_cost'],
            ged_options['method'], ged_options['attr_distance'],
            mpg_options['fit_method'], num_graphs, 'all', sod_sm_mean,
            sod_gm_mean, dis_k_sm_mean, dis_k_gm_mean, dis_k_gi_min_mean,
            sod_sm2gm_mean, dis_k_sm2gm_mean, dis_k_gi2sm_mean,
            dis_k_gi2gm_mean, time_precompute_gm_mean, time_optimize_ec_mean,
            time_generate_mean, time_total_mean, itrs_mean, num_converged,
            num_updates_ecc_mean, num_mge_decrease_order,
            num_mge_increase_order, num_mge_converged
        ])
        f_summary.close()

    # save total pairwise kernel distances.
    # NOTE(review): raises if every class was skipped (empty lists) — confirm
    # acceptable for the datasets this is run on.
    dis_k_max = np.max(dis_k_max_list)
    dis_k_min = np.min(dis_k_min_list)
    dis_k_mean = np.mean(dis_k_mean_list)
    print('The maximum pairwise distance in kernel space:', dis_k_max)
    print('The minimum pairwise distance in kernel space:', dis_k_min)
    print('The average pairwise distance in kernel space:', dis_k_mean)

    # write Gram matrices to file.
    if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' +
                 kernel_options['name'] + '.gm',
                 gram_matrix_unnorm_list=gram_matrix_unnorm_list,
                 run_time_list=time_precompute_gm_list)

    print('\ncomplete.\n')
Example #3
0
def generate_random_preimages_by_class(ds_name,
                                       rpg_options,
                                       kernel_options,
                                       save_results=True,
                                       save_preimages=True,
                                       load_gm='auto',
                                       dir_save='',
                                       irrelevant_labels=None,
                                       edge_required=False,
                                       cut_range=None):
    """Generate a random pre-image for each class (target) of a dataset.

    The predefined dataset ``ds_name`` is loaded, trimmed, and split by
    target value; for every class containing at least two graphs a
    ``RandomPreimageGenerator`` is run. Result rows are appended to
    detail/summary CSV files, the generated pre-images can be saved as GXL
    files, and freshly computed unnormalized Gram matrices are saved to an
    ``.npz`` file for reuse.

    Parameters
    ----------
    ds_name : string
        Name of the predefined dataset to load.
    rpg_options : dict
        Options forwarded to ``RandomPreimageGenerator.set_options``.
    kernel_options : dict
        Graph kernel options; must contain the key ``'name'``.
    save_results : bool
        Whether to write detail/summary CSV result files. Default True.
    save_preimages : bool
        Whether to save the best dataset graph and the generated pre-image
        of each class as GXL files under ``dir_save + 'preimages/'``.
        Default True.
    load_gm : 'auto' | bool
        'auto': load the precomputed Gram matrix file if it exists,
        otherwise compute and save it; True: always load (the file must
        exist — ``np.load`` raises otherwise); False: always recompute.
        Default 'auto'.
    dir_save : string
        Directory where result and Gram matrix files live. Default ''.
    irrelevant_labels : dict | None
        If given, passed to ``Dataset.remove_labels`` to strip labels
        before processing. Default None.
    edge_required : bool
        Forwarded to ``Dataset.trim_dataset``. Default False.
    cut_range : object | None
        If given, passed to ``Dataset.cut_graphs`` (presumably a range of
        graph indices to keep — confirm against ``Dataset``). Default None.
    """
    # 1. get dataset.
    print('1. getting dataset...')
    dataset_all = Dataset()
    dataset_all.load_predefined_dataset(ds_name)
    dataset_all.trim_dataset(edge_required=edge_required)
    if irrelevant_labels is not None:
        dataset_all.remove_labels(**irrelevant_labels)
    if cut_range is not None:
        dataset_all.cut_graphs(cut_range)
    datasets = split_dataset_by_target(dataset_all)

    if save_results:
        # create result files.
        print('creating output files...')
        fn_output_detail, fn_output_summary = _init_output_file_preimage(
            ds_name, kernel_options['name'], dir_save)
    else:
        # Fix: keep the names bound (as the sibling function does) so no
        # later reference can raise NameError.
        fn_output_detail, fn_output_summary = None, None

    # accumulators for the "all classes" summary row written at the end.
    dis_k_dataset_list = []
    dis_k_preimage_list = []
    time_precompute_gm_list = []
    time_generate_list = []
    time_total_list = []
    itrs_list = []
    num_updates_list = []

    # 2'. load precomputed Gram matrices a priori where requested/available.
    gm_fname = dir_save + 'gram_matrix_unnorm.' + ds_name + '.' + kernel_options[
        'name'] + '.gm.npz'
    if load_gm == 'auto':
        gmfile_exist = os.path.isfile(os.path.abspath(gm_fname))
    else:
        # Fix: ``gmfile_exist`` was previously undefined when ``load_gm``
        # was truthy but not 'auto', raising NameError in the loop below.
        # When loading is forced, the file is required (np.load raises if
        # it is missing, matching the original behavior).
        gmfile_exist = bool(load_gm)
    if load_gm and gmfile_exist:
        gmfile = np.load(gm_fname,
                         allow_pickle=True)  # @todo: may not be safe.
        gram_matrix_unnorm_list = [
            item for item in gmfile['gram_matrix_unnorm_list']
        ]
        time_precompute_gm_list = gmfile['run_time_list'].tolist()
    else:
        gram_matrix_unnorm_list = []
        time_precompute_gm_list = []

    print('starting generating preimage for each class of target...')
    # offset into the loaded Gram matrix list for skipped (too small) classes.
    idx_offset = 0
    for idx, dataset in enumerate(datasets):
        target = dataset.targets[0]
        print('\ntarget =', target, '\n')

        num_graphs = len(dataset.graphs)
        if num_graphs < 2:
            # a pre-image needs at least two graphs; skip this class.
            print('\nnumber of graphs = ', num_graphs, ', skip.\n')
            idx_offset += 1
            continue

        # 2. set parameters.
        print('2. initializing mpg and setting parameters...')
        if load_gm and gmfile_exist:
            # reuse the precomputed Gram matrix and its precompute runtime.
            rpg_options['gram_matrix_unnorm'] = gram_matrix_unnorm_list[
                idx - idx_offset]
            rpg_options['runtime_precompute_gm'] = time_precompute_gm_list[
                idx - idx_offset]
        rpg = RandomPreimageGenerator()
        rpg.dataset = dataset
        rpg.set_options(**rpg_options.copy())
        rpg.kernel_options = kernel_options.copy()

        # 3. compute preimage.
        print('3. computing preimage...')
        rpg.run()
        results = rpg.get_results()

        # 4. save results (and median graphs).
        print('4. saving results (and preimages)...')
        # write result detail.
        if save_results:
            print('writing results to files...')

            with open(dir_save + fn_output_detail, 'a') as f_detail:
                csv.writer(f_detail).writerow([
                    ds_name, kernel_options['name'], num_graphs, target, 1,
                    results['k_dis_dataset'], results['k_dis_preimage'],
                    results['runtime_precompute_gm'],
                    results['runtime_generate_preimage'],
                    results['runtime_total'], results['itrs'],
                    results['num_updates']
                ])

            # compute result summary.
            dis_k_dataset_list.append(results['k_dis_dataset'])
            dis_k_preimage_list.append(results['k_dis_preimage'])
            # NOTE(review): when Gram matrices were loaded from file, this
            # appends measured runtimes on top of the loaded ones, so the
            # list can hold duplicates — kept as-is for backward
            # compatibility of the saved run_time_list and summary means.
            time_precompute_gm_list.append(results['runtime_precompute_gm'])
            time_generate_list.append(results['runtime_generate_preimage'])
            time_total_list.append(results['runtime_total'])
            itrs_list.append(results['itrs'])
            num_updates_list.append(results['num_updates'])

            # write result summary for each letter.
            with open(dir_save + fn_output_summary, 'a') as f_summary:
                csv.writer(f_summary).writerow([
                    ds_name, kernel_options['name'], num_graphs, target,
                    results['k_dis_dataset'], results['k_dis_preimage'],
                    results['runtime_precompute_gm'],
                    results['runtime_generate_preimage'],
                    results['runtime_total'], results['itrs'],
                    results['num_updates']
                ])

        # save median graphs.
        if save_preimages:
            os.makedirs(dir_save + 'preimages/', exist_ok=True)
            print('Saving preimages to files...')
            fn_best_dataset = dir_save + 'preimages/g_best_dataset.' + 'nbg' + str(
                num_graphs) + '.y' + str(target) + '.repeat' + str(1)
            saveGXL(rpg.best_from_dataset,
                    fn_best_dataset + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)
            fn_preimage = dir_save + 'preimages/g_preimage.' + 'nbg' + str(
                num_graphs) + '.y' + str(target) + '.repeat' + str(1)
            saveGXL(rpg.preimage,
                    fn_preimage + '.gxl',
                    method='default',
                    node_labels=dataset.node_labels,
                    edge_labels=dataset.edge_labels,
                    node_attrs=dataset.node_attrs,
                    edge_attrs=dataset.edge_attrs)

        # collect freshly computed Gram matrices for saving at the end.
        if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
            gram_matrix_unnorm_list.append(rpg.gram_matrix_unnorm)

    # write result summary for each class.
    if save_results:
        dis_k_dataset_mean = np.mean(dis_k_dataset_list)
        dis_k_preimage_mean = np.mean(dis_k_preimage_list)
        time_precompute_gm_mean = np.mean(time_precompute_gm_list)
        time_generate_mean = np.mean(time_generate_list)
        time_total_mean = np.mean(time_total_list)
        itrs_mean = np.mean(itrs_list)
        num_updates_mean = np.mean(num_updates_list)
        with open(dir_save + fn_output_summary, 'a') as f_summary:
            csv.writer(f_summary).writerow([
                ds_name, kernel_options['name'], num_graphs, 'all',
                dis_k_dataset_mean, dis_k_preimage_mean,
                time_precompute_gm_mean, time_generate_mean, time_total_mean,
                itrs_mean, num_updates_mean
            ])

    # write Gram matrices to file.
    if (load_gm == 'auto' and not gmfile_exist) or not load_gm:
        np.savez(dir_save + 'gram_matrix_unnorm.' + ds_name + '.' +
                 kernel_options['name'] + '.gm',
                 gram_matrix_unnorm_list=gram_matrix_unnorm_list,
                 run_time_list=time_precompute_gm_list)

    print('\ncomplete.\n')