# Example #1
# 0
# Standard library first, then project-local modules (PEP 8 grouping).
# `os` was missing although os.getcwd() is called below (and os.path/os.mkdir
# later in the file); `pickle` was missing although pickle.dump/load are used.
import os
import pickle

import numpy as np

import featuresList
import main_manager as mm
from features_calculator import featuresCalculator
from learning.TagsLoader import TagsLoader

# NOTE(review): initGraph, train_test_split, remove_sign_zero,
# calc_local_sign_features and perform_learning are referenced later in this
# file but are not imported in the visible chunk — confirm they are imported
# elsewhere in the full file.

# Absolute path of the process working directory at import time.
currentDirectory = str(os.getcwd())

if __name__ == "__main__":
    # Seed NumPy's global RNG so feature computation / learning is reproducible.
    np.random.seed(123)
    #step 1: features calculate
    wdir = os.getcwd()
    # Input edge list for the "DIP" graph (presumably the Database of
    # Interacting Proteins dataset — TODO confirm; only the name is visible).
    file_in = str(wdir) + r'/../data/directed/DIP/input/DIP.txt'

    output_dir = str(wdir) + r'/../data/directed/DIP/features/take_connected'

    calculator = featuresCalculator()
    # Full node-level feature list for a directed graph, minus the features
    # excluded below (presumably too expensive or unsupported — TODO confirm).
    features_list = featuresList.featuresList(
        directed=True, analysisType='nodes').getFeatures()
    features_list.remove('kcore')
    features_list.remove('flow')
    features_list.remove('ab')
    features_list.remove('motif4')
    result = calculator.calculateFeatures(features_list,
                                          file_in,
                                          output_dir,
                                          directed=True,
                                          analysisType='nodes')

    # step 2: learning phase
    # NOTE(review): the list literal below is truncated in this file — its
    # closing bracket and everything after it are missing, and the text jumps
    # straight into a function definition on the next line. Recover the rest
    # of this script from version control before running it.
    all_MF_classifications = [
        '0005326', '0005261', '0019215', '0004016', '0001076', '0019911',
def calc_by_train_size(graph_name, test_sizes, f_output, random_state, load,
                       deep):
    """Run the edge-sign pipeline for several train/test split sizes.

    When ``load`` is falsy: rebuild the graph, compute edge features, split
    the edges once per ``test_size`` and pickle the per-split train/test
    feature sets under ``./../data/result_social_sign/<graph>/<test_size>/``.
    When ``load`` is truthy: read those pickles back and run the learner in
    three regimes (local features only, global only, both), appending results
    to ``f_output``.

    Parameters
    ----------
    graph_name : str
        Graph name used to build every input/output path.
    test_sizes : iterable
        Test-set fractions, forwarded to ``train_test_split``.
    f_output : file-like
        Open text stream that learning results are written to.
    random_state : int
        Seed forwarded to ``train_test_split`` for reproducible splits.
    load : bool
        False -> compute and dump features; True -> load dumps and learn.
    deep : bool
        Forwarded to ``perform_learning`` (presumably selects a deep
        model — TODO confirm against perform_learning).
    """
    # One result directory per split size: .../result_social_sign/<graph>/<size>/
    output_result_dir = {}
    for test_size in test_sizes:
        output_result_dir[
            test_size] = './../data/result_social_sign/' + graph_name + '/' + str(
                test_size) + '/'
        # makedirs(exist_ok=True) also creates missing parent directories and
        # avoids the check-then-create race of the old exists()/mkdir pair
        # (os.mkdir raised FileNotFoundError when the graph dir was absent).
        os.makedirs(output_result_dir[test_size], exist_ok=True)

    if not load:
        # ---------- feature-computation phase ----------
        wdir = os.getcwd()
        file_in = str(
            wdir
        ) + r'/../data/directed/social_sign/' + graph_name + '/input/' + graph_name + '.txt'
        classification_result = [graph_name + '-tags']
        directory_tags_path = str(
            wdir) + r'/../data/directed/social_sign/' + graph_name + '/tags/'

        print(' start reload graph')
        # Connected=True presumably restricts the graph to a connected
        # component — TODO confirm against initGraph.init_graph.
        gnx = initGraph.init_graph(draw=False,
                                   file_name=file_in,
                                   directed=True,
                                   Connected=True)
        print(' finish reload graph')

        calculator = featuresCalculator()
        # Start from the full directed edge-feature list and drop the
        # features that are skipped for these graphs.
        features_list = featuresList.featuresList(True, 'edges').getFeatures()
        features_list.remove('flow')
        features_list.remove('ab')
        features_list.remove('hierarchy_energy')
        features_list.remove('edge_flow')
        features_list.remove('edge_betweenness')
        features_list.remove('motif4')
        if (graph_name in ['epinions']):
            # kcore is additionally skipped for epinions only.
            features_list.remove('kcore')

        print(features_list)
        output_dir = str(
            wdir
        ) + r'/../data/directed/social_sign/' + graph_name + '/features'
        result = calculator.calculateFeatures(features_list,
                                              file_in,
                                              output_dir,
                                              True,
                                              'edges',
                                              parallel=False)

        tagsLoader = TagsLoader(directory_tags_path, classification_result)
        tagsLoader.load_edges()
        # (the attribute name is misspelled inside TagsLoader itself)
        tags = tagsLoader.calssification_to_edge_to_tag[
            classification_result[0]]
        print(tagsLoader.calssification_to_edge_to_tag.keys())

        # Drop zero-sign edges (presumably — TODO confirm remove_sign_zero),
        # then sanity-print how well the edge set and the tag keys agree.
        remove_sign_zero(gnx, tags)
        # Materialize as a list: networkx 2.x returns a non-indexable
        # EdgeView, and train_test_split below needs an indexable sequence.
        edges = list(gnx.edges())
        print(len(edges))
        print(len(gnx.nodes()))
        print(len(tags))
        s_e = set(edges)
        s_t = set(tags.keys())
        print(s_e.difference(s_t))
        print(s_t.difference(s_e))
        # Tag vector aligned with the edge ordering.
        new_tags = []
        for e in edges:
            new_tags.append(tags[e])

        # One split per requested test size, all with the same random_state.
        X_train = {}
        X_test = {}
        Y_train = {}
        Y_test = {}
        for test_size in test_sizes:
            x_train, x_test, y_train, y_test = train_test_split(
                edges,
                new_tags,
                test_size=test_size,
                random_state=random_state)

            X_train[test_size] = x_train
            X_test[test_size] = x_test
            Y_train[test_size] = y_train
            Y_test[test_size] = y_test

        # Only the training edges are consumed here; the helper returns
        # per-split feature sets for both sides. (X_test/Y_* are kept for
        # symmetry but are unused beyond this point.)
        train_features, test_features = calc_local_sign_features(
            gnx, X_train, tags, result[1])
        for test_size in train_features.keys():
            with open(
                    output_result_dir[test_size] + 'train_features_' +
                    graph_name + '.dump', 'wb') as f:
                pickle.dump(train_features[test_size], f)
            with open(
                    output_result_dir[test_size] + 'test_features_' +
                    graph_name + '.dump', 'wb') as f:
                pickle.dump(test_features[test_size], f)
    else:
        # ---------- learning phase: reload the pickled feature sets ----------
        for test_size in test_sizes:
            print(output_result_dir[test_size] + '/train_features_' +
                  graph_name + '.dump')
            with open(
                    output_result_dir[test_size] + '/train_features_' +
                    graph_name + '.dump', 'rb') as f:
                train_features_specific = pickle.load(f)
            with open(
                    output_result_dir[test_size] + '/test_features_' +
                    graph_name + '.dump', 'rb') as f:
                test_features_specific = pickle.load(f)

            print(test_size)
            print(graph_name)
            f_output.writelines(str(test_size) + '\n')
            # Three regimes: local features only, global only, combined.
            print('local')
            f_output.writelines('local\n')
            perform_learning(train_features_specific,
                             test_features_specific,
                             f_output,
                             local=True,
                             gglobal=False,
                             deep=deep)
            print('global')
            f_output.writelines('global\n')
            perform_learning(train_features_specific,
                             test_features_specific,
                             f_output,
                             local=False,
                             gglobal=True,
                             deep=deep)
            print('Both')
            f_output.writelines('Both\n')
            perform_learning(train_features_specific,
                             test_features_specific,
                             f_output,
                             local=True,
                             gglobal=True,
                             deep=deep)
            # Push results to disk after each split size.
            f_output.flush()