import featuresList from features_calculator import featuresCalculator from learning.TagsLoader import TagsLoader import main_manager as mm import numpy as np currentDirectory = str(os.getcwd()) if __name__ == "__main__": np.random.seed(123) #step 1: features calculate wdir = os.getcwd() file_in = str(wdir) + r'/../data/directed/DIP/input/DIP.txt' output_dir = str(wdir) + r'/../data/directed/DIP/features/take_connected' calculator = featuresCalculator() features_list = featuresList.featuresList( directed=True, analysisType='nodes').getFeatures() features_list.remove('kcore') features_list.remove('flow') features_list.remove('ab') features_list.remove('motif4') result = calculator.calculateFeatures(features_list, file_in, output_dir, directed=True, analysisType='nodes') # step 2: learning phase all_MF_classifications = [ '0005326', '0005261', '0019215', '0004016', '0001076', '0019911',
def calc_by_train_size(graph_name, test_sizes, f_output, random_state, load, deep):
    """Run the signed-edge learning experiment for one graph over several train/test splits.

    Two-phase design (select with ``load``):
      * ``load`` falsy  -> rebuild the graph, compute edge features, split into
        train/test per ``test_size``, and pickle the per-split feature sets to disk.
      * ``load`` truthy -> unpickle the previously dumped feature sets and run
        ``perform_learning`` in three modes (local / global / both), logging to
        ``f_output``.

    Parameters (types inferred from usage — not declared in SOURCE):
      graph_name   -- str; dataset directory name under data/directed/social_sign/.
      test_sizes   -- iterable of test-fraction values; also used as result-dir keys.
      f_output     -- open writable text file; receives experiment log lines.
      random_state -- seed forwarded to train_test_split for reproducible splits.
      load         -- bool-ish; False = compute & dump features, True = load & learn.
      deep         -- forwarded unchanged to perform_learning.

    Side effects: creates result directories, writes/reads pickle dumps, prints
    progress, and appends to ``f_output``.

    NOTE(review): original indentation was lost; the nesting below (learning loop
    inside the ``else`` branch) is reconstructed from the token stream — confirm
    against the original file. Relies on module-level names (os, pickle,
    initGraph, featuresCalculator, featuresList, TagsLoader, remove_sign_zero,
    calc_local_sign_features, train_test_split, perform_learning) defined or
    imported elsewhere in the file.
    """
    # Ensure one result directory per test size exists.
    # NOTE(review): os.mkdir does not create parents — assumes
    # ./../data/result_social_sign/<graph_name>/ already exists; verify.
    output_result_dir = {}
    for test_size in test_sizes:
        output_result_dir[
            test_size] = './../data/result_social_sign/' + graph_name + '/' + str(
                test_size) + '/'
        if (not os.path.exists(output_result_dir[test_size])):
            os.mkdir(output_result_dir[test_size])
    if not load:
        # ---- Phase 1: compute features from scratch and dump them. ----
        wdir = os.getcwd()
        file_in = str(
            wdir
        ) + r'/../data/directed/social_sign/' + graph_name + '/input/' + graph_name + '.txt'
        classification_result = [graph_name + '-tags']
        directory_tags_path = str(
            wdir) + r'/../data/directed/social_sign/' + graph_name + '/tags/'
        # file_in = str(wdir) + r'/../data/directed/social_sign/epinions/input/epinions.txt'
        # classification_wiki_result = ['epinions-tags']
        # directory_tags_path = str(wdir) + r'/../data/directed/social_sign/epinions/tags/'
        # file_in = str(wdir) + r'/../data/roi-graph.txt'
        # classification_wiki_result = ['roi-tags']
        # directory_tags_path = str(wdir) + r'/../data/'
        print(' start reload graph')
        # [ggt, gnx] = initGraph.init_graph(draw = False);
        # Load the directed graph, restricted to its connected part
        # (Connected=True — exact semantics defined in initGraph, not visible here).
        gnx = initGraph.init_graph(draw=False,
                                   file_name=file_in,
                                   directed=True,
                                   Connected=True)
        print(' finish reload graph')
        calculator = featuresCalculator()
        # Edge-level feature set; drop the features that are too expensive or
        # unsupported for these graphs (reason not stated in SOURCE).
        features_list = featuresList.featuresList(True, 'edges').getFeatures()
        features_list.remove('flow')
        features_list.remove('ab')
        features_list.remove('hierarchy_energy')
        features_list.remove('edge_flow')
        features_list.remove('edge_betweenness')
        features_list.remove('motif4')
        # kcore is additionally skipped for epinions — presumably too costly
        # on that graph; TODO confirm.
        if (graph_name in ['epinions']):
            features_list.remove('kcore')
        print(features_list)
        # output_dir = str(wdir) + r'/../data/directed/social_sign/epinions/features'
        output_dir = str(
            wdir
        ) + r'/../data/directed/social_sign/' + graph_name + '/features'
        # output_dir = str(wdir) + r'/../data/'
        # result[1] is consumed below by calc_local_sign_features; its exact
        # contents are defined by calculateFeatures (not visible here).
        result = calculator.calculateFeatures(features_list,
                                              file_in,
                                              output_dir,
                                              True,
                                              'edges',
                                              parallel=False)
        # Load the edge tags (signs) for this graph.
        # NOTE: 'calssification' is a typo in the TagsLoader attribute name,
        # preserved here because it is the real attribute.
        tagsLoader = TagsLoader(directory_tags_path, classification_result)
        tagsLoader.load_edges()
        # tags = tagsLoader.calssification_to_edge_to_tag['epinions-tags']
        tags = tagsLoader.calssification_to_edge_to_tag[
            classification_result[0]]
        # tags = tagsLoader.calssification_to_edge_to_tag['roi-tags']
        print(tagsLoader.calssification_to_edge_to_tag.keys())
        # Drop zero-sign edges (mutates gnx/tags in place — defined elsewhere).
        remove_sign_zero(gnx, tags)
        edges = gnx.edges()
        # Diagnostics: sizes and any edge/tag mismatches in either direction.
        print(len(edges))
        print(len(gnx.nodes()))
        print(len(tags))
        s_e = set(edges)
        s_t = set(tags.keys())
        print(s_e.difference(s_t))
        print(s_t.difference(s_e))
        # Align tag list with edge iteration order for train_test_split.
        new_tags = []
        for e in edges:
            new_tags.append(tags[e])
        # random_state = random.randint(0,len(edges))
        # random_state = 2
        # One split per requested test size, all with the same seed.
        X_train = {}
        X_test = {}
        Y_train = {}
        Y_test = {}
        for test_size in test_sizes:
            x_train, x_test, y_train, y_test = train_test_split(
                edges,
                new_tags,
                test_size=test_size,
                random_state=random_state)
            X_train[test_size] = x_train
            X_test[test_size] = x_test
            Y_train[test_size] = y_train
            Y_test[test_size] = y_test
        # print X_train
        # print X_test
        # print result[1]
        # Build per-split local sign features from the train edges and the
        # globally computed features (result[1]).
        train_features, test_features = calc_local_sign_features(
            gnx, X_train, tags, result[1])
        # print train_features
        # print test_features
        #
        # Persist each split's feature sets for the load=True phase.
        for test_size in train_features.keys():
            with open(
                    output_result_dir[test_size] + 'train_features_' +
                    graph_name + '.dump', 'wb') as f:
                pickle.dump(train_features[test_size], f)
            with open(
                    output_result_dir[test_size] + 'test_features_' +
                    graph_name + '.dump', 'wb') as f:
                pickle.dump(test_features[test_size], f)
    #
    else:
        # ---- Phase 2: load previously dumped features and learn. ----
        # NOTE(review): the extra '/' before the file names yields a double
        # slash (dirs above already end with '/'); harmless on POSIX.
        for test_size in test_sizes:
            print(output_result_dir[test_size] + '/train_features_' +
                  graph_name + '.dump')
            with open(
                    output_result_dir[test_size] + '/train_features_' +
                    graph_name + '.dump', 'rb') as f:
                train_features_specific = pickle.load(f)
            with open(
                    output_result_dir[test_size] + '/test_features_' +
                    graph_name + '.dump', 'rb') as f:
                test_features_specific = pickle.load(f)
            print(test_size)
            print(graph_name)
            f_output.writelines(str(test_size) + '\n')
            # train_features_specific = train_features[test_size]
            # test_features_specific = test_features[test_size]
            # Run learning three times: local-only, global-only, and both.
            print('local')
            f_output.writelines('local\n')
            perform_learning(train_features_specific,
                             test_features_specific,
                             f_output,
                             local=True,
                             gglobal=False,
                             deep=deep)
            print('global')
            f_output.writelines('global\n')
            perform_learning(train_features_specific,
                             test_features_specific,
                             f_output,
                             local=False,
                             gglobal=True,
                             deep=deep)
            print('Both')
            f_output.writelines('Both\n')
            perform_learning(train_features_specific,
                             test_features_specific,
                             f_output,
                             local=True,
                             gglobal=True,
                             deep=deep)
            # Flush per split so partial results survive a crash.
            f_output.flush()