from features_calculator import featuresCalculator
from learning.TagsLoader import TagsLoader
import main_manager as mm
import numpy as np
import os  # fix: os is used below (os.getcwd) but was never imported

# NOTE(review): `featuresList` is referenced below but not imported in this
# chunk -- confirm it is in scope in the full file or add the project import.

currentDirectory = str(os.getcwd())

if __name__ == "__main__":
    # Fix the RNG seed so feature-calculation / learning runs are reproducible.
    np.random.seed(123)

    # Step 1: compute per-node features for the DIP graph (connected part only,
    # per the output directory name).
    wdir = os.getcwd()
    file_in = str(wdir) + r'/../data/directed/DIP/input/DIP.txt'
    output_dir = str(wdir) + r'/../data/directed/DIP/features/take_connected'

    calculator = featuresCalculator()
    features_list = featuresList.featuresList(
        directed=True, analysisType='nodes').getFeatures()
    # Drop the features excluded from this run.
    for unwanted in ('kcore', 'flow', 'ab', 'motif4'):
        features_list.remove(unwanted)

    result = calculator.calculateFeatures(features_list, file_in, output_dir,
                                          directed=True, analysisType='nodes')

    # Step 2: learning phase -- classification ids (GO molecular-function,
    # presumably; verify against the tags directory).
    # NOTE(review): this list was truncated in the chunk under review; the
    # remaining ids must be restored from the original file.
    all_MF_classifications = [
        '0005326', '0005261', '0019215', '0004016', '0001076', '0019911',
        '0003913', '0016866', '0004812', '0005544', '0003712', '0000166',
        '0016740', '0004715', '0016746', '0004896', '0005319', '0004383',
    ]
from learning.TagsLoader import TagsLoader
import main_manager as mm
import os  # fix: os is used below (os.getcwd) but was never imported

# NOTE(review): `featuresCalculator` and `featuresList` are referenced below
# but not imported in this chunk -- confirm they are in scope in the full file.

currentDirectory = str(os.getcwd())

if __name__ == "__main__":
    # Step 1: compute per-edge features for the slashdot signed social graph.
    wdir = os.getcwd()
    file_in = str(
        wdir) + r'/../data/directed/social_sign/slashdot/input/slashdot.txt'
    output_dir = str(wdir) + r'/../data/directed/social_sign/slashdot/features'

    calculator = featuresCalculator()
    features_list = featuresList.featuresList(True, 'edges').getFeatures()
    # Drop the features excluded from this run.
    for unwanted in ('motif4', 'flow', 'ab', 'hierarchy_energy',
                     'edge_flow', 'edge_betweenness'):
        features_list.remove(unwanted)
    print(features_list)

    result = calculator.calculateFeatures(features_list, file_in, output_dir,
                                          True, 'edges', parallel=False)
    # result[1] holds the computed feature map (per the prints below).
    print(result[1].keys())
    print(len(result[1]))
if __name__ == "__main__":
    # Compute per-node features for each live_journal snapshot.
    # NOTE(review): `os`, `featuresCalculator`, `featuresList`, `features` and
    # `directed_lj` are referenced here but not defined/imported in this chunk
    # -- confirm they are in scope in the full file.
    wdir = os.getcwd()
    for i in range(1, 2):
        # Snapshot directory names are zero-padded to four digits ('0001', ...).
        if i < 10:
            snap = '000' + str(i)
        else:
            snap = '00' + str(i)
        print(snap)

        file_in = (str(wdir) + r'/../data/directed/live_journal/' + snap +
                   r'/input/graph.txt')
        output_dir = (str(wdir) + r'/../data/directed/live_journal/' + snap +
                      r'/features')

        calculator = featuresCalculator()
        features_list = featuresList.featuresList(True, 'nodes').getFeatures()
        # Drop the features excluded from this run.
        for unwanted in ('flow', 'ab', 'eccentricity',
                         'load_centrality', 'hierarchy_energy'):
            features_list.remove(unwanted)
        result = calculator.calculateFeatures(features_list, file_in,
                                              output_dir, True, 'nodes')

        # Motif features are computed by a separate routine.
        # NOTE(review): this rebind discards the previous `result`; confirm the
        # first calculateFeatures result is only needed for its side effects.
        motif_path = str(wdir) + r'/../graph-fetures/algo/motifVariations'
        result = features.calc_fetures_vertices(file_in, motif_path, output_dir,
                                                directed=True,
                                                takeConnected=True,
                                                fetures_list=directed_lj,
                                                return_map=True)
        print(result[1].keys())

# NOTE(review): this list was truncated in the chunk under review; restore the
# remaining entries from the original file.
doi = ['summer', 'the beatles', 'cars']
def calc_by_train_size(graph_name, test_sizes, f_output, random_state, load, deep):
    """Run sign-prediction learning for ``graph_name`` at several train/test splits.

    Parameters
    ----------
    graph_name : str
        Name of the signed social graph (e.g. ``'epinions'``); used to locate
        the input/tags/features directories under ``../data/directed/social_sign/``.
    test_sizes : iterable
        Test-set fractions forwarded to ``train_test_split``; one result
        directory is created per size.
    f_output : file object
        Open text file receiving the learning-phase log lines.
    random_state : int
        Seed forwarded to ``train_test_split`` for reproducible splits.
    load : bool
        When False, recompute features and pickle the per-split feature dicts.
        The learning phase always re-loads features from the pickles.
    deep : bool
        Forwarded to ``perform_learning`` (deep-learning toggle).
    """
    # One result directory per requested test size.
    output_result_dir = {}
    for test_size in test_sizes:
        output_result_dir[test_size] = ('./../data/result_social_sign/' +
                                        graph_name + '/' + str(test_size) + '/')
        # fix: makedirs(exist_ok=True) also creates missing parent directories;
        # the original os.mkdir failed when the graph-level directory was absent.
        os.makedirs(output_result_dir[test_size], exist_ok=True)

    if not load:
        wdir = os.getcwd()
        file_in = (str(wdir) + r'/../data/directed/social_sign/' + graph_name +
                   '/input/' + graph_name + '.txt')
        classification_result = [graph_name + '-tags']
        directory_tags_path = (str(wdir) + r'/../data/directed/social_sign/' +
                               graph_name + '/tags/')

        print(' start reload graph')
        gnx = initGraph.init_graph(draw=False, file_name=file_in,
                                   directed=True, Connected=True)
        print(' finish reload graph')

        calculator = featuresCalculator()
        features_list = featuresList.featuresList(True, 'edges').getFeatures()
        # Drop the features excluded from this run.
        for unwanted in ('flow', 'ab', 'hierarchy_energy', 'edge_flow',
                         'edge_betweenness', 'motif4'):
            features_list.remove(unwanted)
        if graph_name in ['epinions']:
            # NOTE(review): kcore is additionally excluded for epinions --
            # the reason is not visible here; confirm before changing.
            features_list.remove('kcore')
        print(features_list)

        output_dir = (str(wdir) + r'/../data/directed/social_sign/' +
                      graph_name + '/features')
        result = calculator.calculateFeatures(features_list, file_in,
                                              output_dir, True, 'edges',
                                              parallel=False)

        tagsLoader = TagsLoader(directory_tags_path, classification_result)
        tagsLoader.load_edges()
        tags = tagsLoader.calssification_to_edge_to_tag[classification_result[0]]
        print(tagsLoader.calssification_to_edge_to_tag.keys())

        # Drop zero-sign edges, then sanity-print edge/tag set agreement.
        remove_sign_zero(gnx, tags)
        edges = gnx.edges()
        print(len(edges))
        print(len(gnx.nodes()))
        print(len(tags))
        s_e = set(edges)
        s_t = set(tags.keys())
        print(s_e.difference(s_t))
        print(s_t.difference(s_e))

        new_tags = [tags[e] for e in edges]

        # Split edges/tags once per requested test size.
        X_train = {}
        X_test = {}
        Y_train = {}
        Y_test = {}
        for test_size in test_sizes:
            x_train, x_test, y_train, y_test = train_test_split(
                edges, new_tags, test_size=test_size,
                random_state=random_state)
            X_train[test_size] = x_train
            X_test[test_size] = x_test
            Y_train[test_size] = y_train
            Y_test[test_size] = y_test

        train_features, test_features = calc_local_sign_features(
            gnx, X_train, tags, result[1])

        # Pickle the per-split feature dicts so later runs can pass load=True.
        for test_size in train_features.keys():
            with open(output_result_dir[test_size] + 'train_features_' +
                      graph_name + '.dump', 'wb') as f:
                pickle.dump(train_features[test_size], f)
            with open(output_result_dir[test_size] + 'test_features_' +
                      graph_name + '.dump', 'wb') as f:
                pickle.dump(test_features[test_size], f)

    # Learning phase: always re-load the pickled features (whether just
    # computed above or produced by an earlier run).
    for test_size in test_sizes:
        print(output_result_dir[test_size] + '/train_features_' +
              graph_name + '.dump')
        with open(output_result_dir[test_size] + '/train_features_' +
                  graph_name + '.dump', 'rb') as f:
            train_features_specific = pickle.load(f)
        with open(output_result_dir[test_size] + '/test_features_' +
                  graph_name + '.dump', 'rb') as f:
            test_features_specific = pickle.load(f)

        print(test_size)
        print(graph_name)
        f_output.writelines(str(test_size) + '\n')

        # Evaluate local-only, global-only, and combined feature sets.
        print('local')
        f_output.writelines('local\n')
        perform_learning(train_features_specific, test_features_specific,
                         f_output, local=True, gglobal=False, deep=deep)
        print('global')
        f_output.writelines('global\n')
        perform_learning(train_features_specific, test_features_specific,
                         f_output, local=False, gglobal=True, deep=deep)
        print('Both')
        f_output.writelines('Both\n')
        perform_learning(train_features_specific, test_features_specific,
                         f_output, local=True, gglobal=True, deep=deep)
        f_output.flush()