lr = 0.001
max_epoch = 200  # convergence takes many epochs, probably more than 2000
x_size = nfeature
hidden_size = intermediate_size = 32
num_attention_heads = 2
num_hidden_layers = 2
y_size = nclass
graph_size = ngraph
residual_type = 'graph_raw'
# --------------------------

print('************ Start ************')
print('GraphBert, dataset: ' + dataset_name + ', Pre-training, Node Attribute Reconstruction.')
# ---- object initialization section ---------------
data_obj = DatasetLoader()
data_obj.dataset_source_folder_path = './data/' + dataset_name + '/'
data_obj.dataset_name = dataset_name
data_obj.k = k
data_obj.load_all_tag = True

bert_config = GraphBertConfig(residual_type=residual_type, k=k, x_size=nfeature,
                              y_size=y_size, hidden_size=hidden_size,
                              intermediate_size=intermediate_size,
                              num_attention_heads=num_attention_heads,
                              num_hidden_layers=num_hidden_layers)
method_obj = MethodGraphBertNodeConstruct(bert_config)
method_obj.max_epoch = max_epoch
method_obj.lr = lr
method_obj.save_pretrained_path = './result/PreTrained_GraphBert/' + dataset_name + '/node_reconstruct_model/'

result_obj = ResultSaving()
result_obj.result_destination_folder_path = './result/GraphBert/'
result_obj.result_destination_file_name = dataset_name + '_' + str(k) + '_node_reconstruction'
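# The script's running section is truncated above. Below is a minimal sketch of
# how these objects are typically wired together; prepare() and
# load_run_save_evaluate() are assumptions about the repo's Settings API, not
# verified from this excerpt:
setting_obj = Settings()
evaluate_obj = None

setting_obj.prepare(data_obj, method_obj, result_obj, evaluate_obj)
setting_obj.load_run_save_evaluate()

print('************ Finish ************')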
if dataset_name == 'MUTAG':  # NOTE: the condition of this first branch was truncated
    max_graph_size = 40      # in the source; 'MUTAG' is a guess and should be verified
elif dataset_name == 'PTC':
    max_graph_size = 120
elif dataset_name == 'NCI1':
    max_graph_size = 120
elif dataset_name == 'PROTEINS':
    max_graph_size = 620
elif dataset_name == 'COLLAB':
    max_graph_size = 500

#---- Step 1: Load Raw Graphs for Train/Test Partition ----
if 1:
    print('************ Start ************')
    print('Preprocessing dataset: ' + dataset_name)
    # ---- object initialization section ---------------
    data_obj = DatasetLoader()
    data_obj.dataset_source_folder_path = './data/' + dataset_name + '/'
    data_obj.dataset_name = dataset_name
    data_obj.load_type = 'Raw'

    method_obj = MethodProcessRaw()

    result_obj = ResultSaving()
    result_obj.result_destination_folder_path = './result/Preprocess/'
    result_obj.result_destination_file_name = dataset_name

    setting_obj = Settings()
    evaluate_obj = None
    # ------------------------------------------------------
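    # ---- running section (sketch) ------------------------
    # The running section is truncated in the source; the wiring below follows
    # the Settings pattern sketched after the pre-training script above
    # (prepare() and load_run_save_evaluate() are assumed, not verified):
    setting_obj.prepare(data_obj, method_obj, result_obj, evaluate_obj)
    setting_obj.load_run_save_evaluate()
    print('************ Finish ************')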
if dataset_name == 'cora':  # NOTE: the head of this branch was truncated in the
    nclass = 7              # source; the values are filled in from Cora's standard
    nfeature = 1433         # statistics (7 classes, 1433 features, 2708 nodes)
    ngraph = 2708
elif dataset_name == 'citeseer':
    nclass = 6
    nfeature = 3703
    ngraph = 3312
elif dataset_name == 'pubmed':
    nclass = 3
    nfeature = 500
    ngraph = 19717

#---- Step 1: WL based graph coloring ----
if 1:
    print('************ Start ************')
    print('WL, dataset: ' + dataset_name)
    # ---- object initialization section ---------------
    data_obj = DatasetLoader()
    data_obj.dataset_source_folder_path = './data/' + dataset_name + '/'
    data_obj.dataset_name = dataset_name

    method_obj = MethodWLNodeColoring()

    result_obj = ResultSaving()
    result_obj.result_destination_folder_path = './result/WL/'
    result_obj.result_destination_file_name = dataset_name

    setting_obj = Settings()
    evaluate_obj = None
    # ------------------------------------------------------

    # ---- running section ---------------------------------
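    # The running section body is truncated; the same Settings wiring sketched
    # above applies here (prepare() / load_run_save_evaluate() are assumptions):
    setting_obj.prepare(data_obj, method_obj, result_obj, evaluate_obj)
    setting_obj.load_run_save_evaluate()
    print('************ Finish ************')

# For intuition, a minimal, self-contained sketch of one Weisfeiler-Lehman
# recoloring round -- the operation MethodWLNodeColoring iterates until the node
# colors stabilize. This is an illustration, not the repo's implementation:
def wl_round(colors, neighbors):
    # colors: {node: int}; neighbors: {node: iterable of nodes}.
    # Each node's new color is a compact id for (own color, sorted neighbor colors).
    signatures = {v: (colors[v], tuple(sorted(colors[u] for u in neighbors[v])))
                  for v in colors}
    palette = {sig: i for i, sig in enumerate(sorted(set(signatures.values())))}
    return {v: palette[signatures[v]] for v in colors}

# Example: on the path graph 0-1-2 the endpoints get one color, the center another.
print(wl_round({0: 0, 1: 0, 2: 0}, {0: [1], 1: [0, 2], 2: [1]}))  # {0: 0, 1: 1, 2: 0}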
x_size = nfeature
hidden_size = intermediate_size = 32
num_attention_heads = 2
num_hidden_layers = 2
y_size = nclass
graph_size = ngraph
residual_type = 'none'
# --------------------------

print('************ Start ************')
print('GraphBert, dataset: ' + dataset_name + ', residual: ' + residual_type
      + ', k: ' + str(k) + ', hidden dimension: ' + str(hidden_size)
      + ', hidden layer: ' + str(num_hidden_layers)
      + ', attention head: ' + str(num_attention_heads))
# ---- object initialization section ---------------
data_obj = DatasetLoader()
data_obj.dataset_source_folder_path = './result/Padding/'
data_obj.dataset_source_file_name = dataset_name
data_obj.k = k

bert_config = GraphBertConfig(residual_type=residual_type, k=k, x_size=nfeature,
                              y_size=y_size, hidden_size=hidden_size,
                              intermediate_size=intermediate_size,
                              num_attention_heads=num_attention_heads,
                              num_hidden_layers=num_hidden_layers)
method_obj = MethodGraphBertNodeClassification(bert_config)
#---- set spy_tag to False to run faster ----
method_obj.spy_tag = True
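# The rest of the fine-tuning script is truncated. A minimal sketch of the usual
# continuation under the same Settings pattern; the result file name and the
# EvaluateAcc evaluator are assumptions (substitute whatever evaluator the repo
# actually ships):
result_obj = ResultSaving()
result_obj.result_destination_folder_path = './result/GraphBert/'
result_obj.result_destination_file_name = dataset_name + '_' + str(k) + '_node_classification'  # assumed name

setting_obj = Settings()
evaluate_obj = EvaluateAcc()  # hypothetical accuracy evaluator; None disables evaluation

setting_obj.prepare(data_obj, method_obj, result_obj, evaluate_obj)
setting_obj.load_run_save_evaluate()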
from code.DatasetLoader import DatasetLoader
import numpy as np
import scipy.sparse as sp

'''
To learn how the data is loaded and passed to WL, and how WL processes the data
and returns its output.
'''

if 0:
    # Toy experiment: symmetrize a random 0/1 adjacency matrix. Wherever adj is
    # asymmetric (adj[j, i] = 1 but adj[i, j] = 0), the transposed entry is
    # added in, so adj2 is the symmetric closure of adj.
    adj = np.random.randint(2, size=(5, 5))
    print(adj)
    adj = sp.coo_matrix(adj)
    print(adj)
    _adj_T = adj.T.multiply(adj.T > adj)  # entries present only in the transpose
    _adj = adj.multiply(adj.T > adj)      # the matching (all-zero) entries in adj
    print(_adj_T.toarray())
    print(_adj.toarray())
    adj2 = adj + _adj_T - _adj
    print(adj2.toarray())
    assert False

dataset_name = 'cora'

data_obj = DatasetLoader()
data_obj.dataset_source_folder_path = './data/' + dataset_name + '/'
data_obj.dataset_name = dataset_name
data_obj.load()
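# A sketch of handing the loaded data to the WL coloring step. The import path
# mirrors the DatasetLoader import above, and the .data attribute / run() entry
# point are assumptions based on how the pipeline scripts configure their
# method objects:
from code.MethodWLNodeColoring import MethodWLNodeColoring

loaded_data = data_obj.load()  # re-invoke load() to capture its return value
wl_obj = MethodWLNodeColoring()
wl_obj.data = loaded_data
wl_result = wl_obj.run()
print(type(wl_result))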