def test_create_community():
    num_surfaces = 18   # currently unused in this test
    num_points = 400    # currently unused in this test
    num_perm = 3
    # types = ['caveman_2', 'caveman_4']
    graphs_create = create_graphs.create(create_graphs.Graph_Args('caveman_4'))
    np.random.shuffle(graphs_create)
    feature_graphs, pos, graphs = generate_feature_list(graphs_create, num_perm)
    # Shuffle the three lists in unison so features, positions, and graphs stay aligned.
    feature_graphs[:], pos[:], graphs[:] = shuffle_list(
        list(feature_graphs), list(pos), list(graphs))
    # graphs = np.asarray(graphs, dtype=np.float32)
    # Convert each graph to its adjacency matrix before stacking into one array.
    # (nx.to_numpy_matrix is removed in networkx >= 3.0; nx.to_numpy_array is the modern equivalent.)
    for i in range(len(graphs)):
        graphs[i] = nx.to_numpy_matrix(graphs[i])
    graphs = np.array(graphs, dtype=np.float32)
    feature_graphs = np.array(feature_graphs, dtype=np.float32)
    print("graphs[0].shape: ", graphs[0].shape)
    print("feature_graphs[0].shape: ", feature_graphs[0].shape)
    counter = 0
    # Draw a row x col panel grid; the [0:40] slice likely exceeds what a 2x2 grid shows.
    draw_graph(G_arr=graphs[0:40], row=2, col=2,
               pos=feature_graphs[0:40],
               fname='comm/comm_' + str(counter))
def get_loaders(args):
    graphs = create_graphs.create(args)

    # Split datasets. NOTE: the validation set is the first 20% of the
    # shuffled list and therefore overlaps the training split.
    random.seed(123)
    shuffle(graphs)
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    graphs_validate = graphs[0:int(0.2 * graphs_len)]

    # Average node count of the validation graphs.
    graph_validate_len = 0
    for graph in graphs_validate:
        graph_validate_len += graph.number_of_nodes()
    graph_validate_len /= len(graphs_validate)
    print('graph_validate_len', graph_validate_len)

    # Average node count of the test graphs.
    graph_test_len = 0
    for graph in graphs_test:
        graph_test_len += graph.number_of_nodes()
    graph_test_len /= len(graphs_test)
    print('graph_test_len', graph_test_len)

    args.max_num_node = max(
        [graphs[i].number_of_nodes() for i in range(len(graphs))])
    max_num_edge = max(
        [graphs[i].number_of_edges() for i in range(len(graphs))])
    min_num_edge = min(
        [graphs[i].number_of_edges() for i in range(len(graphs))])

    # Show graph statistics.
    print('total graph num: {}, training set: {}'.format(
        len(graphs), len(graphs_train)))
    print('max number node: {}'.format(args.max_num_node))
    print('max/min number edge: {}; {}'.format(max_num_edge, min_num_edge))
    print('max previous node: {}'.format(args.max_prev_node))

    # Save ground-truth graphs. The full list is saved under both names;
    # to get the train and test sets after loading, you need to slice manually.
    save_graph_list(graphs, args.result_dir + args.fname_train + '0.dat')
    save_graph_list(graphs, args.result_dir + args.fname_test + '0.dat')
    print('train and test graphs saved at: ',
          args.result_dir + args.fname_test + '0.dat')

    # Dataset initialization.
    train_dataset = DualGraph_sampler_flow(graphs_train,
                                           max_num_node=args.max_num_node)
    test_dataset = DualGraph_sampler_flow(graphs_test,
                                          max_num_node=args.max_num_node)
    _ = train_dataset[1]  # smoke-test a single sample
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)
    return train_loader, test_loader
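# Usage sketch (illustrative, not from the original file): how the loaders
# above might be consumed. `Args` is assumed to provide batch_size,
# num_workers, result_dir, and the fname_* fields used in get_loaders;
# the per-batch layout depends on DualGraph_sampler_flow.__getitem__.
if __name__ == '__main__':
    args = Args()
    train_loader, test_loader = get_loaders(args)
    for batch in train_loader:
        # Inspect the first batch only; its structure is whatever
        # DualGraph_sampler_flow.__getitem__ returns, collated by DataLoader.
        print(type(batch))
        break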
def __init__(self, type_dataset='caveman_small', proportion=(0.8, 0.2),
             proportion_edge=(.8, .2), num_perm=10):
    graph_args = create_graphs.Graph_Args(type=type_dataset)
    graphs_create = create_graphs.create(graph_args)
    np.random.shuffle(graphs_create)
    self.num_nodes = graphs_create[0].number_of_nodes()
    self.num_edges = graphs_create[0].number_of_edges()

    feature_graphs, pos, graphs = generate_feature_list(graphs_create, num_perm)
    self.num_graphs = len(graphs)
    # Shuffle the three lists in unison so they stay aligned.
    feature_graphs[:], pos[:], graphs[:] = shuffle_list(
        list(feature_graphs), list(pos), list(graphs))

    # Optional feature standardization (disabled):
    # feature_graphs = np.array(feature_graphs)
    # X_features = feature_graphs.reshape(-1, feature_graphs.shape[-1])
    # std_scale = preprocessing.StandardScaler().fit(X_features)
    # X_std = std_scale.transform(X_features)
    # feature_graphs = X_std.reshape(feature_graphs.shape[0],
    #                                feature_graphs.shape[1],
    #                                feature_graphs.shape[2])

    # Input-graph modes: 'another', 'identity', 'full'.
    input_graphs = self.generate_input_graphs('identity', self.num_graphs,
                                              self.num_nodes,
                                              proportion=proportion_edge)
    self.num_features = feature_graphs[0].shape[-1]

    self.graphs_test = graphs[int(proportion[0] * self.num_graphs):]  # last 20%
    # save_graph_list(self.graphs_test, 'gt.dat')
    # for i in range(self.num_graphs):
    #     graphs[i] = nx.to_numpy_matrix(graphs[i])

    n_training = 0.96  # was 0.8
    n_eval = 0.02      # was 0.1
    n_test = 0.02      # was 0.1 (implied by the cut points below)

    # Compute the two cut points once and slice every aligned list with them.
    train_end = int(self.num_graphs * n_training)
    eval_end = train_end + int(self.num_graphs * n_eval)

    graphs_train = graphs[:train_end]
    graphs_validate = graphs[train_end:eval_end]
    graphs_test = graphs[eval_end:]

    feature_train = feature_graphs[:train_end]
    feature_validate = feature_graphs[train_end:eval_end]
    feature_test = feature_graphs[eval_end:]

    input_graph_train = input_graphs[:train_end]
    input_graph_validate = input_graphs[train_end:eval_end]
    input_graph_test = input_graphs[eval_end:]

    self.pos_train = pos[:train_end]
    self.pos_validate = pos[train_end:eval_end]
    self.pos_test = pos[eval_end:]

    self.num_val = len(graphs_validate)
    self.num_test = len(graphs_test)
    self.num_training = len(graphs_train)

    self.train_generator = self.batch_generator(graphs_train, feature_train,
                                                input_graph_train)
    self.valid_generator = self.batch_generator(graphs_validate,
                                                feature_validate,
                                                input_graph_validate)
    self.test_generator = self.batch_generator(graphs_test, feature_test,
                                               input_graph_test)

    print("DATASET:", type_dataset)
    print("num_graphs:", self.num_graphs)
    print("num_nodes by graph:", self.num_nodes)
    print("num_edges by graph:", self.num_edges)
    print("num_features by node:", self.num_features)
    print("num_training:", self.num_training)
    print("num_val:", self.num_val)
    print("num_test:", self.num_test)
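# Usage sketch (illustrative, not from the original file). The enclosing
# class is not shown above, so `GraphDataset` is a placeholder name, and
# the tuple layout of each yielded batch depends on batch_generator:
#
#   dataset = GraphDataset(type_dataset='caveman_small', num_perm=10)
#   batch = next(dataset.train_generator)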
if not os.path.isdir(args.figure_save_path):
    os.makedirs(args.figure_save_path)
if not os.path.isdir(args.timing_save_path):
    os.makedirs(args.timing_save_path)
if not os.path.isdir(args.figure_prediction_save_path):
    os.makedirs(args.figure_prediction_save_path)
if not os.path.isdir(args.nll_save_path):
    os.makedirs(args.nll_save_path)

time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
# logging.basicConfig(filename='logs/train' + time + '.log', level=logging.DEBUG)
if args.clean_tensorboard:
    if os.path.isdir("tensorboard"):
        shutil.rmtree("tensorboard")
configure(f"tensorboard/run-{time}", flush_secs=5)

graphs = create_graphs.create(args)

# Split datasets. NOTE: the validation set is the first 20% of the shuffled
# list and therefore overlaps the training split.
random.seed(123)
random.shuffle(graphs)
graphs_len = len(graphs)
graphs_test = graphs[int(0.8 * graphs_len):]
graphs_train = graphs[0:int(0.8 * graphs_len)]
graphs_validate = graphs[0:int(0.2 * graphs_len)]

# If using pre-saved graphs:
# dir_input = "/dfs/scratch0/jiaxuany0/graphs/"
# fname_test = dir_input + args.note + '_' + args.graph_type + '_' + str(args.num_layers) + '_' + str(
#     args.hidden_size_rnn) + '_test_' + str(0) + '.dat'
# graphs = load_graph_list(fname_test, is_real=True)
# graphs_test = graphs[int(0.8 * graphs_len):]
from utils import prepare_for_MADE

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# if __name__ == '__main__':
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# random.seed(123)
# np.random.seed(123)
# torch.manual_seed(123)

args = Args()

# Do not comment this line out when use_pre_saved_graphs is True:
# it also sets args.max_prev_node.
graphs = create_graphs.create(args)
if args.use_pre_saved_graphs:
    with open(args.graph_save_path + args.fname_test + '0.dat', 'rb') as fin:
        graphs = pickle.load(fin)

# If using pre-saved graphs:
# dir_input = "/dfs/scratch0/jiaxuany0/graphs/"
# fname_test = dir_input + args.note + '_' + args.graph_type + '_' + str(args.num_layers) + '_' + str(
#     args.hidden_size_rnn) + '_test_' + str(0) + '.dat'
# graphs = load_graph_list(fname_test, is_real=True)
# graphs_test = graphs[int(0.8 * graphs_len):]
# graphs_train = graphs[0:int(0.8 * graphs_len)]
# graphs_validate = graphs[int(0.2 * graphs_len):int(0.4 * graphs_len)]