import networkx as nx
import numpy as np

import create_graphs
# project-local helpers assumed importable: generate_feature_list, shuffle_list, draw_graph


def test_create_community():
    num_surfaces = 18  # unused in this test
    num_points = 400  # unused in this test
    num_perm = 3
    #types=['caveman_2','caveman_4']
    graphs_create = create_graphs.create(create_graphs.Graph_Args('caveman_4'))
    np.random.shuffle(graphs_create)
    feature_graphs, pos, graphs = generate_feature_list(
        graphs_create, num_perm)
    feature_graphs[:], pos[:], graphs[:] = shuffle_list(
        list(feature_graphs), list(pos), list(graphs))

    # convert each networkx graph to a dense (N, N) float32 adjacency array
    for i in range(len(graphs)):
        graphs[i] = nx.to_numpy_array(graphs[i])  # nx.to_numpy_matrix was removed in networkx 3.0
    graphs = np.array(graphs, dtype=np.float32)
    feature_graphs = np.array(feature_graphs, dtype=np.float32)

    print("graphs[0].shape: ", graphs[0].shape)
    print("feature_graphs[0].shape: ", feature_graphs[0].shape)
    counter = 0
    draw_graph(G_arr=graphs[0:40],
               row=2,
               col=2,
               pos=feature_graphs[0:40],
               fname='comm/comm_' + str(counter))
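
The test leans on a project helper, shuffle_list, that applies a single permutation to several parallel lists so graphs, positions, and node features stay aligned. A minimal sketch of that assumed contract (the real helper may differ):

import random

def shuffle_list(*lists):
    # zip the parallel lists, shuffle the tuples once, and unzip, so every
    # list is reordered by the same permutation
    combined = list(zip(*lists))
    random.shuffle(combined)
    return [list(t) for t in zip(*combined)]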

Example #2

import random

import torch

import create_graphs
# project-local helpers assumed importable: save_graph_list, DualGraph_sampler_flow

def get_loaders(args):
    graphs = create_graphs.create(args)

    # split datasets
    random.seed(123)
    random.shuffle(graphs)
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    # note: the validation graphs are the first 20% of the shuffled list and
    # therefore overlap the training split
    graphs_validate = graphs[0:int(0.2 * graphs_len)]

    # average node count over the validation graphs
    graph_validate_len = sum(g.number_of_nodes() for g in graphs_validate)
    graph_validate_len /= len(graphs_validate)
    print('graph_validate_len', graph_validate_len)

    # average node count over the test graphs
    graph_test_len = sum(g.number_of_nodes() for g in graphs_test)
    graph_test_len /= len(graphs_test)
    print('graph_test_len', graph_test_len)

    args.max_num_node = max(g.number_of_nodes() for g in graphs)
    max_num_edge = max(g.number_of_edges() for g in graphs)
    min_num_edge = min(g.number_of_edges() for g in graphs)

    # show graphs statistics
    print('total graph num: {}, training set: {}'.format(
        len(graphs), len(graphs_train)))
    print('max number node: {}'.format(args.max_num_node))
    print('max/min number edge: {}; {}'.format(max_num_edge, min_num_edge))
    print('max previous node: {}'.format(args.max_prev_node))  # set by create_graphs.create (see Example #5)

    # save ground-truth graphs (the full list is saved to both files;
    # slice manually after loading to recover the train and test sets)
    save_graph_list(graphs, args.result_dir + args.fname_train + '0.dat')
    save_graph_list(graphs, args.result_dir + args.fname_test + '0.dat')
    print('train and test graphs saved at: ',
          args.result_dir + args.fname_test + '0.dat')

    ### dataset initialization
    train_dataset = DualGraph_sampler_flow(graphs_train,
                                           max_num_node=args.max_num_node)
    test_dataset = DualGraph_sampler_flow(graphs_test,
                                          max_num_node=args.max_num_node)
    _ = train_dataset[1]  # smoke-test: fetch a single sample

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=args.num_workers)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=args.batch_size,
                                              num_workers=args.num_workers)

    return train_loader, test_loader
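
A hedged usage sketch for get_loaders; the driver lines below are hypothetical, and only the Args fields referenced in the function body come from the snippet:

# Hypothetical driver (Args fields assumed: batch_size, num_workers,
# result_dir, fname_train, fname_test, max_prev_node).
args = Args()
train_loader, test_loader = get_loaders(args)
batch = next(iter(train_loader))  # fetch one batch from the flow sampler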

Example #3

    def __init__(self,
                 type_dataset='caveman_small',
                 proportion=(0.8, 0.2),
                 proportion_edge=(0.8, 0.2),
                 num_perm=10):
        # constructor of a dataset class; the enclosing class definition is
        # not part of this example

        graph_args = create_graphs.Graph_Args(type=type_dataset)
        graphs_create = create_graphs.create(graph_args)
        np.random.shuffle(graphs_create)
        self.num_nodes = graphs_create[0].number_of_nodes()
        self.num_edges = graphs_create[0].number_of_edges()
        feature_graphs, pos, graphs = generate_feature_list(
            graphs_create, num_perm)
        self.num_graphs = len(graphs)
        feature_graphs[:], pos[:], graphs[:] = shuffle_list(
            list(feature_graphs), list(pos), list(graphs))

        #feature_graphs = np.array(feature_graphs)
        #X_features = feature_graphs.reshape(-1,feature_graphs.shape[-1])
        #std_scale = preprocessing.StandardScaler().fit(X_features)
        #X_std = std_scale.transform(X_features)
        #feature_graphs = X_std.reshape(feature_graphs.shape[0],feature_graphs.shape[1],feature_graphs.shape[2])

        # input-graph initialization mode: 'another', 'identity', or 'full'
        input_graphs = self.generate_input_graphs('identity',
                                                  self.num_graphs,
                                                  self.num_nodes,
                                                  proportion=proportion_edge)
        self.num_features = feature_graphs[0].shape[-1]

        self.graphs_test = graphs[int(proportion[0] * self.num_graphs):]  #0.8
        #save_graph_list(self.graphs_test, 'gt.dat')

        # for i in range(self.num_graphs):
        #   graphs[i] = nx.to_numpy_matrix(graphs[i])

        n_training = 0.96  # fraction used for training (was 0.8)
        n_eval = 0.02  # fraction used for validation (was 0.1)
        n_test = 0.02  # fraction used for testing (was 0.1)
        idx_train = int(self.num_graphs * n_training)
        idx_eval = idx_train + int(self.num_graphs * n_eval)

        graphs_train = graphs[0:idx_train]
        graphs_validate = graphs[idx_train:idx_eval]
        graphs_test = graphs[idx_eval:]

        feature_train = feature_graphs[0:idx_train]
        feature_validate = feature_graphs[idx_train:idx_eval]
        feature_test = feature_graphs[idx_eval:]

        input_graph_train = input_graphs[0:idx_train]
        input_graph_validate = input_graphs[idx_train:idx_eval]
        input_graph_test = input_graphs[idx_eval:]

        self.pos_train = pos[0:idx_train]
        self.pos_validate = pos[idx_train:idx_eval]
        self.pos_test = pos[idx_eval:]

        self.num_val = len(graphs_validate)
        self.num_test = len(graphs_test)
        self.num_training = len(graphs_train)

        self.train_generator = self.batch_generator(graphs_train,
                                                    feature_train,
                                                    input_graph_train)
        self.valid_generator = self.batch_generator(graphs_validate,
                                                    feature_validate,
                                                    input_graph_validate)
        self.test_generator = self.batch_generator(graphs_test, feature_test,
                                                   input_graph_test)

        print("DATASET:", type_dataset)
        print("num_graphs:", self.num_graphs)
        print("num_nodes by graph:", self.num_nodes)
        print("num_edges by graph:", self.num_edges)
        print("num_features by node:", self.num_features)
        print("num_training:", self.num_training)
        print("num_val:", self.num_val)
        print("num_test:", self.num_test)

Example #4

    # fragment of a training-setup function (its opening lines are not shown;
    # the first isdir check is restored from the pattern below)
    if not os.path.isdir(args.figure_save_path):
        os.makedirs(args.figure_save_path)
    if not os.path.isdir(args.timing_save_path):
        os.makedirs(args.timing_save_path)
    if not os.path.isdir(args.figure_prediction_save_path):
        os.makedirs(args.figure_prediction_save_path)
    if not os.path.isdir(args.nll_save_path):
        os.makedirs(args.nll_save_path)

    time = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    # logging.basicConfig(filename='logs/train' + time + '.log', level=logging.DEBUG)
    if args.clean_tensorboard:
        if os.path.isdir("tensorboard"):
            shutil.rmtree("tensorboard")
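    # `configure` is assumed to be tensorboard_logger.configure
    # (from tensorboard_logger import configure), as in GraphRNN-style scripts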
    configure(f"tensorboard/run-{time}", flush_secs=5)

    graphs = create_graphs.create(args)

    # split datasets
    random.seed(123)
    random.shuffle(graphs)
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    # as in Example #2, the validation graphs overlap the training split
    graphs_validate = graphs[0:int(0.2 * graphs_len)]

    # if use pre-saved graphs
    # dir_input = "/dfs/scratch0/jiaxuany0/graphs/"
    # fname_test = dir_input + args.note + '_' + args.graph_type + '_' + str(args.num_layers) + '_' + str(
    #     args.hidden_size_rnn) + '_test_' + str(0) + '.dat'
    # graphs = load_graph_list(fname_test, is_real=True)
    # graphs_test = graphs[int(0.8 * graphs_len):]
Example #5

import os
import pickle

import torch

import create_graphs
from args import Args  # assumed location of the Args config class
from utils import prepare_for_MADE

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="0"

# if __name__ == '__main__':

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# random.seed(123)
# np.random.seed(123)
# torch.manual_seed(123)

args = Args()

graphs = create_graphs.create(args)  # do not comment this out even when use_pre_saved_graphs is True; it also sets args.max_prev_node

if args.use_pre_saved_graphs:

    with open(args.graph_save_path + args.fname_test + '0.dat', 'rb') as fin:
        graphs = pickle.load(fin)

    # if use pre-saved graphs
    # dir_input = "/dfs/scratch0/jiaxuany0/graphs/"
    # fname_test = dir_input + args.note + '_' + args.graph_type + '_' + str(args.num_layers) + '_' + str(
    #     args.hidden_size_rnn) + '_test_' + str(0) + '.dat'
    # graphs = load_graph_list(fname_test, is_real=True)
    # graphs_test = graphs[int(0.8 * graphs_len):]
    # graphs_train = graphs[0:int(0.8 * graphs_len)]
    # graphs_validate = graphs[int(0.2 * graphs_len):int(0.4 * graphs_len)]
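
    # Hedged continuation sketch: per the save-time note in Example #2, a
    # reloaded ground-truth list still needs to be sliced into splits by hand.
    # The 0.8/0.2 fractions mirror the ones used in Example #2.
    graphs_len = len(graphs)
    graphs_test = graphs[int(0.8 * graphs_len):]
    graphs_train = graphs[0:int(0.8 * graphs_len)]
    graphs_validate = graphs[0:int(0.2 * graphs_len)]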