예제 #1
0
def MineModel(feature, support, ACT, weight_deacy, G, G1):
    """Build and compile a two-branch graph-convolution model.

    The same dropped-out node features are pushed through two structurally
    identical branches, one driven by the graph inputs ``G`` and one by
    ``G1``.  The two branch outputs are merged by a ``GraphFlusing`` layer
    in ``'attention'`` mode with a softmax activation.

    Args:
        feature: Feature matrix; only ``feature.shape[1]`` is read, to size
            the feature input.
        support: Second positional argument forwarded to every
            ``GraphConvolution`` layer.
        ACT: Activation used by every ``GraphConvolution`` layer.
        weight_deacy: L2 factor applied to the kernel of the first
            convolution of each branch.
        G: List of graph input tensors for the first branch.
        G1: List of graph input tensors for the second branch.

    Returns:
        The compiled model, whose inputs are ``[features] + G + G1``.
    """
    node_features = Input(shape=(feature.shape[1], ))
    shared = Dropout(0.3)(node_features)

    def _branch(graph_inputs):
        # conv(16) -> dropout -> conv(2), all fed with the same graph inputs.
        hidden = GraphConvolution(
            16,
            support,
            activation=ACT,
            kernel_regularizer=l2(weight_deacy),
        )([shared] + graph_inputs)
        hidden = Dropout(0.3)(hidden)
        return GraphConvolution(2, support,
                                activation=ACT)([hidden] + graph_inputs)

    # The G branch is built before the G1 branch, as in the original layout.
    branch_outputs = [_branch(G), _branch(G1)]

    # A plain softmax over add([Y0, Y1]) was the alternative noted by the
    # original author; the attention fusion is what is actually used.
    fused = GraphFlusing('attention', activation='softmax')(branch_outputs)

    # Compile model
    model = Model(inputs=[node_features] + G + G1, outputs=fused)
    model.compile(
        loss='categorical_crossentropy',
        weighted_metrics=['acc'],
        optimizer=Adam(lr=0.005),
    )
    return model
예제 #2
0
 def __init__(self):
     """Create the model's two graph-convolution layers."""
     super(GAE, self).__init__()

     # First layer: 16 as the leading size argument, ReLU, L2-regularised
     # kernel.  The second positional argument (1) is passed through as-is
     # -- presumably the number of supports; confirm against
     # GraphConvolution's signature.
     kernel_penalty = l2(5e-4)
     self.conv1 = GraphConvolution(16, 1,
                                   activation='relu',
                                   kernel_regularizer=kernel_penalty)

     # Second layer: size 7, layer-default activation, no regulariser.
     self.conv2 = GraphConvolution(7, 1)
예제 #3
0
 def __init__(self, latent_dim=7, num_component=7):
     """Create the layers and the Gaussian-mixture prior.

     Args:
         latent_dim: Stored on the instance; every layer created here is
             sized ``2 * latent_dim``.
         num_component: Stored on the instance and forwarded, together
             with ``latent_dim``, to ``make_gaussian_mixture_prior``.
     """
     super(MDGAE_tfp1, self).__init__()
     self.num_component = num_component
     self.latent_dim = latent_dim

     # All three layers share the same width.
     width = 2 * self.latent_dim
     self.conv1 = GraphConvolution(width, 1,
                                   activation='relu',
                                   kernel_regularizer=l2(5e-4))
     self.conv2 = GraphConvolution(width, 1, activation='relu')
     self.dense1 = tf.keras.layers.Dense(width)

     self.prior = make_gaussian_mixture_prior(self.latent_dim,
                                              self.num_component)
예제 #4
0
 def __init__(self):
     """Create three graph-convolution layers and a standard-normal prior."""
     super(VGAE_tfp2, self).__init__()

     # The two size-7 convolutions and the prior's event size use the same
     # number, so it is named once here.
     latent_dim = 7

     self.conv1 = GraphConvolution(16, 1,
                                   activation='relu',
                                   kernel_regularizer=l2(5e-4))
     self.conv2 = GraphConvolution(latent_dim, 1)
     self.conv3 = GraphConvolution(latent_dim, 1)

     # Zero-mean, unit-scale Normal whose last axis is reinterpreted as a
     # single event.
     unit_normal = tfd.Normal(loc=tf.zeros(latent_dim), scale=1)
     self.prior = tfd.Independent(unit_normal, reinterpreted_batch_ndims=1)
예제 #5
0
 def __init__(self):
     """Create the layers, the prior and the KL-regularised output layer."""
     super(VGAE_tfp1, self).__init__()

     # Event size shared by the dense head, the prior and the output
     # distribution layer.
     latent_dim = 7

     self.conv1 = GraphConvolution(16, 1,
                                   activation='relu',
                                   kernel_regularizer=l2(5e-4))
     self.conv2 = GraphConvolution(16, 1)

     # The dense layer emits exactly as many values as the full-covariance
     # distribution layer needs as parameters.
     n_params = tfpl.MultivariateNormalTriL.params_size(latent_dim)
     self.dense1 = tkl.Dense(n_params)

     self.prior = tfd.Independent(
         tfd.Normal(loc=tf.zeros(latent_dim), scale=1),
         reinterpreted_batch_ndims=1,
     )

     # KL divergence against the prior, weighted by the module-level
     # BETA_VAE, is attached as an activity regulariser.
     kl_penalty = tfpl.KLDivergenceRegularizer(self.prior, weight=BETA_VAE)
     self.dist1 = tfpl.MultivariateNormalTriL(
         latent_dim, activity_regularizer=kl_penalty)
예제 #6
0
 def __init__(self, latent_dim=7, num_component=7, perturb_rank=2):
     """Create the layers and the low-rank Gaussian-mixture prior.

     Args:
         latent_dim: Stored on the instance; sizes the convolutions and
             both dense layers.
         num_component: Stored on the instance and forwarded to the prior
             factory.
         perturb_rank: Stored on the instance; sizes ``dense2`` and is
             forwarded to the prior factory.
     """
     super(MDGAE_tfp2, self).__init__()
     self.num_component = num_component
     self.latent_dim = latent_dim
     self.perturb_rank = perturb_rank

     width = 2 * self.latent_dim
     self.conv1 = GraphConvolution(width, 1,
                                   activation='relu',
                                   kernel_regularizer=l2(5e-4))
     self.conv2 = GraphConvolution(width, 1, activation='relu')

     # Two tanh heads: one of size 2 * latent_dim, one of size
     # (latent_dim + 1) * perturb_rank.
     self.dense1 = tf.keras.layers.Dense(width, activation='tanh')
     lowrank_units = (self.latent_dim + 1) * self.perturb_rank
     self.dense2 = tf.keras.layers.Dense(lowrank_units, activation='tanh')

     self.prior = make_gaussian_mixture_prior_lowrank(
         self.latent_dim, self.num_component, self.perturb_rank)
예제 #7
0
 def __init__(self, latent_dim=7, num_component=7):
     """Create the four graph-convolution layers.

     Args:
         latent_dim: Stored on the instance; sizes ``conv1`` and, together
             with ``num_component``, ``conv4``.
         num_component: Stored on the instance; sizes ``conv2``, ``conv3``
             and, together with ``latent_dim``, ``conv4``.
     """
     super(MDGAE, self).__init__()
     self.num_component = num_component
     self.latent_dim = latent_dim

     # Shared first layer: ReLU with an L2-regularised kernel.
     self.conv1 = GraphConvolution(2 * self.latent_dim, 1,
                                   activation='relu',
                                   kernel_regularizer=l2(5e-4))

     # Two heads with one output per component: softmax and softplus.
     components = self.num_component
     self.conv2 = GraphConvolution(components, 1, activation='softmax')
     self.conv3 = GraphConvolution(components, 1, activation='softplus')

     # Head with latent_dim outputs per component, default activation.
     self.conv4 = GraphConvolution(self.latent_dim * components, 1)
예제 #8
0
def build_model(X, Y, A, config):
    """Build and compile a stacked graph-convolution model from ``config``.

    Every entry of ``config['model']['layers']`` except the last becomes a
    ``GraphConvolution`` followed by a ``Dropout``.  The last entry
    configures the output convolution, whose size is ``Y.shape[1]`` and
    which reads only the ``num_bases`` and ``activation`` keys.

    Args:
        X: Feature matrix; only ``X.shape[1]`` is read.
        Y: Label matrix; only ``Y.shape[1]`` is read.
        A: Sequence of adjacency matrices; only its length is read and one
            sparse adjacency input is created per element.
        config: Mapping with a ``'model'`` entry providing ``'layers'``,
            ``'loss'`` and ``'learning_rate'``.

    Returns:
        The compiled model, whose inputs are the feature input followed by
        the adjacency inputs.

    Raises:
        AssertionError: If fewer than two layer entries are configured.
    """
    model_cfg = config['model']
    layers = model_cfg['layers']
    assert len(layers) >= 2
    logger.debug("Starting model build")

    support = len(A)
    A_in = [InputAdj(sparse=True) for _ in range(support)]
    X_in = Input(shape=(X.shape[1], ), sparse=True)

    # Input layer and any intermediate layers share one recipe; the first
    # convolution consumes the raw feature input.
    H = X_in
    for spec in layers[:-1]:
        conv = GraphConvolution(
            spec['hidden_nodes'],
            support,
            num_bases=spec['num_bases'],
            featureless=spec['featureless'],
            activation=spec['activation'],
            W_regularizer=l2(spec['l2norm']),
        )
        H = conv([H] + A_in)
        H = Dropout(spec['dropout'])(H)

    # Output layer: no featureless flag, regulariser or dropout.
    head = layers[-1]
    Y_out = GraphConvolution(
        Y.shape[1],
        support,
        num_bases=head['num_bases'],
        activation=head['activation'],
    )([H] + A_in)

    # Compile model
    logger.debug("Compiling model")
    model = Model(inputs=[X_in] + A_in, outputs=Y_out)
    optimizer = Adam(lr=model_cfg['learning_rate'])
    model.compile(loss=model_cfg['loss'], optimizer=optimizer)

    return model
예제 #9
0
# Normalize adjacency matrices individually: each A[i] is left-multiplied by
# the diagonal matrix of reciprocal row sums, i.e. every row is divided by
# its (slightly offset) row sum.
for i in range(len(A)):
    d = np.array(A[i].sum(1)).flatten()  # row sums flattened to 1-D
    d_inv = 1. / (d + 1e-5)  # the 1e-5 offset keeps all-zero rows finite
    d_inv[np.isinf(d_inv)] = 0.  # safety net for any reciprocal still infinite
    D_inv = sp.diags(d_inv)
    A[i] = D_inv.dot(A[i]).tocsr()

# One sparse adjacency input per support, plus a sparse feature input whose
# width is the number of columns of X.
A_in = [InputAdj(sparse=True) for _ in range(support)]
X_in = Input(shape=(X.shape[1], ), sparse=True)

# Define model architecture
# Hidden layer: featureless graph convolution with ReLU and an L2-regularised
# kernel.
H = GraphConvolution(HIDDEN,
                     support,
                     num_bases=BASES,
                     featureless=True,
                     activation='relu',
                     W_regularizer=l2(L2))([X_in] + A_in)

H = Dropout(DO)(H)
# Debug output recorded by the original author:
# print ("A_in.shape=({0},)".format(len(A_in),)) # (47,)
# print ("H.shape={0}".format(H.shape)) # (23644, 16)
# print ("support={0}".format(support)) # 47
# Output layer: one softmax unit per column of y_train.
Y = GraphConvolution(y_train.shape[1],
                     support,
                     num_bases=BASES,
                     activation='softmax')([H] + A_in)

# Compile model
# NOTE(review): this call uses the singular `input=` / `output=` keyword
# names -- confirm they match the installed Keras version.
model = Model(input=[X_in] + A_in, output=Y)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=LR))
예제 #10
0
    L_scaled = rescale_laplacian(L)
    T_k = chebyshev_polynomial(L_scaled, MAX_DEGREE)
    support = MAX_DEGREE + 1
    graph = [X]+T_k
    G = [Input(shape=(None, None), batch_shape=(None, None), sparse=True) for _ in range(support)]

else:
    raise Exception('Invalid filter type.')

# Feature input: one row per node, X.shape[1] feature columns.
X_in = Input(shape=(X.shape[1],))

# Define model architecture
# NOTE: We pass arguments for graph convolutional layers as a list of tensors.
# This is somewhat hacky, more elegant options would require rewriting the Layer base class.
# Layout: dropout(0.1) -> graph conv(16, relu, L2 5e-6) -> dropout(0.1)
#         -> graph conv(y.shape[1], softmax).
H = Dropout(0.1)(X_in)
H = GraphConvolution(16, support, activation='relu', kernel_regularizer=l2(5e-6))([H]+G)
H = Dropout(0.1)(H)
Y = GraphConvolution(y.shape[1], support, activation='softmax')([H]+G)


# Compile model
# The model takes the feature input followed by every graph input in G.
model = Model(inputs=[X_in]+G, outputs=Y)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

# Helper variables for main training loop
wait = 0  # starts at zero; presumably counts epochs without improvement
preds = None  # no predictions yet
best_val_loss = 99999  # large sentinel, presumably so any real loss is lower

# Fit
for epoch in range(1, NB_EPOCH+1):
예제 #11
0
def experiment(args):
    """Train one graph model configured by ``args`` and score it on the test split.

    Steps: load the dataset, split the labels, build a Keras model whose
    architecture depends on ``args.FILTER``, train it full-batch one epoch
    at a time with early stopping on the validation loss, then evaluate the
    last predictions on the test indices.

    Supported ``args.FILTER`` values:

    * ``'localpool'`` -- graph convolutions over one pre-processed adjacency.
    * ``'dense'``     -- plain dense network, no graph inputs.
    * ``'efgcn'``     -- graph convolutions over adjacencies rebuilt from
      ReLU-activated edge features.
    * ``'lgcn'``      -- as ``'efgcn'`` but the edge features first pass
      through learned embedding layers (``args.EMBEDDING_LAYERS``).

    Args:
        args: Namespace-like object.  Attributes read here: ``FILTER``,
            ``PATH``, ``DATASET``, ``MAX_ADJACENCY``, ``SYMMETRIC``,
            ``ADD_NODE_ONE_HOT``, ``SELF_LINKS``, ``TRAIN_SPLIT``,
            ``VAL_SPLIT``, ``TESTING``, ``VERBOSE``, ``SYM_NORM``,
            ``DROPOUT``, ``NETWORK_LAYERS``, ``ACTIVATION``,
            ``REG_STRENGTH``, ``ADJ_NORMALIZER``, ``EMBEDDING_LAYERS``,
            ``EMB_DROPOUT``, ``EMBEDDING_ACT``, ``LEARNING_RATE``,
            ``BALANCE_CW``, ``NB_EPOCH`` and ``PATIENCE``.  ``args`` is no
            longer modified by this function.

    Returns:
        The accuracy sequence returned by ``evaluate_preds`` for the test
        split; its element 0 is what gets printed as the test accuracy.

    Raises:
        Exception: If ``args.FILTER`` or ``args.ADJ_NORMALIZER`` is not one
            of the supported values.
    """
    # Get data
    normalizer = preprocessing.StandardScaler()

    if args.FILTER in ['localpool', 'dense']:
        X, A, y, train_test_idx = load_data(path=args.PATH,
                                            dataset=args.DATASET,
                                            normalizer=normalizer,
                                            max_adjacency=0,
                                            symmetric=args.SYMMETRIC,
                                            add_node_one_hot=args.ADD_NODE_ONE_HOT)

    elif args.FILTER in ['efgcn', 'lgcn']:
        X, A, y, train_test_idx = load_data(path=args.PATH,
                                            dataset=args.DATASET,
                                            normalizer=normalizer,
                                            max_adjacency=args.MAX_ADJACENCY,
                                            symmetric=args.SYMMETRIC,
                                            add_node_one_hot=args.ADD_NODE_ONE_HOT,
                                            self_links=args.SELF_LINKS)

    else:
        raise Exception('Invalid filter type for loading data')

    # Pick the split strategy that matches the dataset.
    if args.DATASET in ['aifb', 'mutag', 'rita_tts', 'rita_tts_hard', 'rita_tts_hard_lstm', 'rita_tts_lstm', 'nell_tts']:
        y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits_predefined(
            y, train_test_idx, args.TRAIN_SPLIT, args.VAL_SPLIT, args.TESTING)
    elif args.DATASET in ['cora', 'cora_plus']:
        y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits(y)
    else:
        y_train, y_val, y_test, idx_train, idx_val, idx_test, train_mask = get_splits_weighted(
            y, args.TRAIN_SPLIT, args.VAL_SPLIT)
    batch_size = X.shape[0]  # number of nodes

    if args.FILTER == 'localpool':
        # Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016)
        if args.VERBOSE >= 1:
            print('Using local pooling filters...')
        A_ = preprocess_adj(A[0], args.SYM_NORM)
        support = 1
        graph = [X, A_]
        G = [GraphInput(sparse=True, name='adjacency')]

    elif args.FILTER == 'dense':
        # Running a regular dense network: features only, no graph inputs.
        if args.VERBOSE >= 1:
            print('Running a regular dense network')
        graph = [X]
        G = []

    elif args.FILTER in ['efgcn', 'lgcn']:
        # Running the Latent-Graph Convolutional Network algorithm
        if args.VERBOSE >= 1:
            print('Splitting up adjacency matrices...')
        support = len(A)
        A = [a.tocsr() for a in A]
        graph = [X] + A
        G = [GraphInput(sparse=True, name='adjacency_' + str(s)) for s in range(support)]

    else:
        raise Exception('Invalid filter type for creating processing data')

    # Define model architecture

    X_in = Input(shape=(X.shape[1],))
    H = Dropout(args.DROPOUT)(X_in)

    if args.FILTER in ['localpool']:
        for hidden_nodes in args.NETWORK_LAYERS:
            H = GraphConvolution(hidden_nodes,
                                 support,
                                 activation=args.ACTIVATION,
                                 kernel_regularizer=l2(args.REG_STRENGTH),
                                 use_bias=True,
                                 self_links=args.SELF_LINKS)([H] + G)
            H = Dropout(args.DROPOUT)(H)
        Y = GraphConvolution(y.shape[1], support, activation='softmax')([H] + G)

    elif args.FILTER in ['efgcn', 'lgcn']:

        # selecting the normalize function
        if args.ADJ_NORMALIZER == 'sym':
            vector_to_adjacency_function = vector_to_adjacency_sym_normalized
        elif args.ADJ_NORMALIZER == 'right':
            vector_to_adjacency_function = vector_to_adjacency_normalized
        elif args.ADJ_NORMALIZER == 'none':
            vector_to_adjacency_function = vector_to_adjacency
        elif args.ADJ_NORMALIZER == 'sym_sparse':
            vector_to_adjacency_function = vector_to_adjacency_sym_sparse
        elif args.ADJ_NORMALIZER == 'softmax':
            vector_to_adjacency_function = vector_to_adjacency_softmax
        else:
            raise Exception('Invalid normalizing mode for L-GCN')

        def latent_relation_layer(A, G, args, name, embedding_layers=None):
            """Turn the sparse adjacency inputs into a list of adjacency tensors.

            Args:
                A: Adjacency matrices; ``A[0]`` supplies the static shapes.
                G: Adjacency input tensors.
                args: Experiment arguments (``FILTER``, ``EMB_DROPOUT``,
                    ``DATASET``, ``EMBEDDING_ACT`` and, as a fallback,
                    ``EMBEDDING_LAYERS`` are read).
                name: Infix used in the names of the dense embedding layers.
                embedding_layers: Embedding sizes for ``'lgcn'``; falls back
                    to ``args.EMBEDDING_LAYERS`` when ``None``.

            Returns:
                ``(Ge, embedding_len)``: the list of adjacency tensors and
                its length.
            """
            # turn sparse adjacency tensor into dense edge features matrix
            He = Lambda(extract_from_adjs, output_shape=(A[0].data.shape[0], len(G)))(G)

            # With no embedding layer applied the edge features keep one
            # column per adjacency input.  Setting this up front also avoids
            # an UnboundLocalError when 'lgcn' is run with an empty list of
            # embedding layers.
            embedding_len = len(G)

            if args.FILTER == 'lgcn':
                if embedding_layers is None:
                    embedding_layers = args.EMBEDDING_LAYERS

                # embedding hidden layers
                for i, embedding_len in enumerate(embedding_layers):
                    # add dropout if specified
                    if args.EMB_DROPOUT > 0.0:
                        He = Dropout(args.EMB_DROPOUT)(He)

                    # if it is the rita lstm dataset 12 months 20 edge features per month
                    if args.DATASET in ['rita_lstm', 'rita_tts_lstm', 'rita_tts_hard_lstm']:
                        lstm_input_shape = (12, 20)
                        He = Lambda(reshape_for_lstm)(He)
                        He = LSTM(units=embedding_len,
                                  activation=args.EMBEDDING_ACT,
                                  input_shape=lstm_input_shape,
                                  kernel_initializer=initializers.RandomUniform())(He)
                    else:
                        He = Dense(embedding_len,
                                   activation=args.EMBEDDING_ACT,
                                   name='latent_relation_' + name + '_{}'.format(i))(He)

            elif args.FILTER == 'efgcn':
                He = Activation('relu')(He)

            # helper functions for the vector_to_adjacency_function
            # may be nicer to put these all into one Keras Layer instance, but it's not necessary
            tensor_shape = Lambda(get_tensor_shape, output_shape=(2,))(G[0])
            output_shape = (A[0].shape[0], A[0].shape[1])
            Ge = []

            # slice the dense edge feature matrix and make adjacency matrices from them
            # (`range` instead of `xrange` so this also runs on Python 3)
            for slice_index in range(embedding_len):
                # Bind the index as a default argument: a bare closure would
                # see the loop variable's *latest* value if the layer's
                # function were ever invoked again after the loop moved on.
                sli = Lambda(lambda x, index=slice_index: x[:, index])(He)
                # to adjacency matrices
                Ge += [Lambda(vector_to_adjacency_function, output_shape=output_shape)([G[0], sli, tensor_shape])]
            return Ge, embedding_len

        # loop over hidden layers args.NETWORK_LAYERS = [16]: it goes from input to 16 to output input->32->16->output = [32,16]
        for l, hidden_nodes in enumerate(args.NETWORK_LAYERS):
            # if it is the first layer, and its one_hot node features, we remove the self links
            if args.ADD_NODE_ONE_HOT == True and l == 0:
                first_layer_one_hot = True
            else:
                first_layer_one_hot = False
            Ge, embedding_len = latent_relation_layer(A, G, args, 'hidden_' + str(l))

            H = GraphConvolution(hidden_nodes,
                                 embedding_len,
                                 activation=args.ACTIVATION,
                                 kernel_regularizer=l2(args.REG_STRENGTH),
                                 use_bias=True,
                                 self_links=args.SELF_LINKS,
                                 first_layer_one_hot=first_layer_one_hot)([H] + Ge)

            H = Dropout(args.DROPOUT)(H)

        # The output layer always uses the fixed [4, 1] embedding stack.  It
        # is passed explicitly instead of being written into
        # args.EMBEDDING_LAYERS, so a second experiment() call with the same
        # args object still sees the caller's own embedding configuration.
        Ge, embedding_len = latent_relation_layer(A, G, args, 'final',
                                                  embedding_layers=[4, 1])
        Y = GraphConvolution(y.shape[1], embedding_len, activation='softmax', use_bias=True, self_links=args.SELF_LINKS)([H] + Ge)

    elif args.FILTER == 'dense':
        for hidden_nodes in args.NETWORK_LAYERS:
            H = Dense(hidden_nodes,
                      activation=args.ACTIVATION,
                      kernel_regularizer=l2(args.REG_STRENGTH)
                      )(H)
            H = Dropout(args.DROPOUT)(H)
        Y = Dense(y.shape[1], activation='softmax')(H)

    else:
        raise Exception('invalid filter type for network creation')

    # Compile model
    model = Model(inputs=[X_in] + G, outputs=Y)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=args.LEARNING_RATE))
    model.summary()

    # Early-stopping state
    wait = 0
    preds = None
    best_val_loss = 99999

    # class balance: weight each class by its inverse frequency, rescaled so
    # the weights average to one
    if args.BALANCE_CW == True:
        class_balance = np.sum(y, axis=0)
        class_weight = {}
        class_balance = 1.0 / class_balance
        class_balance = class_balance / np.mean(class_balance)
        for c in range(len(class_balance)):
            class_weight[c] = class_balance[c]
        print('class weight:', class_weight)
    else:
        class_weight = None

    # Fit
    for epoch in range(1, args.NB_EPOCH + 1):

        # Log wall-clock time
        t = time.time()

        # Single training iteration (we mask nodes without labels for loss calculation)
        model.fit(graph, y_train, sample_weight=train_mask,
                  batch_size=batch_size, epochs=1, shuffle=False, verbose=0, class_weight=class_weight)

        # Predict on full dataset
        preds = model.predict(graph, batch_size=batch_size)

        # Train / validation scores
        train_val_loss, train_val_acc = evaluate_preds(preds, [y_train, y_val],
                                                       [idx_train, idx_val])
        if args.VERBOSE == 2:
            print("Epoch: {:04d}".format(epoch),
                  "train_loss= {:.4f}".format(train_val_loss[0]),
                  "train_acc= {:.4f}".format(train_val_acc[0]),
                  "val_loss= {:.4f}".format(train_val_loss[1]),
                  "val_acc= {:.4f}".format(train_val_acc[1]),
                  "time= {:.4f}".format(time.time() - t),
                  "stopping= {}/{}".format(wait, args.PATIENCE))

        # Early stopping
        if train_val_loss[1] < best_val_loss:
            best_val_loss = train_val_loss[1]
            wait = 0
        else:
            if wait >= args.PATIENCE:
                if args.VERBOSE >= 1:
                    print('Epoch {}: early stopping'.format(epoch))
                if args.VERBOSE == 1:
                    print("train_loss= {:.4f}".format(train_val_loss[0]),
                          "train_acc= {:.4f}".format(train_val_acc[0]),
                          "val_loss= {:.4f}".format(train_val_loss[1]),
                          "val_acc= {:.4f}".format(train_val_acc[1]),
                          "time= {:.4f}".format(time.time() - t),
                          "stopping= {}/{}".format(wait, args.PATIENCE))
                break
            wait += 1

    # Testing
    if args.VERBOSE == 2:
        print('predictions')
        for i in idx_test:
            print(preds[i], np.argmax(preds[i]), np.argmax(y_test[i]))

    if args.VERBOSE == 2:
        # Dump the learned weights of the dense embedding layers.
        for layer in model.layers:
            if 'latent_relation' in layer.name:
                print(layer.name)
                print(layer.get_weights())

    test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])

    if args.VERBOSE >= 1:
        # NOTE(review): predictions are passed first and labels second; if
        # `confusion_matrix` is scikit-learn's, its order is (y_true, y_pred)
        # and the printed matrix is transposed -- confirm before changing.
        print(confusion_matrix(np.argmax(preds[idx_test], axis=1), np.argmax(y_test[idx_test], axis=1)))
        print("Test set results:",
              "loss= {:.4f}".format(test_loss[0]),
              "accuracy= {:.4f}".format(test_acc[0]))

    return test_acc
예제 #12
0
파일: train.py 프로젝트: omarmaddouri/GCNCC
    G = [
        Input(shape=(None, None), batch_shape=(None, None), sparse=True)
        for _ in range(support)
    ]

else:
    raise Exception('Invalid filter type.')

# Feature input: one row per node, X.shape[1] feature columns.
X_in = Input(shape=(X.shape[1], ))

# Define model architecture
# NOTE: We pass arguments for graph convolutional layers as a list of tensors.
# This is somewhat hacky, more elegant options would require rewriting the Layer base class.
# Layout: dropout(0.2) -> graph conv(10, tanh, L2 5e-4) -> dropout(0.2)
#         -> graph conv(y.shape[1], softmax).
H = Dropout(0.2)(X_in)
H = GraphConvolution(10,
                     support,
                     activation='tanh',
                     kernel_regularizer=l2(5e-4))([H] + G)
H = Dropout(0.2)(H)
Y = GraphConvolution(y.shape[1], support, activation='softmax')([H] + G)

# Compile model
# The model takes the feature input followed by every graph input in G.
model = Model(inputs=[X_in] + G, outputs=Y)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

# Helper variables for main training loop
wait = 0  # starts at zero; presumably counts epochs without improvement
preds = None  # no predictions yet
best_val_loss = 99999  # large sentinel, presumably so any real loss is lower

# Fit
for epoch in range(1, NB_EPOCH + 1):
예제 #13
0
    raise Exception('Invalid filter type.')

# `shape` excludes the batch size: e.g. shape=(32,) means each input sample
# is a vector of 32 values.
X_in = Input(shape=(X.shape[1], ))  # input width = features per node, X.shape[1] (the original note cites 1433)

# Define model architecture
# NOTE: We pass arguments for graph convolutional layers as a list of tensors.
# This is somewhat hacky, more elegant options would require rewriting the Layer base class.
H = Dropout(0.5)(X_in)
# Debug output left by the original author:
# print("H.shape ", H.shape)
# print("[H] len  ", len([H]))
# print("G len ", len(G))
# print("[H]+G len ", len([H]+G) )
H = GraphConvolution(16,
                     support,
                     activation='relu',
                     kernel_regularizer=l2(5e-4))(
                         [H] + G)  # [H] + G is one flat list: H followed by every tensor in G
H = Dropout(0.5)(H)
Y = GraphConvolution(y.shape[1], support, activation='softmax')([H] + G)

# Compile model
# Keras functional model (as opposed to a Sequential one): it can have several
# inputs and outputs, e.g. model = Model(inputs=[a1, a2], outputs=[b1, b2, b3])
model = Model(inputs=[X_in] + G, outputs=Y)
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

# Helper variables for main training loop
wait = 0  # starts at zero; presumably counts epochs without improvement
preds = None  # no predictions yet
best_val_loss = 99999  # large sentinel, presumably so any real loss is lower
예제 #14
0
def main(debug=False, dataset='sch2graph'):
    """Train a two-layer graph-convolution classifier and print its scores.

    Loads the data, row-normalises the features, builds the graph inputs for
    the selected filter type, trains full-batch one epoch at a time with
    early stopping on the validation loss, then prints the test loss and
    accuracy.

    Args:
        debug: When true, the Keras backend session is wrapped in a
            TensorFlow CLI debugger session before anything else runs.
        dataset: ``'sch2graph'`` selects that dataset; any other value falls
            back to ``'cora'``.

    Raises:
        Exception: If the filter type is neither ``'localpool'`` nor
            ``'chebyshev'``.
    """
    # Define parameters
    if dataset == 'sch2graph':
        data_name = dataset
        data_dir = 'data/'
        file_prefix = 'mcdlycellbwcb_psd2x'
    else:
        data_name = 'cora'
        data_dir = 'data/cora/'
        file_prefix = ''
    filter_type = 'localpool'  # 'chebyshev'
    max_degree = 2  # maximum polynomial degree
    sym_norm = True  # symmetric (True) vs. left-only (False) normalization
    nb_epoch = 200
    patience = 20  # early stopping patience

    if debug:
        debug_session = tf_debug.LocalCLIDebugWrapperSession(K.get_session())
        K.set_session(debug_session)

    # Get data
    X, A, y = load_data(path=data_dir, dataset=data_name, prefix=file_prefix)
    (y_train, y_val, y_test,
     idx_train, idx_val, idx_test, train_mask) = get_splits(y, data_name)

    # Normalize X: divide every row by its row sum (in place)
    X /= X.sum(1).reshape(-1, 1)

    if filter_type == 'localpool':
        # Local pooling filters (see 'renormalization trick' in Kipf & Welling, arXiv 2016)
        print('Using local pooling filters...')
        A_ = preprocess_adj(A, sym_norm)
        support = 1
        graph = [X, A_]
        # Dense adjacency input here; the Chebyshev branch uses sparse ones.
        G = [Input(shape=(None, None), batch_shape=(None, None), sparse=False)]

    elif filter_type == 'chebyshev':
        # Chebyshev polynomial basis filters (Defferard et al., NIPS 2016)
        print('Using Chebyshev polynomial basis filters...')
        L = normalized_laplacian(A, sym_norm)
        L_scaled = rescale_laplacian(L)
        T_k = chebyshev_polynomial(L_scaled, max_degree)
        support = max_degree + 1
        graph = [X] + T_k
        G = [
            Input(shape=(None, None), batch_shape=(None, None), sparse=True)
            for _ in range(support)
        ]

    else:
        raise Exception('Invalid filter type.')

    X_in = Input(shape=(X.shape[1], ))

    # Define model architecture
    # NOTE: graph convolutional layers receive their arguments as a list of
    # tensors: the features first, then every graph input.
    hidden = Dropout(0.5)(X_in)
    hidden = GraphConvolution(12,
                              support,
                              activation='relu',
                              kernel_regularizer=l2(5e-4))([hidden] + G)
    hidden = Dropout(0.5)(hidden)
    Y = GraphConvolution(y.shape[1], support,
                         activation='softmax')([hidden] + G)

    # Compile model
    model = Model(inputs=[X_in] + G, outputs=Y)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.01))

    # Helper variables for main training loop
    wait = 0
    preds = None
    best_val_loss = 99999
    n_nodes = A.shape[0]  # full-batch training: one batch holds every node

    # Fit
    for epoch in range(1, nb_epoch + 1):

        # Log wall-clock time
        t = time.time()

        # Single training iteration (we mask nodes without labels for loss calculation)
        model.fit(graph,
                  y_train,
                  sample_weight=train_mask,
                  batch_size=n_nodes,
                  epochs=1,
                  shuffle=False,
                  verbose=0)

        # Predict on full dataset
        preds = model.predict(graph, batch_size=n_nodes)

        # Train / validation scores
        train_val_loss, train_val_acc = evaluate_preds(
            preds, [y_train, y_val], [idx_train, idx_val])
        print("Epoch: {:04d}".format(epoch),
              "train_loss= {:.4f}".format(train_val_loss[0]),
              "train_acc= {:.4f}".format(train_val_acc[0]),
              "val_loss= {:.4f}".format(train_val_loss[1]),
              "val_acc= {:.4f}".format(train_val_acc[1]),
              "time= {:.4f}".format(time.time() - t))

        # Early stopping: an improvement resets the counter; otherwise stop
        # once the counter has reached the patience, else keep counting.
        val_loss = train_val_loss[1]
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
            continue
        if wait >= patience:
            print('Epoch {}: early stopping'.format(epoch))
            break
        wait += 1

    # Testing
    test_loss, test_acc = evaluate_preds(preds, [y_test], [idx_test])
    print("Test set results:", "loss= {:.4f}".format(test_loss[0]),
          "accuracy= {:.4f}".format(test_acc[0]))