Example #1
    def normalize_adj(self):
        for r in range(self.n_rel):
            train_edges = self.train_edges[r]
            data = np.ones(train_edges.shape[0])
            ### NOTE: A_ij = 1 if j->i
            adj = sp.csr_matrix((data, (train_edges[:, 1], train_edges[:, 0])),
                                shape=(self.n_entity, self.n_entity))
            # #adj = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=(self.n_entity,self.n_entity))

            rowsum, colsum = np.array(adj.sum(1)), np.array(adj.sum(0))
            rowdegree_mat_half_inv = sp.diags(
                np.nan_to_num(np.power(rowsum,
                                       self.adj_norm_factor)).flatten())
            coldegree_mat_half_inv = sp.diags(
                np.nan_to_num(np.power(colsum,
                                       self.adj_norm_factor)).flatten())
            adj_normalized = rowdegree_mat_half_inv.dot(adj).dot(
                coldegree_mat_half_inv).tocoo()
            self.adj_train[r] = sparse_to_tuple(adj_normalized)

            if self.has_bias:
                rowdegree_mat_inv = sp.diags(
                    np.nan_to_num(np.power(rowsum, -0.)).flatten())
                adj4bias_normalized = rowdegree_mat_inv.dot(adj).tocoo()
                self.adj_train_4bias[r] = sparse_to_tuple(adj4bias_normalized)

            # compute neg samp prob based on degree
            deg = np.asarray(rowsum.squeeze())[self.rel2end[r]]
            prob_unnorm = np.power(deg, self.neg_sample_power)
            self.neg_samp_prob[r] = prob_unnorm / np.sum(prob_unnorm)
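
The snippets in this listing all call a sparse_to_tuple helper that none of them define. A minimal sketch, assuming the (coords, values, shape) convention used in common GCN/GAE codebases (an assumption, not code taken from any example here):

import numpy as np
import scipy.sparse as sp

def sparse_to_tuple(sparse_mx):
    # Sketch of the helper assumed by these examples: convert a scipy sparse
    # matrix into the (coords, values, shape) triple that tf.sparse_placeholder
    # feeds and the torch.sparse constructors below expect.
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
    values = sparse_mx.data
    shape = sparse_mx.shape
    return coords, values, shape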
Example #2
 def to_pyt_sp(self):
     adj_norm_tuple = sparse_to_tuple(self.adj_norm)
     adj_label_tuple = sparse_to_tuple(self.adj_label)
     features_tuple = sparse_to_tuple(self.features_orig)
     self.adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm_tuple[0].T),
                                             torch.FloatTensor(adj_norm_tuple[1]),
                                             torch.Size(adj_norm_tuple[2]))
     self.adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label_tuple[0].T),
                                             torch.FloatTensor(adj_label_tuple[1]),
                                             torch.Size(adj_label_tuple[2]))
     self.features = torch.sparse.FloatTensor(torch.LongTensor(features_tuple[0].T),
                                             torch.FloatTensor(features_tuple[1]),
                                             torch.Size(features_tuple[2]))
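
For reference, newer PyTorch releases warn that the torch.sparse.FloatTensor constructor used above is deprecated in favour of torch.sparse_coo_tensor. A small sketch of the same conversion with the current API, assuming the (coords, values, shape) tuple layout sketched earlier:

import torch

def tuple_to_torch_sparse(tup):
    # tup is assumed to be the (coords, values, shape) triple produced by
    # sparse_to_tuple; coords has shape (nnz, 2), hence the transpose.
    coords, values, shape = tup
    return torch.sparse_coo_tensor(torch.as_tensor(coords.T, dtype=torch.long),
                                   torch.as_tensor(values, dtype=torch.float32),
                                   torch.Size(shape))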
Example #3
def get_feed_dict(entity_pairs, train_edges, paths, labels, start, end):
    feed_dict = {}

    if args.use_context:
        feed_dict["entity_pairs"] = entity_pairs[start:end]
        if train_edges is not None:
            feed_dict["train_edges"] = train_edges[start:end]
        else:
            # for evaluation no edges should be masked out
            feed_dict["train_edges"] = torch.LongTensor(np.array([-1] * (end - start), np.int32)).cuda() if args.cuda \
                        else torch.LongTensor(np.array([-1] * (end - start), np.int32))

    if args.use_path:
        if args.path_type == 'embedding':
            indices, values, shape = sparse_to_tuple(paths[start:end])
            indices = torch.LongTensor(
                indices).cuda() if args.cuda else torch.LongTensor(indices)
            values = torch.Tensor(
                values).cuda() if args.cuda else torch.Tensor(values)
            feed_dict["path_features"] = torch.sparse.FloatTensor(
                indices.t(), values, torch.Size(shape)).to_dense()
        elif args.path_type == 'rnn':
            feed_dict["path_ids"] = torch.LongTensor(paths[start:end]).cuda() if args.cuda \
                    else torch.LongTensor(paths[start:end])

    feed_dict["labels"] = labels[start:end]

    return feed_dict
Example #4
    def get_data(self, feed_dict):
        input = self.features
        f0 = feed_dict[self.placeholders['fields'][0]]
        dropout = feed_dict.get(self.placeholders['dropout'], 0.0)
        if self.sparse_input:
            input = slice(input, f0)
            if FLAGS.reverse:
                input = sparse_to_tuple(
                    np_sparse_dropout(tuple_to_coo(input), 1 - dropout))
        else:
            input = dense_slice(input, f0)
            if FLAGS.reverse:
                input = np_dropout(input, 1 - dropout)
            #input = input[f0,:]
        feed_dict[self.inputs_ph] = input

        for l in range(self.L):
            dim = self.agg0_dim if l == 0 else FLAGS.hidden1
            adj = feed_dict[self.placeholders['adj'][l]][0]
            self.g_ops += adj.shape[0] * dim * 4
            self.adj_sizes[l] += adj.shape[0]
            self.amt_data += adj.shape[0]
        for l in range(self.L + 1):
            self.field_sizes[l] += feed_dict[self.placeholders['fields']
                                             [l]].size

        for c, l in self.layer_comp:
            self.nn_ops += c * feed_dict[self.placeholders['fields']
                                         [l]].size * 4
Example #5
 def __init__(self, adj, x, y, W, b, K=2, normalize_grad=True):
     self.num_classes = y.max() + 1
     self.normalize_grad = normalize_grad
     self.num_nodes = adj.shape[0]
     self.K = K
     self.surrogate = Surrogate(x @ W, b, K=K)
     self.shape = (self.num_nodes, self.num_nodes)
     self.adj = adj
     self.adj_sparse = utils.sparse_to_tuple(utils.preprocess_adj(adj))
     self.y = y
     self.y_onehot = np.eye(int(self.num_classes))[y]
Example #6
def build_model(adj, features, n_classes):
    placeholders = {
        'features':
        tf.sparse_placeholder(tf.float32,
                              shape=tf.constant(features[2], dtype=tf.int64)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask':
        tf.placeholder(tf.int32),
        'noise':
        tf.placeholder(tf.float32, shape=()),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
        'alfa':
        tf.placeholder(tf.float32, shape=()),
        'beta':
        tf.placeholder(tf.float32, shape=()),
    }

    if FLAGS.model == 'COOL':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN

    elif FLAGS.model == 'COOLnorm':
        support = [
            sparse_to_tuple(
                preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))
        ]
        model_func = GCN

    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    placeholders['support'] = [
        tf.sparse_placeholder(tf.float32) for _ in support
    ]

    model = model_func(placeholders)
    return model, support, placeholders
Example #7
 def __init__(self, edgelist, weighted, directed, labelfile, featurefile):
     self.edgelist = edgelist
     self.weighted = weighted
     self.directed = directed
     self.G = self.build_graph()
     self.node_list = list(self.G.nodes())
     self.look_up = {}
     self.node_size = 0
     for node in self.node_list:
         self.look_up[node] = self.node_size
         self.node_size += 1
     self.labels = self.read_node_labels(labelfile)
     if featurefile is None:
         self.features = np.identity(n=len(self.node_list))
         self.features = sparse_to_tuple(sp.coo_matrix(self.features))
     else:
         self.features = self.read_node_features(featurefile)
Example #8
def get_feed_dict(entity_pairs, train_edges, paths, labels, start, end):
    feed_dict = {}

    if args.use_context:
        feed_dict[model.entity_pairs] = entity_pairs[start:end]
        if train_edges is not None:
            feed_dict[model.train_edges] = train_edges[start:end]
        else:
            # for evaluation no edges should be masked out
            feed_dict[model.train_edges] = np.array([-1] * (end - start),
                                                    np.int32)

    if args.use_path:
        if args.path_type == 'embedding':
            feed_dict[model.path_features] = sparse_to_tuple(paths[start:end])
        elif args.path_type == 'rnn':
            feed_dict[model.path_ids] = paths[start:end]

    feed_dict[model.labels] = labels[start:end]

    return feed_dict
Example #9
 def __init__(self, layers_config, num_features, adj, latent_dim, placeholders,
              pos_weight, **kwargs):
   self.layers_config = layers_config
   self.num_features = num_features
   self.latent_dim = latent_dim
   self.dropout = placeholders['dropout']
   self.pos_weight = pos_weight
   self.batch_size = adj.shape[0]  # Full batch.
   self.loss_norm = (self.batch_size * self.batch_size
                    ) / float(self.pos_weight * adj.sum() +
                              (self.batch_size * self.batch_size) - adj.sum())
   A_gcn_tuple = normalize_graph_gcn(adj.astype(np.float32))
   self.A_gcn = tf.SparseTensor(A_gcn_tuple[0].astype(np.float32),
                                A_gcn_tuple[1].astype(np.float32),
                                A_gcn_tuple[2])
   A_tuple = sparse_to_tuple(adj)
   self.A = tf.SparseTensor(A_tuple[0].astype(np.float32),
                            A_tuple[1].astype(np.float32), A_tuple[2])
   self.node_indices = tf.range(0, self.batch_size, 1)  # [0,..., N]
   self.v_sender = tf.nn.embedding_lookup(placeholders['v_sender_all'],
                                          self.node_indices)
   self.v_receiver = tf.nn.embedding_lookup(placeholders['v_receiver_all'],
                                            self.node_indices)
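
For context, loss_norm above matches the usual GAE re-weighting. Assuming pos_weight is chosen as (N*N - E) / E with N = batch_size and E = adj.sum() (the standard choice in GAE-style code, not shown in this snippet), the expression reduces to:

    loss_norm = N*N / (pos_weight * E + N*N - E) = N*N / (2 * (N*N - E))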
Example #10
def gcn():
    g = nx.read_edgelist('karate.edgelist',nodetype=int,create_using=nx.Graph())
    
    adj = nx.to_numpy_matrix(g)
    
    # Get important parameters of adjacency matrix
    n_nodes = adj.shape[0]
    
    # Some preprocessing
    adj_tilde = adj + np.identity(n=n_nodes)
    # np.squeeze() removes single-dimensional entries from an array's shape, i.e. drops axes of size 1
    d_tilde_diag = np.squeeze(np.sum(np.array(adj_tilde), axis=1))
    d_tilde_inv_sqrt_diag = np.power(d_tilde_diag, -1/2)
    d_tilde_inv_sqrt = np.diag(d_tilde_inv_sqrt_diag)
    adj_norm = np.dot(np.dot(d_tilde_inv_sqrt, adj_tilde), d_tilde_inv_sqrt)
    adj_norm_tuple = us.sparse_to_tuple(scipy.sparse.coo_matrix(adj_norm))
#    print(adj_norm_tuple)
    
    # Features are just the identity matrix
    feat_x = np.identity(n=n_nodes)
    feat_x_tuple = us.sparse_to_tuple(scipy.sparse.coo_matrix(feat_x))
    
    # TensorFlow placeholders
    '''
    ###sparse_placeholder demo:###
    x = tf.sparse_placeholder(tf.float32) 
    y = tf.sparse_reduce_sum(x) 
    with tf.Session() as sess: 
        print(sess.run(y)) # ERROR: will fail because x was not fed. 
        indices = np.array([[3, 2, 0], [4, 5, 1]], dtype=np.int64) 
        values = np.array([1.0, 2.0], dtype=np.float32) 
        shape = np.array([7, 9, 2], dtype=np.int64) 
        print(sess.run(y, feed_dict={x: tf.SparseTensorValue(indices, values, shape)})) 
        # Will succeed. 
        print(sess.run(y, feed_dict={ x: (indices, values, shape)})) # Will succeed. 
        sp = tf.SparseTensor(indices=indices, values=values, shape=shape) 
        sp_value = sp.eval(session) 
        print(sess.run(y, feed_dict={x: sp_value})) # Will succeed. 
    '''
    ph = {
        'adj_norm': tf.sparse_placeholder(tf.float32, name="adj_mat"),
        'x': tf.sparse_placeholder(tf.float32, name="x")}
    
    l_sizes = [32,16,8]
    
    o_fc1 = lg.GraphConvLayer(input_dim=feat_x.shape[-1],
                              output_dim=l_sizes[0],
                              name='fc1',
                              act=tf.nn.tanh)(adj_norm=ph['adj_norm'],
                                              x=ph['x'], sparse=True)
    
    o_fc2 = lg.GraphConvLayer(input_dim=l_sizes[0],
                              output_dim=l_sizes[1],
                              name='fc2',
                              act=tf.nn.tanh)(adj_norm=ph['adj_norm'], x=o_fc1)
    
    o_fc3 = lg.GraphConvLayer(input_dim=l_sizes[1],
                              output_dim=l_sizes[2],
                              name='fc3',
                              act=tf.nn.tanh)(adj_norm=ph['adj_norm'], x=o_fc2)
    
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    
    feed_dict = {ph['adj_norm']: adj_norm_tuple,
                 ph['x']: feat_x_tuple}
    
    outputs = sess.run(o_fc3, feed_dict=feed_dict)
    print(outputs.shape)
    nodes = list(g.nodes())
    labels = node2label(nodes)
    return outputs,labels,nodes
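
The normalization in Example #10 builds D^{-1/2} (A + I) D^{-1/2} with dense NumPy arrays, which is fine for the 34-node karate graph but wasteful for larger ones. A sparse-only sketch of the same step (an illustrative rewrite, not part of the example; adj is assumed to be a scipy sparse matrix):

import numpy as np
import scipy.sparse as sp

def normalize_adj_sparse(adj):
    # Symmetric renormalization D^{-1/2} (A + I) D^{-1/2}, kept sparse end to
    # end; degrees are >= 1 thanks to the added self-loops.
    adj_tilde = adj + sp.eye(adj.shape[0])
    deg = np.asarray(adj_tilde.sum(axis=1)).flatten()
    d_inv_sqrt = sp.diags(np.power(deg, -0.5))
    return d_inv_sqrt.dot(adj_tilde).dot(d_inv_sqrt).tocoo()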
Example #11
flags.DEFINE_integer('epochs', 20, 'Number of epochs to train.')
flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 16, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('hidden3', 16, 'Number of units in hidden layer 3.')
flags.DEFINE_integer('hidden4', 16, 'Number of units in hidden layer 4.')
flags.DEFINE_float('dropout', 0.1, 'Dropout rate (1 - keep probability).')


# Load the data for this run
adj = load_data('data/yeast.edgelist')
num_nodes = adj.shape[0]
num_edges = adj.sum()  # the graph is unweighted here, so summing all entries gives the edge count

# featureless
# there are no node features yet, so feed the identity matrix (one-hot vectors)
features = sparse_to_tuple(sp.identity(num_nodes))
num_features = features[2][1]
features_nonzero = features[1].shape[0]  # element [1] of the tuple holds the nonzero values

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
adj_orig.eliminate_zeros()

adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
adj = adj_train

adj_norm = preprocess_graph(adj)


# Define placeholders
# there are no features yet, so one-hot vectors are fed in scipy.sparse form; change this once real (generally dense) features are used
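Example #12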
def test_fitb(args):
    args = namedtuple("Args", args.keys())(*args.values())
    load_from = args.load_from
    config_file = load_from + '/results.json'
    log_file = load_from + '/log.json'

    with open(config_file) as f:
        config = json.load(f)
    with open(log_file) as f:
        log = json.load(f)

    DATASET = config['dataset']
    NUMCLASSES = 2
    BN_AS_TRAIN = False
    ADJ_SELF_CONNECTIONS = True

    def norm_adj(adj_to_norm):
        return normalize_nonsym_adj(adj_to_norm)

    # Dataloader
    if DATASET == 'fashiongen':
        dl = DataLoaderFashionGen()
    elif DATASET == 'polyvore':
        dl = DataLoaderPolyvore()
    train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
        'train')
    val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
        'valid')
    test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
        'test')
    adj_q, q_r_indices, q_c_indices, q_labels, q_ids, q_valid = dl.get_test_questions(
    )
    train_features, mean, std = dl.normalize_features(train_features,
                                                      get_moments=True)
    val_features = dl.normalize_features(val_features, mean=mean, std=std)
    test_features = dl.normalize_features(test_features, mean=mean, std=std)

    train_support = get_degree_supports(adj_train,
                                        config['degree'],
                                        adj_self_con=ADJ_SELF_CONNECTIONS)
    val_support = get_degree_supports(adj_val,
                                      config['degree'],
                                      adj_self_con=ADJ_SELF_CONNECTIONS)
    test_support = get_degree_supports(adj_test,
                                       config['degree'],
                                       adj_self_con=ADJ_SELF_CONNECTIONS)
    q_support = get_degree_supports(adj_q,
                                    config['degree'],
                                    adj_self_con=ADJ_SELF_CONNECTIONS)

    for i in range(1, len(train_support)):
        train_support[i] = norm_adj(train_support[i])
        val_support[i] = norm_adj(val_support[i])
        test_support[i] = norm_adj(test_support[i])
        q_support[i] = norm_adj(q_support[i])

    num_support = len(train_support)
    placeholders = {
        'row_indices':
        tf.placeholder(tf.int32, shape=(None, )),
        'col_indices':
        tf.placeholder(tf.int32, shape=(None, )),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
        'weight_decay':
        tf.placeholder_with_default(0., shape=()),
        'is_train':
        tf.placeholder_with_default(True, shape=()),
        'support': [
            tf.sparse_placeholder(tf.float32, shape=(None, None))
            for sup in range(num_support)
        ],
        'node_features':
        tf.placeholder(tf.float32, shape=(None, None)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, ))
    }

    model = CompatibilityGAE(placeholders,
                             input_dim=train_features.shape[1],
                             num_classes=NUMCLASSES,
                             num_support=num_support,
                             hidden=config['hidden'],
                             learning_rate=config['learning_rate'],
                             logging=True,
                             batch_norm=config['batch_norm'])

    # Construct feed dicts for train, val and test phases
    train_feed_dict = construct_feed_dict(placeholders, train_features,
                                          train_support, train_labels,
                                          train_r_indices, train_c_indices,
                                          config['dropout'])
    val_feed_dict = construct_feed_dict(placeholders,
                                        val_features,
                                        val_support,
                                        val_labels,
                                        val_r_indices,
                                        val_c_indices,
                                        0.,
                                        is_train=BN_AS_TRAIN)
    test_feed_dict = construct_feed_dict(placeholders,
                                         test_features,
                                         test_support,
                                         test_labels,
                                         test_r_indices,
                                         test_c_indices,
                                         0.,
                                         is_train=BN_AS_TRAIN)
    q_feed_dict = construct_feed_dict(placeholders,
                                      test_features,
                                      q_support,
                                      q_labels,
                                      q_r_indices,
                                      q_c_indices,
                                      0.,
                                      is_train=BN_AS_TRAIN)

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()
    sigmoid = lambda x: 1 / (1 + np.exp(-x))

    with tf.Session() as sess:
        saver.restore(sess, load_from + '/' + 'best_epoch.ckpt')

        val_avg_loss, val_acc, conf, pred = sess.run(
            [model.loss, model.accuracy, model.confmat,
             model.predict()],
            feed_dict=val_feed_dict)

        print("val_loss=", "{:.5f}".format(val_avg_loss), "val_acc=",
              "{:.5f}".format(val_acc))

        test_avg_loss, test_acc, conf = sess.run(
            [model.loss, model.accuracy, model.confmat],
            feed_dict=test_feed_dict)

        print("test_loss=", "{:.5f}".format(test_avg_loss), "test_acc=",
              "{:.5f}".format(test_acc))

        num_processed = 0
        correct = 0

        kwargs = {
            'K': args.k,
            'subset': args.subset,
            'resampled': args.resampled,
            'expand_outfit': args.expand_outfit
        }

        for question_adj, out_ids, choices_ids, labels, valid in dl.yield_test_questions_K_edges(
                **kwargs):
            q_support = get_degree_supports(question_adj,
                                            config['degree'],
                                            adj_self_con=ADJ_SELF_CONNECTIONS,
                                            verbose=False)
            for i in range(1, len(q_support)):
                q_support[i] = norm_adj(q_support[i])
            q_support = [sparse_to_tuple(sup) for sup in q_support]

            q_feed_dict = construct_feed_dict(placeholders,
                                              test_features,
                                              q_support,
                                              q_labels,
                                              out_ids,
                                              choices_ids,
                                              0.,
                                              is_train=BN_AS_TRAIN)

            # compute the output (correct or not) for the current FITB question
            preds = sess.run(model.outputs, feed_dict=q_feed_dict)
            preds = sigmoid(preds)
            outs = preds.reshape((-1, 4))
            outs = outs.mean(axis=0)  # pick the candidate with the largest average probability across all edges

            gt = labels.reshape((-1, 4)).mean(axis=0)
            predicted = outs.argmax()
            gt = gt.argmax()
            num_processed += 1
            correct += int(predicted == gt)

            print("[{}] Acc: {}".format(num_processed,
                                        correct / num_processed))

    print('Best val score saved in log: {}'.format(config['best_val_score']))
    print('Last val score saved in log: {}'.format(log['val']['acc'][-1]))
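
A small self-contained illustration of the FITB scoring step above (reshape to one row per query edge and four candidate answers, average over edges, then argmax), using made-up prediction values:

import numpy as np

# Made-up sigmoid outputs for 3 query edges x 4 candidate answers, flattened
# the same way model.outputs is flattened in the snippet above.
preds = np.array([0.9, 0.1, 0.2, 0.3,
                  0.8, 0.2, 0.1, 0.4,
                  0.7, 0.3, 0.2, 0.2])
outs = preds.reshape((-1, 4)).mean(axis=0)  # average score per candidate
print(outs.argmax())  # -> 0, i.e. the first candidate is chosen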
Example #13
    def __init__(self, adj, x, y, hidden=16, name="",
                 with_relu=True, params_dict={'dropout': 0.5}, gpu_id=None,
                 seed=None):
        adj = utils.preprocess_adj(adj)
        num_features = x.shape[1]
        num_classes = y.max() + 1

        self.graph = tf.Graph()
        with self.graph.as_default():
            if seed:
                tf.set_random_seed(seed)

            with tf.variable_scope(name) as scope:
                w_init = glorot_uniform
                self.name = name

                self.dropout = params_dict.get('dropout', 0.)
                if not with_relu:
                    self.dropout = 0

                self.learning_rate = params_dict.get('learning_rate', 0.01)

                self.weight_decay = params_dict.get('weight_decay', 5e-4)
                self.N, self.D = x.shape

                self.node_ids = tf.placeholder(tf.int32, [None], 'node_ids')
                self.node_labels = tf.placeholder(tf.int32, [None, num_classes], 'node_labels')

                # bool placeholder to turn on dropout during training
                self.training = tf.placeholder_with_default(False, shape=())

                self.labels = np.eye(num_classes)[y]
                self.adj = tf.SparseTensor(*utils.sparse_to_tuple(adj))
                self.adj = tf.cast(self.adj, tf.float32)
                self.X_sparse = tf.SparseTensor(*utils.sparse_to_tuple(x))
                self.X_sparse = tf.cast(self.X_sparse, tf.float32)
                self.X_dropout = sparse_dropout(self.X_sparse, 1 - self.dropout,
                                                (int(self.X_sparse.values.get_shape()[0]),))
                # only use drop-out during training
                self.X_comp = tf.cond(self.training,
                                      lambda: self.X_dropout,
                                      lambda: self.X_sparse) if self.dropout > 0. else self.X_sparse

                self.W1 = tf.get_variable('W1', [self.D, hidden], tf.float32, initializer=w_init())
                self.b1 = tf.get_variable('b1', dtype=tf.float32, initializer=tf.zeros(hidden))

                self.h1 = spdot(self.adj, spdot(self.X_comp, self.W1))

                if with_relu:
                    self.h1 = tf.nn.relu(self.h1 + self.b1)

                self.h1_dropout = tf.nn.dropout(self.h1, rate=self.dropout)

                self.h1_comp = tf.cond(self.training,
                                       lambda: self.h1_dropout,
                                       lambda: self.h1) if self.dropout > 0. else self.h1

                self.W2 = tf.get_variable('W2', [hidden, num_classes], tf.float32, initializer=w_init())
                self.b2 = tf.get_variable('b2', dtype=tf.float32, initializer=tf.zeros(num_classes))

                self.logits = spdot(self.adj, dot(self.h1_comp, self.W2))
                if with_relu:
                    self.logits += self.b2
                self.logits_gather = tf.gather(self.logits, self.node_ids)

                self.predictions = tf.nn.softmax(self.logits_gather)

                self.loss_per_node = tf.nn.softmax_cross_entropy_with_logits_v2(logits=self.logits_gather,
                                                                                labels=self.node_labels)
                self.loss = tf.reduce_mean(self.loss_per_node)

                # weight decay only on the first layer, to match the original implementation
                if with_relu:
                    self.loss += self.weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in [self.W1, self.b1]])

                var_l = [self.W1, self.W2]
                if with_relu:
                    var_l.extend([self.b1, self.b2])
                self.train_op = tf.train.AdamOptimizer(learning_rate=self.learning_rate).minimize(self.loss,
                                                                                                  var_list=var_l)

                self.varlist = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name)
                self.local_init_op = tf.variables_initializer(self.varlist)

                if gpu_id is None:
                    config = tf.ConfigProto(
                        device_count={'GPU': 0}
                    )
                else:
                    gpu_options = tf.GPUOptions(visible_device_list='{}'.format(gpu_id), allow_growth=True)
                    config = tf.ConfigProto(gpu_options=gpu_options)

                self.session = tf.Session(config=config)
                self.init_op = tf.global_variables_initializer()
                self.session.run(self.init_op)
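Example #14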
    def _partition_graph(self, test_frac=.1, val_frac=.05, prevent_disconnect=True, verbose=False, use_pickle=False):
        # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper.
        # taken from https://github.com/lucashu1/link-prediction/blob/master/gae/preprocessing.py

        splits_filename = f'./dumps/splits/{self.dataset}_{int(test_frac*100)}_{int(val_frac*100)}.pkl'
        if use_pickle and check_file_exists(splits_filename):
            logging.error(f'Using pickle at {splits_filename!r}')
            adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false = load_pickle(splits_filename)
        else:
            g = nx.Graph(self.input_graph)
            adj = nx.to_scipy_sparse_matrix(g)
            orig_num_cc = nx.number_connected_components(g)

            adj_triu = sp.triu(adj)  # upper triangular portion of adj matrix
            adj_tuple = sparse_to_tuple(adj_triu)  # (coords, values, shape), edges only 1 way
            edges = adj_tuple[0]  # all edges, listed only once (not 2 ways)
            # edges_all = sparse_to_tuple(adj)[0] # ALL edges (includes both ways)
            num_test = int(np.floor(edges.shape[0] * test_frac))  # controls how large the test set should be
            num_val = int(np.floor(edges.shape[0] * val_frac))  # controls how large the validation set should be

            # Store edges in list of ordered tuples (node1, node2) where node1 < node2
            edge_tuples = [(min(edge[0], edge[1]), max(edge[0], edge[1])) for edge in edges]
            all_edge_tuples = set(edge_tuples)
            train_edges = set(edge_tuples)  # initialize train_edges to have all edges
            test_edges = set()
            val_edges = set()

            if verbose:
                print('generating test/val sets...', end=' ')

            # Iterate over shuffled edges, add to train/val sets
            np.random.shuffle(edge_tuples)
            for edge in edge_tuples:
                # print edge
                node1 = edge[0]
                node2 = edge[1]

                # If removing edge would disconnect a connected component, backtrack and move on
                g.remove_edge(node1, node2)
                if prevent_disconnect:
                    if nx.number_connected_components(g) > orig_num_cc:
                        g.add_edge(node1, node2)
                        continue

                # Fill test_edges first
                if len(test_edges) < num_test:
                    test_edges.add(edge)
                    train_edges.remove(edge)

                # Then, fill val_edges
                elif len(val_edges) < num_val:
                    val_edges.add(edge)
                    train_edges.remove(edge)

                # Both edge lists full --> break loop
                elif len(test_edges) == num_test and len(val_edges) == num_val:
                    break

            if (len(val_edges) < num_val) or (len(test_edges) < num_test):
                print("WARNING: not enough removable edges to perform full train-test split!")
                print("Num. (test, val) edges requested: (", num_test, ", ", num_val, ")")
                print("Num. (test, val) edges returned: (", len(test_edges), ", ", len(val_edges), ")")

            if prevent_disconnect:
                assert nx.number_connected_components(g) == orig_num_cc

            if verbose:
                print('creating false test edges...', end=' ')

            test_edges_false = set()
            while len(test_edges_false) < num_test:
                idx_i = np.random.randint(0, adj.shape[0])
                idx_j = np.random.randint(0, adj.shape[0])
                if idx_i == idx_j:
                    continue

                false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

                # Make sure false_edge not an actual edge, and not a repeat
                if false_edge in all_edge_tuples:
                    continue
                if false_edge in test_edges_false:
                    continue

                test_edges_false.add(false_edge)

            if verbose:
                print('creating false val edges...', end=' ')

            val_edges_false = set()
            while len(val_edges_false) < num_val:
                idx_i = np.random.randint(0, adj.shape[0])
                idx_j = np.random.randint(0, adj.shape[0])
                if idx_i == idx_j:
                    continue

                false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

                # Make sure false_edge is not an actual edge, not in test_edges_false, not a repeat
                if false_edge in all_edge_tuples or \
                        false_edge in test_edges_false or \
                        false_edge in val_edges_false:
                    continue

                val_edges_false.add(false_edge)

            if verbose:
                print('creating false train edges...')

            train_edges_false = set()
            while len(train_edges_false) < len(train_edges):
                idx_i = np.random.randint(0, adj.shape[0])
                idx_j = np.random.randint(0, adj.shape[0])
                if idx_i == idx_j:
                    continue

                false_edge = (min(idx_i, idx_j), max(idx_i, idx_j))

                # Make sure false_edge is not an actual edge, not in test_edges_false,
                # not in val_edges_false, not a repeat
                if false_edge in all_edge_tuples or \
                        false_edge in test_edges_false or \
                        false_edge in val_edges_false or \
                        false_edge in train_edges_false:
                    continue

                train_edges_false.add(false_edge)

            if verbose:
                print('final checks for disjointness...', end=' ')

            # assert: false_edges are actually false (not in all_edge_tuples)
            assert test_edges_false.isdisjoint(all_edge_tuples)
            assert val_edges_false.isdisjoint(all_edge_tuples)
            assert train_edges_false.isdisjoint(all_edge_tuples)

            # assert: test, val, train false edges disjoint
            assert test_edges_false.isdisjoint(val_edges_false)
            assert test_edges_false.isdisjoint(train_edges_false)
            assert val_edges_false.isdisjoint(train_edges_false)

            # assert: test, val, train positive edges disjoint
            assert val_edges.isdisjoint(train_edges)
            assert test_edges.isdisjoint(train_edges)
            assert val_edges.isdisjoint(test_edges)

            if verbose:
                print('creating adj_train...', end=' ')

            # Re-build adj matrix using remaining graph
            adj_train = nx.adjacency_matrix(g)

            # Convert edge-lists to numpy arrays
            train_edges = np.array([list(edge_tuple) for edge_tuple in train_edges])
            train_edges_false = np.array([list(edge_tuple) for edge_tuple in train_edges_false])
            val_edges = np.array([list(edge_tuple) for edge_tuple in val_edges])
            val_edges_false = np.array([list(edge_tuple) for edge_tuple in val_edges_false])
            test_edges = np.array([list(edge_tuple) for edge_tuple in test_edges])
            test_edges_false = np.array([list(edge_tuple) for edge_tuple in test_edges_false])

            if verbose:
                print('Done with train-test split!', end=' ')
                print()

            # NOTE: these edge lists only contain single direction of edge!
            dump_pickle((adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false), splits_filename)
        logging.error(f'train (T/F): {len(train_edges)} valid: {len(val_edges)} ({val_frac*100}%) test: {len(test_edges)} ({test_frac*100}%)')
        return adj_train, train_edges, train_edges_false, val_edges, val_edges_false, test_edges, test_edges_false
Example #15
best_epoch = 0
wait = 0

print('Training...')

for epoch in range(NB_EPOCH):
    t = time.time()

    # modify train_feed_dict with support dropout if needed
    if SUP_DO:
        # do not modify the first support, the self-connections one
        for i in range(1, len(train_support)):
            modified = support_dropout(train_support[i].copy(), SUP_DO, edge_drop=True)
            modified.data[...] = 1 # make it binary to normalize
            modified = normalize_nonsym_adj(modified)
            modified = sparse_to_tuple(modified)
            train_feed_dict.update({placeholders['support'][i]: modified})

    # run one iteration
    outs = sess.run([model.opt_op, model.loss, model.accuracy, model.confmat], feed_dict=train_feed_dict)
    
    train_avg_loss = outs[1]
    train_acc = outs[2]

    val_avg_loss, val_acc, conf = sess.run([model.loss, model.accuracy, model.confmat], feed_dict=val_feed_dict)

    if VERBOSE:
        print("[*] Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(train_avg_loss),
              "train_acc=", "{:.5f}".format(train_acc),
              "val_loss=", "{:.5f}".format(val_avg_loss),
              "val_acc=", "{:.5f}".format(val_acc))
Example #16
def test_compatibility(args):
    args = namedtuple("Args", args.keys())(*args.values())
    load_from = args.load_from
    config_file = load_from + '/results.json'
    log_file = load_from + '/log.json'

    with open(config_file) as f:
        config = json.load(f)
    with open(log_file) as f:
        log = json.load(f)

    # Dataloader
    DATASET = config['dataset']
    if DATASET == 'polyvore':
        # load dataset
        dl = DataLoaderPolyvore()
        orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
            'train')
        full_train_adj = dl.train_adj
        orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
            'valid')
        orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
            'test')
        full_test_adj = dl.test_adj
        dl.setup_test_compatibility(resampled=args.resampled)
    elif DATASET == 'ssense':
        dl = DataLoaderFashionGen()
        orig_train_features, adj_train, train_labels, train_r_indices, train_c_indices = dl.get_phase(
            'train')
        orig_val_features, adj_val, val_labels, val_r_indices, val_c_indices = dl.get_phase(
            'valid')
        orig_test_features, adj_test, test_labels, test_r_indices, test_c_indices = dl.get_phase(
            'test')
        adj_q, q_r_indices, q_c_indices, q_labels, q_ids, q_valid = dl.get_test_questions(
        )
        full_train_adj = dl.train_adj
        full_test_adj = dl.test_adj
        dl.setup_test_compatibility(resampled=args.resampled)
    else:
        raise NotImplementedError(
            'A data loader for dataset {} does not exist'.format(DATASET))

    NUMCLASSES = 2
    BN_AS_TRAIN = False
    ADJ_SELF_CONNECTIONS = True

    def norm_adj(adj_to_norm):
        return normalize_nonsym_adj(adj_to_norm)

    train_features, mean, std = dl.normalize_features(orig_train_features,
                                                      get_moments=True)
    val_features = dl.normalize_features(orig_val_features, mean=mean, std=std)
    test_features = dl.normalize_features(orig_test_features,
                                          mean=mean,
                                          std=std)

    train_support = get_degree_supports(adj_train,
                                        config['degree'],
                                        adj_self_con=ADJ_SELF_CONNECTIONS)
    val_support = get_degree_supports(adj_val,
                                      config['degree'],
                                      adj_self_con=ADJ_SELF_CONNECTIONS)
    test_support = get_degree_supports(adj_test,
                                       config['degree'],
                                       adj_self_con=ADJ_SELF_CONNECTIONS)

    for i in range(1, len(train_support)):
        train_support[i] = norm_adj(train_support[i])
        val_support[i] = norm_adj(val_support[i])
        test_support[i] = norm_adj(test_support[i])

    num_support = len(train_support)
    placeholders = {
        'row_indices':
        tf.compat.v1.placeholder(tf.int32, shape=(None, )),
        'col_indices':
        tf.compat.v1.placeholder(tf.int32, shape=(None, )),
        'dropout':
        tf.compat.v1.placeholder_with_default(0., shape=()),
        'weight_decay':
        tf.compat.v1.placeholder_with_default(0., shape=()),
        'is_train':
        tf.compat.v1.placeholder_with_default(True, shape=()),
        'support': [
            tf.compat.v1.sparse_placeholder(tf.float32, shape=(None, None))
            for sup in range(num_support)
        ],
        'node_features':
        tf.compat.v1.placeholder(tf.float32, shape=(None, None)),
        'labels':
        tf.compat.v1.placeholder(tf.float32, shape=(None, ))
    }

    model = CompatibilityGAE(placeholders,
                             input_dim=train_features.shape[1],
                             num_classes=NUMCLASSES,
                             num_support=num_support,
                             hidden=config['hidden'],
                             learning_rate=config['learning_rate'],
                             logging=True,
                             batch_norm=config['batch_norm'])

    # Construct feed dicts for train, val and test phases
    train_feed_dict = construct_feed_dict(placeholders, train_features,
                                          train_support, train_labels,
                                          train_r_indices, train_c_indices,
                                          config['dropout'])
    val_feed_dict = construct_feed_dict(placeholders,
                                        val_features,
                                        val_support,
                                        val_labels,
                                        val_r_indices,
                                        val_c_indices,
                                        0.,
                                        is_train=BN_AS_TRAIN)
    test_feed_dict = construct_feed_dict(placeholders,
                                         test_features,
                                         test_support,
                                         test_labels,
                                         test_r_indices,
                                         test_c_indices,
                                         0.,
                                         is_train=BN_AS_TRAIN)

    # Add ops to save and restore all the variables.
    saver = tf.compat.v1.train.Saver()

    def eval():
        # use this as a control value: if the model is OK, the value will match the one saved in the log
        val_avg_loss, val_acc, conf, pred = sess.run(
            [model.loss, model.accuracy, model.confmat,
             model.predict()],
            feed_dict=val_feed_dict)

        print("val_loss=", "{:.5f}".format(val_avg_loss), "val_acc=",
              "{:.5f}".format(val_acc))

    with tf.compat.v1.Session() as sess:
        saver.restore(sess, load_from + '/' + 'best_epoch.ckpt')

        count = 0
        preds = []
        labels = []

        # evaluate the model's prediction accuracy
        eval()

        prob_act = tf.nn.sigmoid

        K = args.k
        for outfit in dl.comp_outfits:
            before_item = time.time()
            items, score = outfit

            num_new = test_features.shape[0]

            new_adj = sp.csr_matrix((num_new, num_new))  # no connections

            if args.k > 0:
                # add edges to the adj matrix
                available_adj = dl.test_adj.copy()
                available_adj = available_adj.tolil()

                i = 0
                for idx_from in items[:-1]:
                    for idx_to in items[i + 1:]:
                        # remove outfit edges, they won't be expanded
                        available_adj[idx_to, idx_from] = 0
                        available_adj[idx_from, idx_to] = 0
                    i += 1
                available_adj = available_adj.tocsr()
                available_adj.eliminate_zeros()

            if args.subset:  # use only a subset (of size 3) of the outfit
                items = np.random.choice(items, 3)

            new_features = test_features

            # predict edges between the items
            query_r = []
            query_c = []

            i = 0
            item_indexes = items
            for idx_from in item_indexes[:-1]:
                for idx_to in item_indexes[i + 1:]:
                    query_r.append(idx_from)
                    query_c.append(idx_to)
                i += 1

            if args.k > 0:
                G = Graph(available_adj)
                nodes_to_expand = np.unique(items)
                for node in nodes_to_expand:
                    edges = G.run_K_BFS(node, K)
                    for edge in edges:
                        u, v = edge
                        new_adj[u, v] = 1
                        new_adj[v, u] = 1

            query_r = np.array(query_r)
            query_c = np.array(query_c)

            new_adj = new_adj.tocsr()

            new_support = get_degree_supports(
                new_adj,
                config['degree'],
                adj_self_con=ADJ_SELF_CONNECTIONS,
                verbose=False)
            for i in range(1, len(new_support)):
                new_support[i] = norm_adj(new_support[i])
            new_support = [sparse_to_tuple(sup) for sup in new_support]

            new_feed_dict = construct_feed_dict(placeholders,
                                                new_features,
                                                new_support,
                                                train_labels,
                                                query_r,
                                                query_c,
                                                0.,
                                                is_train=BN_AS_TRAIN)

            pred = sess.run(prob_act(model.outputs), feed_dict=new_feed_dict)

            predicted_score = pred.mean()
            print("[{}] Mean scores between outfit: {:.4f}, label: {}".format(
                count, predicted_score, score))
            # TODO: remove this print
            print("Total Elapsed: {:.4f}".format(time.time() - before_item))
            count += 1

            preds.append(predicted_score)
            labels.append(score)

        preds = np.array(preds)
        labels = np.array(labels)

        AUC = compute_auc(preds, labels)

        # use this as a control value: if the model is OK, the value will match the one saved in the log
        eval()

        print('The AUC compat score is: {}'.format(AUC))

    print('Best val score saved in log: {}'.format(config['best_val_score']))
    print('Last val score saved in log: {}'.format(log['val']['acc'][-1]))

    print("mean positive prediction: {}".format(
        preds[labels.astype(bool)].mean()))
    print("mean negative prediction: {}".format(preds[np.logical_not(
        labels.astype(bool))].mean()))
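
compute_auc itself is not shown anywhere in this listing; a minimal stand-in, under the assumption that it is a plain ROC-AUC of the sigmoid scores against the binary compatibility labels:

from sklearn.metrics import roc_auc_score

def compute_auc(preds, labels):
    # Assumed behaviour of the compute_auc helper used above: ROC-AUC of the
    # predicted scores against the 0/1 labels.
    return roc_auc_score(labels, preds)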
Example #17
    def mask_test_edges(self, val_frac, test_frac, no_mask):
        adj = self.adj_orig
        assert adj.diagonal().sum() == 0

        adj_triu = sp.triu(adj)
        edges = sparse_to_tuple(adj_triu)[0]
        edges_all = sparse_to_tuple(adj)[0]
        num_test = int(np.floor(edges.shape[0] * test_frac))
        num_val = int(np.floor(edges.shape[0] * val_frac))

        all_edge_idx = list(range(edges.shape[0]))
        np.random.shuffle(all_edge_idx)
        val_edge_idx = all_edge_idx[:num_val]
        test_edge_idx = all_edge_idx[num_val:(num_val + num_test)]
        test_edges = edges[test_edge_idx]
        val_edges = edges[val_edge_idx]
        if no_mask:
            train_edges = edges
        else:
            train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0)

        def ismember(a, b, tol=5):
            rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1)
            return np.any(rows_close)

        test_edges_false = []
        while len(test_edges_false) < len(test_edges):
            idx_i = np.random.randint(0, adj.shape[0])
            idx_j = np.random.randint(0, adj.shape[0])
            if idx_i == idx_j:
                continue
            if ismember([idx_i, idx_j], edges_all):
                continue
            if test_edges_false:
                if ismember([idx_j, idx_i], np.array(test_edges_false)):
                    continue
                if ismember([idx_i, idx_j], np.array(test_edges_false)):
                    continue
            test_edges_false.append([idx_i, idx_j])

        val_edges_false = []
        while len(val_edges_false) < len(val_edges):
            idx_i = np.random.randint(0, adj.shape[0])
            idx_j = np.random.randint(0, adj.shape[0])
            if idx_i == idx_j:
                continue
            if ismember([idx_i, idx_j], train_edges):
                continue
            if ismember([idx_j, idx_i], train_edges):
                continue
            if ismember([idx_i, idx_j], val_edges):
                continue
            if ismember([idx_j, idx_i], val_edges):
                continue
            if val_edges_false:
                if ismember([idx_j, idx_i], np.array(val_edges_false)):
                    continue
                if ismember([idx_i, idx_j], np.array(val_edges_false)):
                    continue
            val_edges_false.append([idx_i, idx_j])

        # assert ~ismember(test_edges_false, edges_all)
        # assert ~ismember(val_edges_false, edges_all)
        # assert ~ismember(val_edges, test_edges)
        # if not no_mask:
        #     assert ~ismember(val_edges, train_edges)
        #     assert ~ismember(test_edges, train_edges)

        # Re-build adj matrix
        adj_train = sp.csr_matrix((np.ones(train_edges.shape[0]), (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape)
        self.adj_train = adj_train + adj_train.T
        self.adj_label = adj_train + sp.eye(adj_train.shape[0])
        # NOTE: these edge lists only contain single direction of edge!
        self.val_edges = val_edges
        self.val_edges_false = np.asarray(val_edges_false)
        self.test_edges = test_edges
        self.test_edges_false = np.asarray(test_edges_false)
Example #18
def test_amazon(args):
    args = namedtuple("Args", args.keys())(*args.values())

    load_from = args.load_from
    config_file = load_from + '/results.json'
    log_file = load_from + '/log.json'

    with open(config_file) as f:
        config = json.load(f)
    with open(log_file) as f:
        log = json.load(f)

    NUMCLASSES = 2
    BN_AS_TRAIN = False
    ADJ_SELF_CONNECTIONS = True

    # evaluate in the specified version
    print("Trained with {}, evaluating with {}".format(config['amz_data'],
                                                       args.amz_data))
    cat_rel = args.amz_data
    dp = DataLoaderAmazon(cat_rel=cat_rel)
    train_features, adj_train, train_labels, train_r_indices, train_c_indices = dp.get_phase(
        'train')
    _, adj_val, val_labels, val_r_indices, val_c_indices = dp.get_phase(
        'valid')
    _, adj_test, test_labels, test_r_indices, test_c_indices = dp.get_phase(
        'test')
    full_adj = dp.adj

    def norm_adj(adj_to_norm):
        return normalize_nonsym_adj(adj_to_norm)

    train_features, mean, std = dp.normalize_features(train_features,
                                                      get_moments=True)

    train_support = get_degree_supports(adj_train,
                                        config['degree'],
                                        adj_self_con=ADJ_SELF_CONNECTIONS)
    val_support = get_degree_supports(adj_val,
                                      config['degree'],
                                      adj_self_con=ADJ_SELF_CONNECTIONS)
    test_support = get_degree_supports(adj_test,
                                       config['degree'],
                                       adj_self_con=ADJ_SELF_CONNECTIONS)

    for i in range(1, len(train_support)):
        train_support[i] = norm_adj(train_support[i])
        val_support[i] = norm_adj(val_support[i])
        test_support[i] = norm_adj(test_support[i])

    num_support = len(train_support)
    placeholders = {
        'row_indices':
        tf.compat.v1.placeholder(tf.int32, shape=(None, )),
        'col_indices':
        tf.compat.v1.placeholder(tf.int32, shape=(None, )),
        'dropout':
        tf.compat.v1.placeholder_with_default(0., shape=()),
        'weight_decay':
        tf.compat.v1.placeholder_with_default(0., shape=()),
        'is_train':
        tf.compat.v1.placeholder_with_default(True, shape=()),
        'support': [
            tf.compat.v1.sparse_placeholder(tf.float32, shape=(None, None))
            for sup in range(num_support)
        ],
        'node_features':
        tf.compat.v1.placeholder(tf.float32, shape=(None, None)),
        'labels':
        tf.compat.v1.placeholder(tf.float32, shape=(None, ))
    }

    model = CompatibilityGAE(placeholders,
                             input_dim=train_features.shape[1],
                             num_classes=NUMCLASSES,
                             num_support=num_support,
                             hidden=config['hidden'],
                             learning_rate=config['learning_rate'],
                             logging=True,
                             batch_norm=config['batch_norm'])

    train_feed_dict = construct_feed_dict(placeholders, train_features,
                                          train_support, train_labels,
                                          train_r_indices, train_c_indices,
                                          config['dropout'])
    # No dropout for validation and test runs
    val_feed_dict = construct_feed_dict(placeholders,
                                        train_features,
                                        val_support,
                                        val_labels,
                                        val_r_indices,
                                        val_c_indices,
                                        0.,
                                        is_train=BN_AS_TRAIN)
    test_feed_dict = construct_feed_dict(placeholders,
                                         train_features,
                                         test_support,
                                         test_labels,
                                         test_r_indices,
                                         test_c_indices,
                                         0.,
                                         is_train=BN_AS_TRAIN)

    # Add ops to save and restore all the variables.
    saver = tf.compat.v1.train.Saver()

    with tf.compat.v1.Session() as sess:
        saver.restore(sess, load_from + '/' + 'best_epoch.ckpt')

        val_avg_loss, val_acc, conf, pred = sess.run(
            [model.loss, model.accuracy, model.confmat,
             model.predict()],
            feed_dict=val_feed_dict)

        print("val_loss=", "{:.5f}".format(val_avg_loss), "val_acc=",
              "{:.5f}".format(val_acc))

        test_avg_loss, test_acc, conf = sess.run(
            [model.loss, model.accuracy, model.confmat],
            feed_dict=test_feed_dict)

        print("test_loss=", "{:.5f}".format(test_avg_loss), "test_acc=",
              "{:.5f}".format(test_acc))

        # rerun for K=0 (all in parallel)
        k_0_adj = sp.csr_matrix(adj_val.shape)
        k_0_support = get_degree_supports(k_0_adj,
                                          config['degree'],
                                          adj_self_con=ADJ_SELF_CONNECTIONS,
                                          verbose=False)
        for i in range(1, len(k_0_support)):
            k_0_support[i] = norm_adj(k_0_support[i])
        k_0_support = [sparse_to_tuple(sup) for sup in k_0_support]

        k_0_val_feed_dict = construct_feed_dict(placeholders,
                                                train_features,
                                                k_0_support,
                                                val_labels,
                                                val_r_indices,
                                                val_c_indices,
                                                0.,
                                                is_train=BN_AS_TRAIN)
        k_0_test_feed_dict = construct_feed_dict(placeholders,
                                                 train_features,
                                                 k_0_support,
                                                 test_labels,
                                                 test_r_indices,
                                                 test_c_indices,
                                                 0.,
                                                 is_train=BN_AS_TRAIN)

        val_avg_loss, val_acc, conf, pred = sess.run(
            [model.loss, model.accuracy, model.confmat,
             model.predict()],
            feed_dict=k_0_val_feed_dict)
        print("for k=0 val_loss=", "{:.5f}".format(val_avg_loss),
              "for k=0 val_acc=", "{:.5f}".format(val_acc))

        test_avg_loss, test_acc, conf = sess.run(
            [model.loss, model.accuracy, model.confmat],
            feed_dict=k_0_test_feed_dict)
        print("for k=0 test_loss=", "{:.5f}".format(test_avg_loss),
              "for k=0 test_acc=", "{:.5f}".format(test_acc))

        K = args.k

        available_adj = dp.full_valid_adj + dp.full_train_adj
        available_adj = available_adj.tolil()
        for r, c in zip(test_r_indices, test_c_indices):
            available_adj[r, c] = 0
            available_adj[c, r] = 0
        available_adj = available_adj.tocsr()
        available_adj.eliminate_zeros()

        G = Graph(available_adj)
        get_edges_func = G.run_K_BFS

        new_adj = sp.csr_matrix(full_adj.shape)
        new_adj = new_adj.tolil()
        for r, c in zip(test_r_indices, test_c_indices):
            before = time.time()
            if K > 0:  #expand the edges
                nodes_to_expand = [r, c]
                for node in nodes_to_expand:
                    edges = get_edges_func(node, K)
                    for edge in edges:
                        i, j = edge
                        new_adj[i, j] = 1
                        new_adj[j, i] = 1

        new_adj = new_adj.tocsr()

        new_support = get_degree_supports(new_adj,
                                          config['degree'],
                                          adj_self_con=ADJ_SELF_CONNECTIONS,
                                          verbose=False)
        for i in range(1, len(new_support)):
            new_support[i] = norm_adj(new_support[i])
        new_support = [sparse_to_tuple(sup) for sup in new_support]

        new_feed_dict = construct_feed_dict(placeholders,
                                            train_features,
                                            new_support,
                                            test_labels,
                                            test_r_indices,
                                            test_c_indices,
                                            0.,
                                            is_train=BN_AS_TRAIN)

        loss, acc = sess.run([model.loss, model.accuracy],
                             feed_dict=new_feed_dict)

        print("for k={} test_acc=".format(K), "{:.5f}".format(acc))

    print('Best val score saved in log: {}'.format(config['best_val_score']))
    print('Last val score saved in log: {}'.format(log['val']['acc'][-1]))
Example #19
def build_model(adj, features, n_classes, subgraphs):
    perturbation = None
    placeholders = {
        'features':
        tf.sparse_placeholder(tf.float32,
                              shape=tf.constant(features[2], dtype=tf.int64)),
        'labels':
        tf.placeholder(tf.float32, shape=(None, n_classes)),
        'labels_mask':
        tf.placeholder(tf.int32),
        'noise':
        tf.placeholder(tf.float32, shape=()),
        'dropout':
        tf.placeholder_with_default(0., shape=()),
    }

    if FLAGS.model == 'gcn':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = GCN

    elif FLAGS.model == 'gcnR':
        support = [sparse_to_tuple(adj)]
        model_func = GCN

    elif FLAGS.model == 'gcnT':
        support = [
            sparse_to_tuple(
                preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold))
        ]
        model_func = GCN

    elif FLAGS.model == 'fishergcn' or FLAGS.model == 'fishergcnT':

        if FLAGS.model == 'fishergcn':
            A = preprocess_adj(adj)
        else:
            A = preprocess_high_order_adj(adj, FLAGS.order, FLAGS.threshold)

        N = adj.shape[0]
        L = sp.eye(N) - A

        if FLAGS.fisher_freq == 0:
            #nsubgraphs = subgraphs.shape[1]
            #V = block_krylov( A, FLAGS.fisher_rank+nsubgraphs )
            #V = V[:,:FLAGS.fisher_rank]

            V = block_krylov(A, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)

        elif FLAGS.fisher_freq == 1:
            # if the graph contains one large component and small isolated components
            # only perturb the largest connected component
            subgraph_sizes = subgraphs.sum(0)
            largest_idx = np.argmax(subgraph_sizes)
            isolated = np.nonzero(1 - subgraphs[:, largest_idx])[0]
            L = L.tolil()
            L[:, isolated] = 0
            L[isolated, :] = 0
            L = L.tocsr()

            V = block_krylov(L, FLAGS.fisher_rank)
            w = (sp.csr_matrix.dot(L, V) * V).sum(0)

        elif FLAGS.fisher_freq == 2:
            V, _ = np.linalg.qr(np.random.randn(N, FLAGS.fisher_rank))
            w = np.ones(FLAGS.fisher_rank)

        else:
            print('unknown frequency:', FLAGS.fisher_freq)
            sys.exit(0)

        perturbation = make_perturbation(V, w, placeholders['noise'],
                                         FLAGS.fisher_adversary)
        support = [sparse_to_tuple(A)]
        model_func = GCN

    elif FLAGS.model == 'chebynet':
        support = chebyshev_polynomials(adj, FLAGS.max_degree)
        model_func = GCN

    elif FLAGS.model == 'mlp':
        support = [sparse_to_tuple(preprocess_adj(adj))]
        model_func = MLP

    else:
        raise ValueError('Invalid argument for model: ' + str(FLAGS.model))

    try:
        _, _values, _shape = support[0]
        print("sparsity: {0:.2f}%".format(100 * (_values > 0).sum() /
                                          (_shape[0] * _shape[1])))
    except Exception:
        pass
    placeholders['support'] = [
        tf.sparse_placeholder(tf.float32) for _ in support
    ]

    model = model_func(placeholders,
                       perturbation=perturbation,
                       subgraphs=subgraphs)
    return model, support, placeholders
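
A hedged sketch of how the returned support tuples are typically fed back into the sparse placeholders at run time. The construct_feed_dict helper is not part of this snippet, so the function name and argument list below are illustrative; only the placeholder keys come from the code above:

def build_feed_dict(placeholders, features, support, labels, labels_mask, dropout=0.):
    # tf.sparse_placeholder accepts (coords, values, shape) tuples directly,
    # so each support tuple is paired with its placeholder as-is.
    feed_dict = {placeholders['features']: features,
                 placeholders['labels']: labels,
                 placeholders['labels_mask']: labels_mask,
                 placeholders['dropout']: dropout}
    feed_dict.update({placeholders['support'][i]: support[i]
                      for i in range(len(support))})
    return feed_dict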