def main(_):
    """Train a single linear layer with softmax cross-entropy and print accuracy.

    Loads the dataset from ``FLAGS.data_dir`` (flattened images), fits
    ``y = xW + b`` with gradient descent (learning rate 0.5) for ``MAX_STEPS``
    mini-batches of ``BATCH_SIZE`` samples, prints the accuracy on the test
    split every 100 steps, and prints it once more after the loop.

    Args:
        _: Unused positional argument.
    """
    # load data
    meta, train_data, test_data = input_data.load_data(FLAGS.data_dir,
                                                       flatten=True)
    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print('data loaded')
    print('train images: %s. test images: %s' % (train_data.images.shape[0],
                                                 test_data.images.shape[0]))

    LABEL_SIZE = meta['label_size']
    IMAGE_SIZE = meta['width'] * meta['height']
    NUM_PER_IMAGE = meta['num_per_image']
    OUTPUT_SIZE = NUM_PER_IMAGE * LABEL_SIZE

    print('OUTPUT_SIZE: %s, image_size: %s' % (OUTPUT_SIZE, IMAGE_SIZE))

    # variable in the graph for input data
    x = tf.placeholder(tf.float32, [None, IMAGE_SIZE])
    y_ = tf.placeholder(tf.float32, [None, OUTPUT_SIZE])

    # define the model
    W = tf.Variable(tf.zeros([IMAGE_SIZE, OUTPUT_SIZE]))
    b = tf.Variable(tf.zeros([OUTPUT_SIZE]))
    y = tf.matmul(x, W) + b

    # Define loss and optimizer
    # NOTE(review): the softmax and the argmax below run over the whole
    # OUTPUT_SIZE vector; if num_per_image > 1 this treats several labels as
    # one distribution -- confirm that is intended.
    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
    cross_entropy = tf.reduce_mean(diff)
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    # forward prop
    predict = tf.argmax(y, axis=1)
    expect = tf.argmax(y_, axis=1)

    # evaluate accuracy
    correct_prediction = tf.equal(predict, expect)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        # The same test feed is used for every evaluation, so build it once.
        test_feed = {x: test_data.images, y_: test_data.labels}

        # Train
        for i in range(MAX_STEPS):
            batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)
            sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

            if i % 100 == 0:
                # Test trained model
                r = sess.run(accuracy, feed_dict=test_feed)
                print('step = %s, accuracy = %.2f%%' % (i, r * 100))

        # final check after looping
        r_test = sess.run(accuracy, feed_dict=test_feed)
        print('testing accuracy = %.2f%%' % (r_test * 100, ))
Esempio n. 2
0
def format_data(data_name):
    """Load a graph dataset and collect the quantities derived from it in a dict.

    Calls ``load_data(data_name)``, masks edges via ``mask_test_edges``,
    normalises the training adjacency with ``preprocess_graph`` and converts
    the features and the adjacency label to tuple form with
    ``sparse_to_tuple``.

    Returns:
        dict: one entry per element of ``items``; each key is whatever
        ``retrieve_name(item)`` returns for that element.
    """
    # Load data

    # y_test, tx, ty and test_maks are unpacked but not used below.
    adj, features, y_test, tx, ty, test_maks, true_labels = load_data(
        data_name)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    # Remove the diagonal elements
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    # From here on `adj` and `adj_train` are the same object.
    adj = adj_train
    adj_dense = adj.toarray()

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]
    features_dense = features.tocoo().toarray()

    features = sparse_to_tuple(features.tocoo())
    # num_features is the dimensionality of the features
    num_features = features[2][1]
    # features_nonzero is the number of non-zero feature entries
    features_nonzero = features[1].shape[0]

    # Ratio of (n*n - sum of adjacency entries) to the sum of adjacency entries.
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    # Training adjacency plus the identity, in tuple form.
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    # NOTE(review): features_dense is listed twice; the second entry just
    # overwrites the same key if retrieve_name is deterministic -- confirm.
    items = [
        adj, num_features, num_nodes, features_nonzero, pos_weight, norm,
        adj_norm, adj_label, features, true_labels, train_edges, val_edges,
        val_edges_false, test_edges, test_edges_false, adj_orig,
        features_dense, adj_dense, features_dense
    ]
    feas = {}

    print('num_features is:', num_features)
    print('num_nodes is:', num_nodes)
    print('features_nonzero is:', features_nonzero)
    print('pos_weight is:', pos_weight)
    print('norm is:', norm)

    # NOTE(review): retrieve_name presumably maps each object back to the name
    # of a local variable in this function -- TODO confirm. If so, the local
    # variable names above are part of the returned dict's keys and must not
    # be renamed.
    for item in items:
        #item_name = [ k for k,v in locals().iteritems() if v == item][0]
        feas[retrieve_name(item)] = item

    return feas
Esempio n. 3
0
    def __init__(self, input_path, output_dir):
        """Ensure the output directory exists and expose the loaded data as attributes.

        Args:
            input_path: Passed to ``load_data``; the result must provide
                ``_asdict()``.
            output_dir: Directory created when it does not exist yet; stored
                as ``self.output_dir``.
        """
        output_missing = not exists(output_dir)
        if output_missing:
            makedirs(output_dir)

        self.output_dir = output_dir

        # Every field of the loaded record becomes an attribute of the same name.
        for field_name, field_value in load_data(input_path)._asdict().items():
            setattr(self, field_name, field_value)
Esempio n. 4
0
def format_data(data_name):
    """Load a graph dataset and return the derived training quantities as a dict.

    The returned dict holds the training adjacency (``adj``), its normalised
    form, the tuple-form features and adjacency label, the edge splits from
    ``mask_test_edges``, the diagonal-free original adjacency and the scalar
    statistics ``num_nodes``, ``num_features``, ``features_nonzero``,
    ``pos_weight`` and ``norm``.
    """
    full_adj, features, true_labels = load_data(data_name)

    # Original adjacency with its main diagonal subtracted, kept for later.
    own_diagonal = sp.dia_matrix(
        (full_adj.diagonal()[np.newaxis, :], [0]), shape=full_adj.shape)
    adj_orig = full_adj - own_diagonal
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(full_adj)

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Preprocessing of the training graph.
    adj_norm = preprocess_graph(adj_train)
    num_nodes = adj_train.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Statistics over the n*n adjacency cells and the sum of its entries.
    cell_count = num_nodes * num_nodes
    entry_sum = adj_train.sum()
    pos_weight = float(cell_count - entry_sum) / entry_sum
    norm = cell_count / float((cell_count - entry_sum) * 2)

    # Training adjacency plus twice the identity, in tuple form.
    adj_label = sparse_to_tuple(adj_train + 2 * sp.eye(adj_train.shape[0]))

    return {
        'adj': adj_train,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'features_nonzero': features_nonzero,
        'pos_weight': pos_weight,
        'norm': norm,
        'adj_norm': adj_norm,
        'adj_label': adj_label,
        'features': features,
        'true_labels': true_labels,
        'train_edges': train_edges,
        'val_edges': val_edges,
        'val_edges_false': val_edges_false,
        'test_edges': test_edges,
        'test_edges_false': test_edges_false,
        'adj_orig': adj_orig,
    }
Esempio n. 5
0
    def load_dataset(self, data_filename):
        """Load the train/test splits and derive the size attributes from them.

        Stores the first four elements returned by ``loader.load_data`` as
        ``train_X``, ``test_X``, ``train_Y`` and ``test_Y``, sets
        ``input_dim``, ``data_size`` and ``iterations``, prints a summary line
        and calls ``self.display()``.
        """
        loaded = loader.load_data(data_filename)
        split_names = ("train_X", "test_X", "train_Y", "test_Y")
        for position, split_name in enumerate(split_names):
            setattr(self, split_name, loaded[position])

        # Layer sizes
        train_inputs = self.train_X
        self.input_dim = train_inputs.shape[1]
        self.data_size = len(train_inputs)
        # Fixed value; the data_size / batch_size computation is disabled.
        self.iterations = 200
        print(self.data_size, "/", self.batch_size, "=", self.iterations)

        self.display()
Esempio n. 6
0
    def __init__(self, input_path, output_dir, debug_mode=False):
        """Set up the output directory, worker pool and loaded data attributes.

        Args:
            input_path: Passed to ``load_data``; the result must provide
                ``_asdict()``.
            output_dir: Directory created when it does not exist yet; stored
                as ``self.output_dir``.
            debug_mode: Stored as ``self.debug_mode`` after the data fields
                have been copied onto the instance.
        """
        output_missing = not exists(output_dir)
        if output_missing:
            makedirs(output_dir)

        self.output_dir = output_dir
        self.pool = Pool()
        self.obj_value_trace = []

        # Every field of the loaded record becomes an attribute of the same name.
        for field_name, field_value in load_data(input_path)._asdict().items():
            setattr(self, field_name, field_value)

        self.debug_mode = debug_mode
Esempio n. 7
0
def run_training():
    """Train the word2vec model on ``yitian.txt`` and periodically print similar words.

    Runs ``FLAGS.max_step`` training steps. Every 1000 steps the current loss
    and the duration of that step are printed; every 5000 steps (and at the
    last step) the words reported by ``word2vec.compute_sim`` for a fixed set
    of validation ids are printed.
    """
    text_dataset = input_data.load_data("yitian.txt",
                                        max_vocabulary_size=40000)
    # Validation ids are drawn without replacement from 5..14. The weights
    # 14..5 sum to 95, the same as sum(5..14), so sample_p sums to 1.
    valid_window = np.array(range(5, 15))
    sample_p = (19 - valid_window) / np.sum(valid_window)
    valid_ids = np.random.choice(valid_window,
                                 FLAGS.validation_size,
                                 p=sample_p,
                                 replace=False)

    with tf.Graph().as_default():
        batch_inputs_pl, batch_labels_pl, valid_ids_pl = place_holder(
            FLAGS.batch_size, FLAGS.validation_size)
        loss, embeddings = word2vec.loss(batch_inputs_pl, batch_labels_pl)
        train_op = word2vec.train(loss)
        sim_compute = word2vec.compute_sim(valid_ids_pl, embeddings)
        init = tf.initialize_all_variables()

        with tf.Session() as sess:
            sess.run(init)
            start_time = time.time()
            for step in range(FLAGS.max_step):
                filled_dict = fill_feed_dict(text_dataset, batch_inputs_pl,
                                             batch_labels_pl, FLAGS.batch_size,
                                             FLAGS.num_skips,
                                             FLAGS.skip_window)
                _, loss_value = sess.run([train_op, loss], filled_dict)

                if step % 1000 == 0:
                    # The timer is reset at the end of every iteration, so
                    # this is the duration of the current step only; seconds
                    # * 1000 is milliseconds (the label used to say "us").
                    duration = time.time() - start_time
                    print("Step: {:d}, Training Loss: {:.4f}, {:.1f}ms/step".
                          format(step, loss_value, duration * 1000))

                if (step + 1) % 5000 == 0 or (step + 1) == FLAGS.max_step:
                    sim_words_id, _ = sess.run(sim_compute,
                                               {valid_ids_pl: valid_ids})
                    for (i, word_id) in enumerate(valid_ids):
                        # word_count entries are indexed by id; element 0 is
                        # the printable word.
                        word = text_dataset.word_count[word_id][0]
                        sim_words = []
                        for sim_word_id in sim_words_id[i]:
                            sim_words.append(
                                text_dataset.word_count[sim_word_id][0])
                        print(word, end=":")
                        print(" ".join(sim_words))
                start_time = time.time()
Esempio n. 8
0
def heuristic_ga_optimize(input_path, out_path):
    """Run one generation of the GA over delta weights and report the best tardiness.

    Resets the module-level trace/cache state, creates a fresh worker pool,
    loads the problem from ``input_path`` and runs ``algorithms.eaSimple``
    with a population of one individual for one generation.

    Args:
        input_path: Passed to ``load_data``.
        out_path: Not used by this function.

    Returns:
        tuple: ``(min(_tardiness_obj_trace), elapsed_seconds)``.
    """
    # time.clock() was removed in Python 3.8; use perf_counter() when it exists
    # and fall back to clock() only on interpreters that lack it.
    # NOTE(review): perf_counter() is wall-clock time, whereas clock() measured
    # CPU time on Unix -- confirm wall-clock is the wanted "time cost".
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()
    global _last_x, _last_CT, _pool, _delta_trace, _delta_dim, _delta_project_idx
    _tardiness_obj_trace.clear()
    _delta_trace.clear()
    _delta_project_idx.clear()
    _CT_map.clear()
    _last_x = None
    _last_CT = None
    # NOTE(review): a new pool is created on every call and never closed here;
    # confirm who is responsible for shutting the previous one down.
    _pool = Pool(5)

    D = load_data(input_path)

    # initialization for GA
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()

    # Assign one consecutive index to every (project index, resource) pair
    # that has a demand entry; _delta_dim ends up as the number of pairs.
    _delta_dim = 0
    for j in range(D.project_n):
        p = D.project_list[j]
        for r in sorted([r_ for (r_, p_) in D.resource_project_demand.keys() if p_ == p]):
            _delta_project_idx[j, r] = _delta_dim
            _delta_dim += 1

    toolbox.register("individual", _random_delta_weight_for_projects, _delta_dim, creator.Individual)
    toolbox.register("population", tools.initRepeat, creator.Individual, toolbox.individual)
    toolbox.register("evaluate", _objective_function_for_delta_weight, D)
    toolbox.register("mate", _mate)
    toolbox.register("mutate", _mutate, mutate_prob=0.25)
    toolbox.register("select", tools.selTournament, tournsize=3)

    pop = toolbox.population(n=1)
    hof = tools.HallOfFame(1)

    pop, log = algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=1, halloffame=hof, verbose=True)
    return min(_tardiness_obj_trace), timer() - start
Esempio n. 9
0
def _one_hot_rows(labels, count, num_classes=10):
    """Return a list of ``count`` one-hot integer vectors built from ``labels``."""
    rows = []
    for i in range(count):
        # Builtin int replaces the np.int alias (removed in NumPy 1.24); the
        # resulting dtype is the same default integer type.
        row = np.zeros(num_classes, dtype=int)
        row[labels[i]] = 1
        rows.append(row)
    return rows


def main(argv=None):
    """Load the dataset, flatten the images, one-hot encode the labels and train.

    The first 50000 training and 10000 test samples are used; each image is
    reshaped to a 3072-element row and each label becomes a 10-element one-hot
    vector before everything is handed to ``train``.

    Args:
        argv: Unused.
    """
    (images, labels), (t_images, t_labels) = input_data.load_data()
    images = np.reshape(images, (50000, 3072))
    t_images = np.reshape(t_images, (10000, 3072))

    train_targets = _one_hot_rows(labels, 50000)
    del labels
    test_targets = _one_hot_rows(t_labels, 10000)
    del t_labels

    # As before, the training targets are passed as one array while the test
    # targets stay a plain list of arrays.
    train(images, np.array(train_targets), t_images, test_targets)
Esempio n. 10
0
def run_training():
    """Train the network for ``FLAGS.max_step`` steps with periodic evaluation.

    Every 100 steps the training loss and the per-step time since the last
    report are printed. Every 1000 steps, and at the final step, ``do_eval``
    runs on the train, validation and test sets.
    """
    # cifar-10 loader
    train_data, test_data, validation_data = input_data.load_data()

    with tf.Graph().as_default():
        image_pl, label_pl, keep_prob_pl = place_holder(FLAGS.batch_size)
        logits = nn_structure.inference(
            image_pl, conv_1_params, max_pool_1_params, conv_2_params,
            max_pool_2_params, full_connected_units, keep_prob_pl)
        loss = nn_structure.loss(logits, label_pl)
        train_op = nn_structure.train(loss, FLAGS.learning_rate)
        eval_correct = nn_structure.evaluation(logits, label_pl, k=1)
        init = tf.initialize_all_variables()

        # Evaluation passes, in the order they are reported.
        eval_passes = (
            ("Train Eval:", train_data),
            ("Validation Eval:", validation_data),
            ("Test Eval:", test_data),
        )

        with tf.Session() as sess:
            sess.run(init)
            window_start = time.time()
            for step in range(FLAGS.max_step):
                # 0.5 is forwarded to fill_feed_dict together with keep_prob_pl.
                batch_feed = fill_feed_dict(train_data, 0.5, image_pl,
                                            label_pl, keep_prob_pl)
                _, loss_value = sess.run([train_op, loss], batch_feed)

                if step % 100 == 0:
                    # Seconds over a 100-step window * 10 == ms per step.
                    elapsed = time.time() - window_start
                    message = "Step: {:d}, Training Loss: {:.4f}, {:.1f}ms/step"
                    print(message.format(step, loss_value, elapsed * 10))
                    window_start = time.time()

                steps_done = step + 1
                if steps_done % 1000 == 0 or steps_done == FLAGS.max_step:
                    for title, dataset in eval_passes:
                        print(title)
                        do_eval(sess, eval_correct, dataset, image_pl,
                                label_pl, keep_prob_pl)
Esempio n. 11
0
def format_data(data_source):
    """Load a graph dataset and collect the quantities derived from it in a dict.

    Unlike the edge-masking variants, the adjacency returned by ``load_data``
    is used as-is (the diagonal removal below is commented out).

    Returns:
        dict: one entry per element of ``items``; each key is whatever
        ``retrieve_name(item)`` returns for that element.
    """

    adj, features, labels = load_data(data_source)

    # Store original adjacency matrix (without diagonal entries) for later
    # adj_orig = adj
    # adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    # adj_orig.eliminate_zeros()
    # adj = adj_orig

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    # Tuple form of the features; element [2][1] and the length of element [1]
    # are read below as feature count and non-zero count.
    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    # Ratio of (n*n - sum of adjacency entries) to the sum of adjacency entries.
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    # Adjacency plus the identity, in tuple form.
    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)
    items = [
        adj, num_features, num_nodes, features_nonzero, adj_norm, adj_label,
        features, labels, pos_weight, norm
    ]
    feas = {}
    # NOTE(review): retrieve_name presumably maps each object back to the name
    # of a local variable in this function -- TODO confirm. If so, the local
    # variable names above are part of the returned dict's keys and must not
    # be renamed.
    for item in items:
        # item_name = [ k for k,v in locals().iteritems() if v == item][0]
        item_name = retrieve_name(item)
        feas[item_name] = item

    return feas
Esempio n. 12
0
def train(dataset, weightRate):
    """Train the embedding model and the final classifier, then print its metrics.

    Args:
        dataset: Passed to ``load_data``, which returns the adjacency matrix,
            the features and the false edges.
        weightRate: Passed to ``make_test_edges`` together with the adjacency
            matrix and the false edges.
    """
    adj, features, falseEdges = load_data(dataset)

    # generate training and test data
    adj_train, train_edges, train_edges_false, test_edges, test_edges_false = make_test_edges(
        weightRate, adj, falseEdges)

    # Single-argument print(...) calls emit the same text on Python 2 and 3.
    print('%s' % (adj_train.shape, ))
    print('%s %s' % (train_edges.shape, train_edges_false.shape))

    # embeddings returned by W-VGAE
    emb = train_gcn(features, adj_train, train_edges, train_edges_false,
                    test_edges, test_edges_false)

    # generate paired training and test data, similar to GCN
    X_train, Y_train = generate_data(emb, train_edges, train_edges_false)
    X_test, Y_test = generate_data(emb, test_edges, test_edges_false)

    # the final softmax classifier; acc holds the four metrics printed below
    acc = train_nn(X_train, Y_train, X_test, Y_test)
    print('accuracy: %s' % (acc[0], ))
    print('sensitivity: %s' % (acc[1], ))
    print('specificity: %s' % (acc[2], ))
    print('precision: %s' % (acc[3], ))
Esempio n. 13
0
# Lists to collect average results
# Only the lists matching FLAGS.task are created; any other task value leaves
# both the link-prediction and the clustering lists undefined.
if FLAGS.task == 'link_prediction':
    mean_roc = []
    mean_ap = []
elif FLAGS.task == 'node_clustering':
    mean_mutual_info = []
# Extra lists that exist only when FLAGS.kcore is set.
if FLAGS.kcore:
    mean_time_kcore = []
    mean_time_train = []
    mean_time_expand = []
    mean_core_size = []
mean_time = []

# Load graph dataset
adj_init, features_init = load_data(FLAGS.dataset)
if FLAGS.verbose:
    # n is the first dimension of the adjacency matrix, m half the sum of its entries.
    print(f"Loading data... {FLAGS.dataset} n: {adj_init.shape[0]}, m: {np.sum(adj_init)//2}")

# Load ground-truth labels for node clustering task
if FLAGS.task == 'node_clustering':
    labels = load_label(FLAGS.dataset)

# The entire training+test process is repeated FLAGS.nb_run times
for i in range(FLAGS.nb_run):

    if FLAGS.task == 'link_prediction' :
        if FLAGS.verbose:
            print("Masking test edges...")
        # Edge Masking for Link Prediction: compute Train/Validation/Test set
        adj, val_edges, val_edges_false, test_edges, test_edges_false = \
Esempio n. 14
0
# Dataset-specific flag defaults. Only 'yale' and 'rochester' are handled; for
# any other dataset value none of these flags is defined here.
# NOTE(review): the help text of 'link_weight' repeats 'weight of privacy' from
# 'pri_weight' in both branches -- looks like a copy-paste; confirm the wording.
if FLAGS.dataset == 'yale':
    flags.DEFINE_integer('epochs', 500, 'Number of iterations.')
    flags.DEFINE_integer('hidden2', 16, 'Number of units in GCN layer 2.')
    flags.DEFINE_integer('pri_weight', 1, 'weight of privacy')
    flags.DEFINE_integer('uti_attr_weight', 10, 'weight of utility_attr')
    flags.DEFINE_float('link_weight', 1, 'weight of privacy')
elif FLAGS.dataset == 'rochester':
    flags.DEFINE_integer('epochs', 2000, 'Number of iterations.')
    flags.DEFINE_integer('pri_weight', 10, 'weight of privacy')
    flags.DEFINE_integer('uti_attr_weight', 1, 'weight of utility_attr')
    flags.DEFINE_integer('hidden2', 8, 'Number of units in GCN layer 2.')
    flags.DEFINE_float('link_weight', 1, 'weight of privacy')

# Load data
adj, features, adj_train, val_edges, val_edges_false, test_edges, test_edges_false, labels = load_data(
    FLAGS.dataset)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

# From here on `adj` refers to the training adjacency returned by load_data.
adj = adj_train

# Some preprocessing
adj_norm = preprocess_graph(adj)
# Dense copy of the features, passed to get_attr_list with the labels.
features_mat = features.toarray()
attr_labels_list, dim_attr, features_rm_privacy = get_attr_list(
    FLAGS.dataset, labels, features_mat)
Esempio n. 15
0
def heuristic_delta_weight(input_path, output_path=None, converge_count=2, tolerance=1, d1=100, d2=0):
    '''
    Iteratively re-weight the delta weights until the tardiness objective converges.

    :param input_path: the path for the folder of the input files
    :param output_path: when not None, replaces the module-level result output path; that directory
        is created if it does not exist.
    :param converge_count: the process stops once the optimal solution has not been updated for
        converge_count consecutive rounds.
    :param tolerance: when abs(last_optimal - current_optimal) <= tolerance, the solution is
        considered unchanged (converged).
    :param d1: parameter in formula 40
    :param d2: parameter in formula 40
    :return: (objective_value, time_cost, gap); gap is the _gap_trace entry at the index of the
        smallest value in the tardiness trace.
    '''
    from random import seed
    seed(13)

    # time.clock() was removed in Python 3.8; use perf_counter() when it exists
    # and fall back to clock() only on interpreters that lack it.
    # NOTE(review): perf_counter() is wall-clock time, whereas clock() measured
    # CPU time on Unix -- confirm wall-clock is the wanted "time cost".
    timer = getattr(time, 'perf_counter', None) or time.clock
    start = timer()
    global _last_x, _last_CT, _pool, _delta_trace, _historical_delta_weight_idx_map, _result_output_path, _time_limit_per_model, _gap_trace, _round
    _tardiness_obj_trace.clear()
    _gap_trace.clear()
    _delta_trace.clear()
    _CT_map.clear()
    _historical_delta_weight_idx_map.clear()
    _last_x = None
    _last_CT = None
    # NOTE(review): a new pool is created on every call and never closed here;
    # confirm who is responsible for shutting the previous one down.
    _pool = Pool(2)
    if output_path is not None:
        _result_output_path = output_path
    if not exists(_result_output_path):
        makedirs(_result_output_path)
    D = load_data(input_path)
    _round = 0

    # One hour split evenly across (project_n + 2) models.
    _time_limit_per_model = 3600.0 / (D.project_n + 2)

    # Start from a weight of 1 for every (project index, resource) pair that
    # has a demand entry, then normalize.
    delta_weight = {}
    for j in range(D.project_n):
        p = D.project_list[j]
        for r in sorted([r_ for (r_, p_) in D.resource_project_demand.keys() if p_ == p]):
            delta_weight[j, r] = 1  # random()

    _logger.info(str(delta_weight))
    _normalize(delta_weight)

    # Record the round-0 weights, one row per (project index, resource).
    for (j, r) in delta_weight.keys():
        _weight_dataset.loc[_weight_dataset.shape[0]] = [_round, j, r, delta_weight[j, r]]

    optimal = 1e10
    current_converge_count = 0

    with open('trace.log', 'a') as f:
        while current_converge_count < converge_count:
            _round += 1
            _logger.info('-' * 50)
            _logger.info('round %d' % _round)
            delta_weight = _objective_function_for_delta_weight(D, delta_weight, d1, d2)
            if _tardiness_obj_trace[-1] < optimal:
                # An improvement within the tolerance still counts as "no change".
                if abs(_tardiness_obj_trace[-1] - optimal) <= tolerance:
                    current_converge_count += 1
                else:
                    current_converge_count = 0
                optimal = min(optimal, _tardiness_obj_trace[-1])
            else:
                current_converge_count += 1

            print("trace:", _tardiness_obj_trace)
            f.write('%r\n' % _tardiness_obj_trace)
            # Terminate the line so the next round's trace starts on its own line.
            f.write("time cost:%r\n" % (timer() - start))

    return min(_tardiness_obj_trace), timer() - start, _gap_trace[np.argmin(_tardiness_obj_trace)]
Esempio n. 16
0
import input_data
import tensorflow as tf

if __name__ == '__main__':
    # When run as a script, load the dataset through input_data with its
    # default arguments and keep the result in `mnist`.
    mnist = input_data.load_data()
Esempio n. 17
0
def format_data(data_name):
    """Load a multi-view graph dataset and return the derived quantities as a dict.

    For each of the ``numView`` networks the main diagonal is subtracted and
    a positive-class weight and a norm factor are computed. The stack of
    diagonal-free adjacencies is passed to ``preprocess_graph``; its result
    is returned under both ``'adjs'`` and ``'adjs_norm'``. The unmodified
    networks are returned as ``'adjs_label'``.
    """
    print("&&&&&&&&&&&&&&&&&", data_name)
    # Only the first four returned values are used below.
    (rownetworks, numView, features, truelabels, y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = load_data(data_name)

    def without_diagonal(matrix):
        # Subtract the matrix's own main diagonal from it.
        own_diagonal = sp.dia_matrix(
            (matrix.diagonal()[np.newaxis, :], [0]), shape=matrix.shape)
        return matrix - own_diagonal

    adjs = np.array(
        [without_diagonal(rownetworks[view]) for view in range(numView)])

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adjs_norm = preprocess_graph(adjs)

    num_nodes = adjs[0].shape[0]
    num_features = features.shape[1]

    # Ratio of (cell count - entry sum) to entry sum of the feature matrix.
    feature_cells = features.shape[0] * features.shape[1]
    feature_sum = features.sum()
    fea_pos_weights = float(feature_cells - feature_sum) / feature_sum

    pos_weights = []
    norms = []
    for view in range(numView):
        side = adjs[view].shape[0]
        cells = side * side
        entry_sum = adjs[view].sum()
        pos_weights.append(float(cells - entry_sum) / entry_sum)
        norms.append(cells / float((cells - entry_sum) * 2))

    return {
        'adjs': adjs_norm,
        'adjs_label': rownetworks,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'true_labels': truelabels,
        'pos_weights': pos_weights,
        'norms': np.array(norms),
        'adjs_norm': adjs_norm,
        'features': features,
        'fea_pos_weights': fea_pos_weights,
        'numView': numView
    }
Esempio n. 18
0
    total = truth.shape[0] * truth.shape[1]
    seq_err_rate = seqerr[f] / total
    acc = seqerr[t] / total
    return seq_err_rate, acc


if __name__ == "__main__":
    # Input-data settings: sample count, split fractions and feature width.
    nb, timesteps = 50000, 1
    nb_samples = timesteps * nb
    val, test = 0.1, 0.2
    data_dim = 102

    data = dataset.load_data(nb_samples)

    x_train, y_train, x_val, y_val, x_test, y_test = split_data(
        data, nb_samples, val, test)

    # First approach: reshape every split with the same timestep count.
    trainX, trainY, valX, valY, testX, testY = (
        reshape_data(split, timesteps)
        for split in (x_train, y_train, x_val, y_val, x_test, y_test))

    # model parameters
    results = []
    batch_size, nb_epochs = 16, 1000
Esempio n. 19
0
def original_model(input_path, output_path):
    """Build and solve the full 'construction' MIP and return its objective and run time.

    The model minimizes the weighted sum of project tardiness ``w[j] * TD[j]``
    subject to constraints 2-17 below. The model and its solution are written
    to ``original.lp`` and ``original.sol`` inside ``output_path``.

    Args:
        input_path: Passed to ``load_data``, which must return the 14 values
            unpacked below.
        output_path: Directory for the output files; created if missing.

    Returns:
        tuple: ``(m.objVal, time_cost)`` where ``time_cost`` is the time in
        seconds from just after loading the data until ``optimize`` returns.
    """
    if not exists(output_path):
        makedirs(output_path)

    supplier_project_shipping, project_list, project_activity, DD, resource_supplier_capacity, \
    project_n, resource_project_demand, resource_supplier_list, M, c, B, resource_supplier_release_time, \
    review_duration, w = load_data(input_path)

    # time.clock() was removed in Python 3.8; use perf_counter() when it exists
    # and fall back to clock() only on interpreters that lack it.
    # NOTE(review): perf_counter() is wall-clock time, whereas clock() measured
    # CPU time on Unix -- confirm wall-clock is the wanted "time cost".
    timer = getattr(time, 'perf_counter', None) or time.clock
    start_time = timer()
    m = Model('construction')
    # m.setParam('OutputFlag', False)
    ##############################################################
    # m.params.presolve = 0
    m.params.MIPGap = 1e-8
    m.params.timelimit = 3600
    # m.params.IntFeasTol = 1e-9
    # Create variables############################################
    ##### supplier-project shipping decision x and shipping quantity q
    x = {}
    q = {}
    for (i, j, k) in supplier_project_shipping:
        # i resource, j supplier, k project
        x[i, j, k] = m.addVar(obj=0, vtype=GRB.BINARY, name="x_%s_%s_%s" % (i, j, k))
        q[i, j, k] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="q_%s_%s_%s" % (i, j, k))
    print('add var x,q')
    ##### project completion date DT, project tardiness TD, construction completion time CT
    DT = {}
    TD = {}
    CT = {}
    DT[-1] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="DT_-1")  # project start time
    for j in range(project_n):
        DT[j] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="DT_%d" % j)  # project j completion date
        TD[j] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="TD_%d" % j)  # project j tardiness
        CT[j] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="CT_%d" % j)  # project j construction completion time
    print('add var DT TD CT')
    ##### Activity start time
    ST = []
    for j in range(project_n):
        ST.append({})
        for row in project_activity[project_list[j]].nodes():
            ST[j][row] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="ST_%d_%s" % (j, row))
    print('add var ST')
    ##### Review sequence; index -1 is the dummy predecessor that uses DT[-1]
    z = {}
    for i in range(project_n):
        for j in range(project_n):
            if i != j:
                z[i, j] = m.addVar(obj=0, vtype=GRB.BINARY, name="z_%d_%d" % (i, j))

    for j in range(project_n):
        z[-1, j] = m.addVar(obj=0, vtype=GRB.BINARY, name="z_%d_%d" % (-1, j))
    print('add var z')
    ##### Ordering variable y for activity pairs of one project that share an 'rk_resources' entry
    y = {}
    for j in range(project_n):
        for row1 in project_activity[project_list[j]].nodes():
            for row2 in project_activity[project_list[j]].nodes():
                # print project_activity[project_list[j]].node[row1]
                if row1 != row2 and len(
                        list(set(project_activity[project_list[j]].node[row1]['rk_resources']).intersection(
                            project_activity[project_list[j]].node[row2]['rk_resources']))) > 0:
                    y[j, row1, row2] = m.addVar(obj=0, vtype=GRB.BINARY, name="y_%d_%s_%s" % (j, row1, row2))
    print('add var y')
    m.update()
    # create constraints########################################
    ##### Constraint 2: completion date minus tardiness must not exceed the due date DD
    for j in range(project_n):
        m.addConstr(DT[j] - TD[j], GRB.LESS_EQUAL, DD[j], name="constraint_2_project_%d" % j)
    print('add constr 2')
    ##### Constraint 3: supplier capacity limit
    for (row1, row2) in resource_supplier_capacity:
        m.addConstr(quicksum(q[row1, row2, project_list[j]] for j in range(project_n)), GRB.LESS_EQUAL,
                    resource_supplier_capacity[row1, row2], name="constraint_3_resource_%s_supplier_%s" % (row1, row2))
    print('add constr 3')
    ##### Constraints 4,6: project demand is met; each project receives each resource from exactly one supplier
    for (row1, row2) in resource_project_demand:
        m.addConstr(quicksum(x[row1, i, row2] for i in resource_supplier_list[row1]), GRB.EQUAL, 1,
                    name="constraint_6_resource_%s_project_%s" % (row1, row2))
        m.addConstr(quicksum(q[row1, i, row2] for i in resource_supplier_list[row1]), GRB.GREATER_EQUAL,
                    resource_project_demand[row1, row2], name="constraint_4_resource_%s_project_%s" % (row1, row2))
    print('add constr 4,6')
    ##### Constraint 5: shipping quantity is zero unless the shipping decision is taken (big-M)
    for (i, j, k) in q:
        # i resource, j supplier, k project
        m.addConstr(q[i, j, k], GRB.LESS_EQUAL, M * x[i, j, k],
                    name="constraint_5_resource_%s_supplier_%s_project_%s" % (i, j, k))
    print('add constr 5')
    ##### Constraint 7: budget limit
    expr = LinExpr()
    for (i, j, k) in q:
        expr.addTerms(c[i, j, k], q[i, j, k])
    m.addConstr(expr, GRB.LESS_EQUAL, B, name="constraint_7")
    print('add constr 7')
    ##### Constraint 8: an activity cannot start before its resources are released and shipped
    for j in range(project_n):
        for row in project_activity[project_list[j]].nodes():
            for row1 in project_activity[project_list[j]].node[row]['resources']:
                m.addConstr(quicksum(x[row1, i, project_list[j]] * (
                    resource_supplier_release_time[row1, i] + supplier_project_shipping[row1, i, project_list[j]]) for i
                                     in
                                     resource_supplier_list[row1]), GRB.LESS_EQUAL, ST[j][row],
                            name="constraint_8_project_%d_activity_%s_resource_%s" % (j, row, row1))
    print('add constr 8')
    ##### Constraint 9: activity precedence along the edges of the activity graph
    for j in range(project_n):
        for row1, row2 in project_activity[project_list[j]].edges():
            m.addConstr(ST[j][row1] + project_activity[project_list[j]].node[row1]['duration'], GRB.LESS_EQUAL,
                        ST[j][row2],
                        name="constraint_9_project_%d_activity_%s_activity_%s" % (j, row1, row2))
    print('add constr 9')
    ##### Constraints 10,11: activities sharing an 'rk_resources' entry must not overlap (big-M on y)
    for j in range(project_n):
        for row1 in project_activity[project_list[j]].nodes():
            for row2 in project_activity[project_list[j]].nodes():
                if row1 != row2 and len(
                        list(set(project_activity[project_list[j]].node[row1]['rk_resources']).intersection(
                            project_activity[project_list[j]].node[row2]['rk_resources']))) > 0:
                    m.addConstr(
                        ST[j][row1] + project_activity[project_list[j]].node[row1]['duration'] - M * (
                            1 - y[j, row1, row2]),
                        GRB.LESS_EQUAL, ST[j][row2],
                        name="constraint_10_project_%d_activity_%s_activity_%s" % (j, row1, row2))
                    m.addConstr(
                        ST[j][row2] + project_activity[project_list[j]].node[row2]['duration'] - M * (y[j, row1, row2]),
                        GRB.LESS_EQUAL, ST[j][row1],
                        name="constraint_11_project_%d_activity_%s_activity_%s" % (j, row1, row2))
                    # m.addConstr(y[j,row1,row2]+y[j,row2,row1],GRB.LESS_EQUAL,1)
    print('add constr 10 11')
    ##### Constraint 12: construction completion is after every activity has finished
    for j in range(project_n):
        for row in project_activity[project_list[j]].nodes():
            m.addConstr(CT[j], GRB.GREATER_EQUAL, ST[j][row] + project_activity[project_list[j]].node[row]['duration'],
                        name="constraint_12_project_%d_activity_%s" % (j, row))
    print('add constr 12')
    ##### Constraint 13: completion date is at least construction completion plus review duration
    for j in range(project_n):
        m.addConstr(DT[j], GRB.GREATER_EQUAL, CT[j] + review_duration[j], name="constraint_13_project_%d" % j)
    ##### Constraint 14: if i is reviewed directly before j, j completes after i plus j's review duration
    for i in range(-1, project_n):
        for j in range(project_n):
            if i != j:
                m.addConstr(DT[j], GRB.GREATER_EQUAL, DT[i] - M * (1 - z[i, j]) + review_duration[j],
                            name="constraint_14_project_%d_project_%d" % (i, j))
    print('add constr 14')
    ##### Constraint 15: every project has exactly one review predecessor
    for j in range(project_n):
        m.addConstr(quicksum(z[i, j] for i in range(-1, project_n) if i != j), GRB.EQUAL, 1,
                    name="constraint_15_project_%d" % j)
    print('add constr 15')
    ##### Constraint 16: exactly one project follows the dummy start
    m.addConstr(quicksum(z[-1, j] for j in range(project_n)), GRB.EQUAL, 1, name="constraint_16")
    print('add constr 16')
    ##### Constraint 17: every project has at most one review successor
    for i in range(project_n):
        m.addConstr(quicksum(z[i, j] for j in range(project_n) if j != i), GRB.LESS_EQUAL, 1,
                    name="constraint_17_project_%d" % i)
    print('add constr 17')
    m.update()

    # for i in range(project_n):
    #     for j in range(project_n):
    #         if i!=j:
    #             m.addConstr(z[i,j]+z[j,i],GRB.LESS_EQUAL,1)

    # Set optimization objective - minimize the weighted sum of project tardiness
    expr = LinExpr()
    for j in range(project_n):
        expr.addTerms(w[j], TD[j])
    print('add obj')
    m.setObjective(expr, GRB.MINIMIZE)
    m.update()
    # Solve
    m.optimize()
    print('project_n=%d' % project_n)
    # for j in range(project_n):
    #     print(len(project_activity[project_list[j]].edges()))

    time_cost = timer() - start_time
    print('time cost=', time_cost)
    # Write the model and the solution next to each other in output_path
    m.write(join(output_path, 'original.lp'))
    m.write(join(output_path, 'original.sol'))

    print('objective value=', m.objVal)

    return m.objVal, time_cost
Esempio n. 20
0
def pred_link(dataset, epochs):
    """Train an MLP edge classifier on precomputed embeddings and evaluate it.

    Reads ``./data/<dataset>_train_edges_false.npy`` and
    ``./data/<dataset>_emb.npy``, trains a two-tower dense network on
    (embedding[u], embedding[v]) pairs, evaluates on the test edges in
    batches of 100 (a trailing partial batch is ignored) and prepends the
    accuracy and macro-F1 to ``./data/<dataset>_results.txt``.

    Args:
        dataset: Dataset name used to build the ``./data/`` file names and
            passed to ``load_data``.
        epochs: Number of training steps; each step consumes one batch of
            100 edges.

    Returns:
        ``(acc, f1)``: accuracy and macro-averaged F1 on the evaluated test
        edges.

    Raises:
        IOError/OSError: if the results file does not already exist (it is
            opened in ``'r+'`` mode).
    """
    batch_size = 100

    # load samples (only adj_train and the test edge sets are used here)
    (_, _, adj_train, _, _, test_edges, test_edges_false,
     _) = load_data(dataset)
    adj_train_tuple = sparse_to_tuple(adj_train)
    train_edges_false = np.load('./data/' + dataset + '_train_edges_false.npy')
    train_all_edges = np.concatenate((adj_train_tuple[0], train_edges_false),
                                     axis=0)
    # One-hot labels: first half of the rows -> column 0, second half ->
    # column 1.
    # NOTE(review): this assumes the two concatenated edge sets have the same
    # number of rows - confirm against how train_edges_false is generated.
    num_train = train_all_edges.shape[0]
    labels = np.zeros(train_all_edges.shape)
    labels[:num_train // 2, 0] = 1
    labels[num_train // 2:, 1] = 1
    permutation = np.random.permutation(num_train)
    train_all_edges = train_all_edges[permutation, :]
    labels = labels[permutation, :]

    # load_embeddings
    emb = np.load('./data/' + dataset + '_emb.npy')

    tf.compat.v1.disable_eager_execution()
    x1 = tf.placeholder('float', [None, 64])
    x2 = tf.placeholder('float', [None, 64])
    y = tf.placeholder('float', [None, 2])

    x11 = tf.nn.relu(tf.layers.dense(inputs=x1, units=32))
    x21 = tf.nn.relu(tf.layers.dense(inputs=x2, units=32))
    x31 = tf.concat([x11, x21], 1)
    x41 = tf.nn.relu(tf.layers.dense(inputs=x31, units=16))
    x4 = tf.nn.relu(tf.layers.dense(inputs=x41, units=8))
    preds = tf.layers.dense(inputs=x4, units=2)
    cross_entropy = tf.reduce_mean(
        tf.losses.sigmoid_cross_entropy(logits=preds, multi_class_labels=y))

    train_op = tf.train.AdamOptimizer(
        learning_rate=0.01).minimize(cross_entropy)
    init = tf.global_variables_initializer()

    test_all_edges = np.concatenate((test_edges, test_edges_false), axis=0)
    num_test = test_all_edges.shape[0]
    test_labels = np.zeros(test_all_edges.shape)
    test_labels[:num_test // 2, 0] = 1
    test_labels[num_test // 2:, 1] = 1
    # Only whole batches are evaluated.
    num_eval = (num_test // batch_size) * batch_size

    # The context manager releases the session even if a step fails.
    with tf.Session() as sess:
        sess.run(init)

        flag = 0
        for _ in range(epochs):
            # Wrap around once the next full batch would run off the end.
            if flag * batch_size + batch_size > num_train:
                flag = 0
            a = flag * batch_size
            b = a + batch_size
            flag = flag + 1
            batch_edges = train_all_edges[a:b, :]
            sess.run(train_op,
                     feed_dict={
                         x1: emb[batch_edges[:, 0], :],
                         x2: emb[batch_edges[:, 1], :],
                         y: labels[a:b]
                     })

        # `preds` does not depend on `y`, so no labels are fed at test time.
        batch_outputs = []
        for a in range(0, num_eval, batch_size):
            batch_edges = test_all_edges[a:a + batch_size, :]
            batch_outputs.append(
                sess.run(preds,
                         feed_dict={
                             x1: emb[batch_edges[:, 0], :],
                             x2: emb[batch_edges[:, 1], :]
                         }))

    # Start from an empty (0, 2) array so the result is well-formed even when
    # there are fewer than `batch_size` test edges.
    test_preds = np.vstack([np.empty((0, 2))] + batch_outputs)
    test_labels = test_labels[:num_eval, :]

    # Predicted class: 0 when logit 0 >= logit 1, otherwise 1.
    p = np.where(test_preds[:, 0] >= test_preds[:, 1], 0, 1)
    l = test_labels[:, 1]

    from sklearn.metrics import f1_score, accuracy_score
    acc = accuracy_score(l, p)
    f1 = f1_score(l, p, average='macro')
    print(acc)
    print(f1)

    # Prepend the new scores to the existing results file.
    with open('./data/' + dataset + '_results.txt', 'r+') as f:
        content = f.read()
        f.seek(0, 0)
        f.write(str(acc) + '\n')
        f.write(str(f1) + '\n' + content)
    return acc, f1
Esempio n. 21
0
def load_company_data(company_str):
    """Return whatever ``input_data.load_data`` yields for *company_str*."""
    return input_data.load_data(company=company_str)
Esempio n. 22
0

# Command-line flags for the run (dataset name, optimiser and model sizes).
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('data_name', 'SBM', 'name of data set.')
flags.DEFINE_float('learning_rate', .5 * 0.001, 'Initial learning rate.')
flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 16, 'Number of units in hidden layer 2.')
flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).')
flags.DEFINE_integer('features', 0, 'Whether to use features (1) or not (0).')
flags.DEFINE_integer('seed', 50, 'seed for fixing the results.')
flags.DEFINE_integer('iterations', 1000, 'number of iterations.')

# Preprocess: load the sequences of adjacencies/features and keep only the
# last entry of each for this script.
adjs, features = load_data(FLAGS.data_name, 0.5)
adj = adjs[-1]
feature = features[-1]

# Tuple form of the raw adjacency, normalised adjacency, tuple-form features.
adj_orig = sparse_to_tuple(adj)
adj_norm = preprocess_graph(adj)
feature = sparse_to_tuple(feature)

# Sizes are read from the first entry of each sequence.
features_nonzero = feature[1].shape[0]
num_node = np.array(adjs[0]).shape[1]
feature_dim = np.array(features[0]).shape[1]

# NOTE(review): `adj` here is the raw entry from load_data, not the tuple
# stored in `adj_orig`; confirm that `adj[1].sum()` (rather than
# `adj_orig[1].sum()`) is the intended edge total.
pos_weight = float(num_node * num_node - adj[1].sum()) / adj[1].sum()
norm = num_node * num_node / float((num_node * num_node - adj[1].sum()) * 2)
print('num_node: ', num_node, ' feature_dim: ', feature_dim, ' pos_weight: ',
Esempio n. 23
0
def predict_model(test_x, test_y, parameters):
    """Return the fraction of samples whose thresholded outputs all match.

    Runs ``dnn.L_model_forward`` on *test_x*, thresholds the output at 0.5
    in place, and counts a column as correct only when every row of that
    column equals *test_y*. The per-column 0/1 result is printed before the
    mean over ``test_x.shape[1]`` columns is returned.
    """
    sample_count = test_x.shape[1]
    rows_per_sample = test_y.shape[0]

    outputs, _ = dnn.L_model_forward(test_x, parameters)
    # Binarise the network output in place at the 0.5 threshold.
    outputs[outputs >= 0.5] = 1
    outputs[outputs < 0.5] = 0

    # Number of matching rows in each column.
    hits = np.sum((outputs == test_y).astype(int), axis=0, keepdims=True)
    # A column scores 1 only if all of its rows matched.
    hits[hits < rows_per_sample] = 0
    hits[hits == rows_per_sample] = 1
    print(hits)

    return (1 / sample_count) * np.sum(hits)


if __name__ == '__main__':
    train_x, train_y, test_x, test_y = input_data.load_data()

    # Flatten every sample into one column and scale the values by 1/255.
    train_x_flatten = train_x.reshape(train_x.shape[0], -1).T / 255
    test_x_flatten = test_x.reshape(test_x.shape[0], -1).T / 255

    # Labels are transposed to match the column-per-sample layout.
    train_y = train_y.T.astype(int)
    test_y = test_y.T.astype(int)

    parameters = L_layer_model(train_x_flatten, train_y, (784, 100, 10))

    # Report accuracy on the training set, then on the test set.
    train_accuracy = predict_model(train_x_flatten, train_y, parameters)
    print(train_accuracy, '\n')
    test_accuracy = predict_model(test_x_flatten, test_y, parameters)
    print(test_accuracy, '\n')
Esempio n. 24
0
# Create the output root (if given) and a per-run sub-directory tree.
# NOTE(review): when FLAGS.output is None the makedirs call is skipped, but
# os.path.join below still receives None - confirm the flag always has a value.
if FLAGS.output is not None:
    os.makedirs(FLAGS.output, exist_ok=True)
output_dir = os.path.join(FLAGS.output, now)  # `now` is defined elsewhere

model_path = os.path.join(output_dir, 'checkpoint')
prediction_path = os.path.join(output_dir, 'prediction')
log_path = os.path.join(output_dir, 'log')

create_dir_if_not_exists(model_path)
create_dir_if_not_exists(prediction_path)
create_dir_if_not_exists(log_path)


adj, adata = load_data()

# Keep a copy of the adjacency with its diagonal entries removed.
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()


# Split edges and expression features into train / validation / test parts.
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj)
features, features_orig, size_factors, val_features, val_features_idx, test_features, test_features_idx = mask_test_express(adata)

# From here on `adj` is the training graph only.
adj = adj_train
adj_norm = preprocess_graph(adj)

# Define placeholders
placeholders = {
    'features': tf.placeholder(tf.float32),
Esempio n. 25
0
import numpy as np
import scipy.sparse as sp

#import tensorflow as tf
from input_data import load_data
from preprocessing import (construct_feed_dict, mask_test_edges,
                           preprocess_graph, sparse_to_tuple)

# Load the 'cora' dataset and split its edges into train/val/test sets.
adj, features = load_data('cora')
adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
    adj)

# Compare stored-entry counts of the full and the training adjacency.
print(adj.nnz)
print(adj_train.nnz)
#print(features.shape)
'''
a = tf.constant([[1,2,2],[1,2,3]],tf.float32)
ses = tf.Session()
x = tf.transpose(a)
y = tf.matmul(a, x)
ys = tf.nn.sigmoid(y)
print(ses.run(a))
print(ses.run(x))
print(ses.run(y))
print(ses.run(ys))


def sparse_to_tuple(sparse_mx):
    if not sp.isspmatrix_coo(sparse_mx):
        sparse_mx = sparse_mx.tocoo()
    coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
Esempio n. 26
0
    def runner(self):
        """Train the two-graph embedding model and its discriminator.

        Builds placeholders for two graphs, creates the discriminator
        variables, loads the 'Douban_offline' and 'Douban_online' datasets,
        then alternates generator (``opt.opt_op``) and discriminator
        (``opt_dis``) updates. After every outer epoch the cosine similarity
        between the two embedding sets is stored in ``self.similar_matrix``
        and top-k hit counts against ``data/douban_truth.emb`` are printed.
        Returns nothing.
        """
        model_str = FLAGS.model
        # One placeholder dict per graph. Only the first dict carries the
        # discriminator weights (D_W1, D_W2, D_b1, D_b2).
        placeholders = [{
            'features':
            tf.sparse_placeholder(tf.float32),
            'adj':
            tf.sparse_placeholder(tf.float32),
            'adj_orig':
            tf.sparse_placeholder(tf.float32),
            'dropout':
            tf.placeholder_with_default(0., shape=()),
            'num_features':
            tf.placeholder(tf.float32),
            'features_nonzero':
            tf.placeholder(tf.float32),
            'pos_weight':
            tf.placeholder(tf.float32),
            'norm':
            tf.placeholder(tf.float32),
            'reward':
            tf.placeholder(tf.float32),
            'D_W1':
            tf.placeholder_with_default(
                tf.zeros([FLAGS.g_hidden2, FLAGS.d_hidden1]),
                shape=[FLAGS.g_hidden2, FLAGS.d_hidden1]),
            'D_W2':
            tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1, 1]),
                                        shape=[FLAGS.d_hidden1, 1]),
            'D_b1':
            tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1]),
                                        shape=[FLAGS.d_hidden1]),
            'D_b2':
            tf.placeholder_with_default(tf.zeros([1]), shape=[1]),
        }, {
            'features': tf.sparse_placeholder(tf.float32),
            'adj': tf.sparse_placeholder(tf.float32),
            'adj_orig': tf.sparse_placeholder(tf.float32),
            'dropout': tf.placeholder_with_default(0., shape=()),
            # NOTE(review): the first dict uses a plain tf.placeholder for
            # 'num_features'; confirm the sparse placeholder here is intended.
            'num_features': tf.sparse_placeholder(tf.float32),
            'features_nonzero': tf.placeholder(tf.float32),
            'pos_weight': tf.placeholder(tf.float32),
            'norm': tf.placeholder(tf.float32),
            'reward': tf.placeholder(tf.float32)
        }]
        sess = tf.Session()

        # NOTE(review): real_X / fake_X are created again further down; the
        # later pair is the one actually wired into the discriminator.
        real_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
        fake_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])

        # Discriminator parameters (two layers: g_hidden2 -> d_hidden1 -> 1).
        self.D_W1 = tf.Variable(xavier_init([FLAGS.g_hidden2,
                                             FLAGS.d_hidden1]))
        self.D_b1 = tf.Variable(xavier_init([FLAGS.d_hidden1]))
        self.D_W2 = tf.Variable(xavier_init([FLAGS.d_hidden1, 1]))
        self.D_b2 = tf.Variable(xavier_init([1]))
        d_vars = [self.D_W1, self.D_b1, self.D_W2, self.D_b2]

        print('train for the network embedding...')
        # Load data
        dataset_str1 = 'Douban_offline'  # 1118 nodes
        dataset_str2 = 'Douban_online'  # 3906 nodes
        adj1, features1, fea_num1 = load_data(dataset_str1)
        adj2, features2, fea_num2 = load_data(dataset_str2)
        num_features = [features1.shape[1], features2.shape[1]]

        model = None

        if model_str == 'gcn_ae':
            model = GCNModelAE(placeholders, num_features, sess)
        elif model_str == 'gcn_vae':
            # NOTE(review): num_nodes and features_nonzero are not defined in
            # this method; this branch relies on names from outside it.
            model = GCNModelVAE(placeholders, num_features, num_nodes,
                                features_nonzero)

        # Optimizer

        with tf.name_scope('optimizer'):
            opt = OptimizerAE(
                preds=[model.reconstructions1, model.reconstructions2],
                labels=[
                    tf.reshape(
                        tf.sparse_tensor_to_dense(placeholders[0]['adj_orig'],
                                                  validate_indices=False),
                        [-1]),
                    tf.reshape(
                        tf.sparse_tensor_to_dense(placeholders[1]['adj_orig'],
                                                  validate_indices=False),
                        [-1])
                ],
                # NOTE(review): attribute_reconstructions1 is listed twice;
                # confirm the second entry is not meant to be the second
                # graph's attribute reconstruction.
                preds_attribute=[
                    model.attribute_reconstructions1,
                    model.attribute_reconstructions1
                ],
                labels_attribute=[
                    tf.sparse_tensor_to_dense(placeholders[0]['features']),
                    tf.sparse_tensor_to_dense(placeholders[1]['features'])
                ],
                pos_weight=[
                    placeholders[0]['pos_weight'],
                    placeholders[1]['pos_weight']
                ],
                norm=[placeholders[0]['norm'], placeholders[1]['norm']],
                fake_logits=model.fake_logits,
                alpha=FLAGS.AX_alpha)

        real_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
        fake_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])

        # Discriminator loss: mean fake logit minus mean real logit.
        real_logits, fake_logits = self.discriminator(real_X, fake_X)
        real_prob = tf.reduce_mean(real_logits)
        fake_prob = tf.reduce_mean(fake_logits)
        D_loss = -real_prob + fake_prob
        dis_optimizer = tf.train.AdamOptimizer(
            learning_rate=FLAGS.learning_rate_dis)  # Adam Optimizer
        opt_dis = dis_optimizer.minimize(D_loss, var_list=d_vars)

        sess.run(tf.global_variables_initializer())
        final_emb1 = []
        final_emb2 = []
        emb1_id = []
        emb2_id = []
        local_A_1 = adj1
        local_X_1 = features1
        local_A_2 = adj2
        local_X_2 = features2

        # Per-graph inputs: normalised adjacency, tuple-form features,
        # positive-class weight, adjacency-plus-identity labels and loss norm.
        adj_norm_1 = preprocess_graph(local_A_1)
        local_X_1 = sparse_to_tuple(local_X_1.tocoo())
        pos_weight_1 = float(local_A_1.shape[0] * local_A_1.shape[0] -
                             local_A_1.sum()) / local_A_1.sum()
        adj_label_1 = local_A_1 + sp.eye(local_A_1.shape[0])
        adj_label_1 = sparse_to_tuple(adj_label_1)
        norm_1 = local_A_1.shape[0] * local_A_1.shape[0] / float(
            (local_A_1.shape[0] * local_A_1.shape[0] - local_A_1.sum()) * 2)

        adj_norm_2 = preprocess_graph(local_A_2)
        local_X_2 = sparse_to_tuple(local_X_2.tocoo())
        pos_weight_2 = float(local_A_2.shape[0] * local_A_2.shape[0] -
                             local_A_2.sum()) / local_A_2.sum()
        adj_label_2 = local_A_2 + sp.eye(local_A_2.shape[0])
        adj_label_2 = sparse_to_tuple(adj_label_2)
        norm_2 = local_A_2.shape[0] * local_A_2.shape[0] / float(
            (local_A_2.shape[0] * local_A_2.shape[0] - local_A_2.sum()) * 2)

        self.tmp_count = {}

        for epoch in range(FLAGS.epoch):
            for circle_epoch in range(FLAGS.circle_epoch):
                for G_epoch in range(FLAGS.g_epoch):
                    # ------------------------------------------------------------------------------------------
                    # Graph 2 is passed first, graph 1 second, in every list.
                    feed_dict = construct_feed_dict(
                        [adj_norm_2, adj_norm_1], [adj_label_2, adj_label_1],
                        [local_X_2, local_X_1], [pos_weight_2, pos_weight_1],
                        [norm_2, norm_1], placeholders)
                    # Feed the current discriminator weights to the generator.
                    feed_dict.update(
                        {placeholders[0]['D_W1']: sess.run(self.D_W1)})
                    feed_dict.update(
                        {placeholders[0]['D_W2']: sess.run(self.D_W2)})
                    feed_dict.update(
                        {placeholders[0]['D_b1']: sess.run(self.D_b1)})
                    feed_dict.update(
                        {placeholders[0]['D_b2']: sess.run(self.D_b2)})

                    _, embeddings1_, embeddings2_, gcn_cost, fake_prob_, attr_cost = sess.run(
                        [
                            opt.opt_op, model.embeddings1, model.embeddings2_,
                            opt.cost, model.fake_prob, opt.attribute_cost
                        ],
                        feed_dict=feed_dict)

                # NOTE(review): feed_dict is reused from the generator loop
                # above, so it is undefined here if FLAGS.g_epoch is 0.
                for D_epoch in range(FLAGS.d_epoch):
                    feed_dict.update(
                        {placeholders[0]['dropout']: FLAGS.dropout})
                    emb1, emb2 = sess.run(
                        [model.embeddings1, model.embeddings2_],
                        feed_dict=feed_dict)
                    _, real_prob_, fake_prob_ = sess.run(
                        [opt_dis, real_prob, fake_prob],
                        feed_dict={
                            real_X: emb1,
                            fake_X: emb2
                        })

            # `epoch % 1` is always 0, so this evaluation runs every epoch.
            if epoch % 1 == 0:

                emb1, emb2 = sess.run([model.embeddings1, model.embeddings2_],
                                      feed_dict=feed_dict)
                final_emb1 = np.array(emb1)
                final_emb2 = np.array(emb2)

                similar_matrix = cosine_similarity(final_emb1, final_emb2)

                self.similar_matrix = similar_matrix

                pair = {}
                gnd = np.loadtxt("data/douban_truth.emb")
                count = {}
                topk = [1, 5, 10, 20, 30, 50]
                for i in range(len(topk)):
                    pair[topk[i]] = []
                    count[topk[i]] = 0
                    self.tmp_count[topk[i]] = 0
                for top in topk:
                    # For each row, record the 1-based column indices of its
                    # `top` largest similarities.
                    for index in range(similar_matrix.shape[0]):
                        top_index = heapq.nlargest(
                            int(top), range(len(similar_matrix[index])),
                            similar_matrix[index].take)
                        top_index = list(map(lambda x: x + 1, top_index))
                        pair[top].append([index + 1, top_index])
                    # Count ground-truth pairs whose second id is among the
                    # candidates recorded for the first id.
                    for ele_1 in gnd:
                        for ele_2 in pair[top]:
                            if ele_1[0] == ele_2[0]:
                                if ele_1[1] in ele_2[1]:
                                    count[top] += 1

                print(
                    f'-----------------------epoch {epoch}------------------------'
                )
                for top in topk:
                    print("top", '%02d' % (top), "count=", '%d' % (count[top]),
                          "precision=", "{:.5f}".format(count[top] / len(gnd)))
                print(
                    f'-----------------------epoch {epoch}------------------------'
                )
    index = epoch % number_of_slices
    return edges[index * args.subsample_number:(index + 1) *
                 args.subsample_number]


# Repeat the experiment 10 times with the model name fixed to 'NLGF'.
for exp in range(10):
    args.model = 'NLGF'

    # Echo the run configuration.
    print('model= ' + str(args.model))
    print('dataset=' + str(args.dataset))
    print('learning rate= ' + str(args.learning_rate))
    print('epoch= ' + str(args.num_epoch))
    print('subsample_number=' + str(args.subsample_number))
    print('hidden1_dim=' + str(args.hidden1_dim))

    adj, features = load_data(args.dataset)

    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
    adj_orig.eliminate_zeros()

    # Split the edges; this variant of mask_test_edges also returns
    # train_false_edges.
    adj_train, train_edges, train_false_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)

    # From here on `adj` is the training graph only.
    adj = adj_train

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
Esempio n. 28
0
import time
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

from constants import batch_size, epochs, dropout, variables_device,\
      sequence_length, learning_rate, display_steps,\
      prediction_length, processing_device
from funcs import defineVariables, preActivation, activation

# Ask for a company name until a matching CSV exists under
# ../csv-data/gainers/.
# NOTE(review): the print statement below is Python 2 syntax, and under
# Python 2 input() evaluates what the user types - confirm whether raw_input
# was intended.
company_str = input("Enter company name for training:  ")

while not os.path.exists("../csv-data/gainers/" + company_str + ".NS.csv"):
    print "Company not found"
    company_str = input("Enter company name for training:  ")

company = input_data.load_data(company=company_str)

# placeholders
# Input: batches of sequences of length `sequence_length` with 4 values per
# step.
seq_input = tf.placeholder(tf.float32,
                           shape=(None, sequence_length, 4),
                           name="input_to_lstm")

# Target: 4 values for each of the `prediction_length` predicted steps,
# flattened.
seq_output = tf.placeholder(tf.float32,
                            shape=(None, 4 * prediction_length),
                            name="output_of_model")

with tf.device(variables_device):
    # weights
    fc_weights = {
        'wfc1': defineVariables([120, 80], "wfc1"),
        'wfc2': defineVariables([80, 64], "wfc2"),
Esempio n. 29
0
# Echo two of the run flags.
print ('WeightedCE: ' + str(FLAGS.weighted_ce))
print ('ReconstructX: ' + str(FLAGS.reconstruct_x))

model_str = FLAGS.model
dataset_str = FLAGS.dataset

print (model_str)
# For the dglfrm models, --hidden must contain at least two '_'-separated
# parts (the last being the truncation parameter); otherwise exit.
if (model_str == 'dglfrm' or model_str == 'dglfrm_b'):
    if (len(FLAGS.hidden.split('_')) < 2):
        sys.exit("The truncation parameter missing. Specify '--hidden <layer_1>_<truncation_parameter>'")
# Output directory: ./data/<dataset>/split_<idx>/<model>/<hidden>/
save_dir = './data/' + dataset_str +'/split_'+ str(FLAGS.split_idx) + '/' + model_str + "/" + FLAGS.hidden + "/"
if not os.path.exists(save_dir):
        os.makedirs(save_dir)

# Load data. Raw adj is an NxN matrix and features is an NxF matrix. Sparse matrices are used here (see scipy docs).
adj, features, feature_presence = load_data(dataset_str)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

print ("Adj Original Matrix: " + str(adj_orig.shape))
print ("Features Shape: " + str(features.shape))

# Despite its name, features_shape holds only the row count of `features`.
features_shape = features.shape[0]
if FLAGS.features == 0:
        features = sp.identity(features_shape)  # featureless

# Class-balance weight and loss normaliser computed over the feature matrix.
pos_weight_feats = float(features.shape[0] * features.shape[1] - features.sum()) / features.sum() # (N) / P
norm_feats = features.shape[0] * features.shape[1] / float((features.shape[0] * features.shape[1] - features.sum()) * 2) # (N+P) / (N)
Esempio n. 30
0
def main(data_dir):
    """Train a linear softmax classifier and log summaries for TensorBoard.

    Loads flattened images from *data_dir*, builds ``y = xW + b`` with a
    softmax cross-entropy loss, trains for ``MAX_STEPS`` steps of
    ``BATCH_SIZE`` samples, evaluates on the test set every 100 steps and
    once more after training. All summaries go to ``LOG_DIR + '/train'``.

    Args:
        data_dir: Directory handed to ``input_data.load_data``.
    """
    # load data
    meta, train_data, test_data = input_data.load_data(data_dir, flatten=True)
    print('data loaded. train images: %s. test images: %s' % (
        train_data.images.shape[0], test_data.images.shape[0]))

    LABEL_SIZE = meta['label_size']
    IMAGE_WIDTH = meta['width']
    IMAGE_HEIGHT = meta['height']
    IMAGE_SIZE = IMAGE_WIDTH * IMAGE_HEIGHT
    print('label_size: %s, image_size: %s' % (LABEL_SIZE, IMAGE_SIZE))

    # variable in the graph for input data
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, IMAGE_SIZE])
        y_ = tf.placeholder(tf.float32, [None, LABEL_SIZE])
        variable_summaries(x)
        variable_summaries(y_)

        # must be 4-D with shape `[batch_size, height, width, channels]`
        images_shaped_input = tf.reshape(x, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
        tf.summary.image('input',
                         images_shaped_input,
                         max_outputs=LABEL_SIZE * 2)

    # define the model
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope('linear_model'):
        with tf.name_scope('W'):
            W = tf.Variable(tf.zeros([IMAGE_SIZE, LABEL_SIZE]))
            variable_summaries(W)
        with tf.name_scope('b'):
            b = tf.Variable(tf.zeros([LABEL_SIZE]))
            variable_summaries(b)
        with tf.name_scope('y'):
            y = tf.matmul(x, W) + b
            tf.summary.histogram('y', y)

    # Define loss and optimizer
    # `diff` is a 1-D tensor of length batch_size holding the per-sample
    # softmax cross-entropy loss.
    with tf.name_scope('loss'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        cross_entropy = tf.reduce_mean(diff)
        train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
            cross_entropy)
        variable_summaries(diff)

    # forward prop
    predict = tf.argmax(y, axis=1)
    expect = tf.argmax(y_, axis=1)

    # evaluate accuracy
    with tf.name_scope('evaluate_accuracy'):
        correct_prediction = tf.equal(predict, expect)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        variable_summaries(accuracy)

    with tf.Session() as sess:

        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)

        # The writer is closed only after the final summary has been added;
        # summaries sent to an already-closed FileWriter are not written.
        try:
            tf.global_variables_initializer().run()

            # Train
            for i in range(MAX_STEPS):
                batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)
                train_summary, _ = sess.run([merged, train_step],
                                            feed_dict={
                                                x: batch_xs,
                                                y_: batch_ys
                                            })
                train_writer.add_summary(train_summary, i)

                if i % 100 == 0:
                    # Test trained model
                    test_summary, r = sess.run([merged, accuracy],
                                               feed_dict={
                                                   x: test_data.images,
                                                   y_: test_data.labels
                                               })
                    train_writer.add_summary(test_summary, i)
                    print('step = %s, accuracy = %.2f%%' % (i, r * 100))

            # final check after looping
            test_summary, r_test = sess.run([merged, accuracy],
                                            feed_dict={
                                                x: test_data.images,
                                                y_: test_data.labels
                                            })
            train_writer.add_summary(test_summary, i)
        finally:
            train_writer.close()

        print('testing accuracy = %.2f%%' % (r_test * 100, ))
Esempio n. 31
0
def format_data(data_name, seq_len, time_decay):
    """Load a graph sequence and arrange it into sliding windows.

    For every (adjacency, feature) pair returned by ``load_data`` this
    computes the tuple-form adjacency, its positive-class weight and loss
    norm, the tuple-form features with their non-zero count, and the
    preprocessed adjacency. It then builds ``len(sequence) - seq_len``
    windows of length *seq_len*: "struct" windows start at index ``i`` and
    "temporal" windows start at index ``i + 1``.

    Returns:
        dict with the windowed lists, ``num_node``, ``feature_dim``,
        ``batch_size`` and the unwindowed ``adj_norms`` / ``features``.
    """
    # Load data
    adjs, features = load_data(data_name, time_decay)

    num_node = np.array(adjs[0]).shape[1]
    feature_dim = np.array(features[0]).shape[1]
    pair_count = num_node * num_node

    adj_origs, pos_weights, norms = [], [], []
    adj_norms, features_sp, features_nonzeros = [], [], []

    for graph, feat in zip(adjs, features):
        graph_tuple = sparse_to_tuple(graph)
        adj_origs.append(graph_tuple)

        pos_weights.append(
            float(pair_count - graph_tuple[1].sum()) / graph_tuple[1].sum())
        norms.append(
            pair_count / float((pair_count - graph_tuple[1].sum()) * 2))

        feat_tuple = sparse_to_tuple(feat)
        features_sp.append(feat_tuple)
        features_nonzeros.append(feat_tuple[1].shape[0])

        adj_norms.append(preprocess_graph(graph))

    batch_size = len(adj_origs) - seq_len

    def windows(seq, offset):
        # One slice of length seq_len per batch index, shifted by `offset`.
        return [
            seq[i + offset:i + offset + seq_len] for i in range(batch_size)
        ]

    return {
        'temporal_adj_origs': windows(adj_origs, 1),
        'temporal_pos_weights': windows(pos_weights, 1),
        'temporal_norms': windows(norms, 1),
        'num_node': num_node,
        'feature_dim': feature_dim,
        'batch_size': batch_size,
        'struct_adj_origs': windows(adj_origs, 0),
        'struct_features': windows(features_sp, 0),
        'struct_features_nonzeros': windows(features_nonzeros, 0),
        'struct_adj_norms': windows(adj_norms, 0),
        'struct_pos_weights': windows(pos_weights, 0),
        'struct_norms': windows(norms, 0),
        'adj_norms': adj_norms,
        'features': features_sp
    }
Esempio n. 32
0
File: train.py Progetto: yuk12/dgl
def web_main():
    """Train a VGAE model on ``args.dataset`` and report link-prediction scores.

    Loads the graph, holds out validation/test edges with
    ``mask_test_edges``, trains ``model.VGAEModel`` on the full training
    graph for ``args.epochs`` epochs, prints train loss/accuracy and
    validation ROC-AUC/AP each epoch, then prints test ROC-AUC/AP computed
    from the last epoch's output.
    """
    adj, features = load_data(args.dataset)

    features = sparse_to_tuple(features.tocoo())

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    # Some preprocessing
    adj_normalization, adj_norm = preprocess_graph(adj)

    # Create the DGL graph from the normalised training adjacency
    graph = dgl.from_scipy(adj_normalization)
    graph.add_self_loop()

    # Class-balance weight and loss normaliser from the training adjacency
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Convert the (coords, values, shape) tuples to torch sparse tensors
    adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T),
                                        torch.FloatTensor(adj_norm[1]),
                                        torch.Size(adj_norm[2]))
    adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T),
                                         torch.FloatTensor(adj_label[1]),
                                         torch.Size(adj_label[2]))
    features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T),
                                        torch.FloatTensor(features[1]),
                                        torch.Size(features[2]))

    # Entries that are 1 in the label matrix get weight pos_weight, others 1
    weight_mask = adj_label.to_dense().view(-1) == 1
    weight_tensor = torch.ones(weight_mask.size(0))
    weight_tensor[weight_mask] = pos_weight

    features = features.to_dense()
    in_dim = features.shape[-1]

    vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2)
    # create training component
    optimizer = torch.optim.Adam(vgae_model.parameters(),
                                 lr=args.learning_rate)
    print('Total Parameters:',
          sum([p.nelement() for p in vgae_model.parameters()]))

    def get_scores(edges_pos, edges_neg, adj_rec):
        """Return (ROC-AUC, average precision) for the given edge sets."""
        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        # Scores are extracted as plain Python floats for both edge sets so
        # the arrays handed to sklearn have a uniform type.
        preds = [sigmoid(adj_rec[e[0], e[1]].item()) for e in edges_pos]
        preds_neg = [sigmoid(adj_rec[e[0], e[1]].item()) for e in edges_neg]

        preds_all = np.hstack([preds, preds_neg])
        labels_all = np.hstack([np.ones(len(preds)), np.zeros(len(preds_neg))])
        roc_score = roc_auc_score(labels_all, preds_all)
        ap_score = average_precision_score(labels_all, preds_all)

        return roc_score, ap_score

    def get_acc(adj_rec, adj_label):
        """Return the fraction of entries where (adj_rec > 0.5) matches the label."""
        labels_all = adj_label.to_dense().view(-1).long()
        preds_all = (adj_rec > 0.5).view(-1).long()
        accuracy = (preds_all == labels_all).sum().float() / labels_all.size(0)
        return accuracy

    # create training epoch
    for epoch in range(args.epochs):
        t = time.time()

        # Training and validation using a full graph
        vgae_model.train()

        logits = vgae_model.forward(graph, features)

        # compute loss: weighted reconstruction term minus the KL term
        loss = norm * F.binary_cross_entropy(logits.view(-1),
                                             adj_label.to_dense().view(-1),
                                             weight=weight_tensor)
        kl_divergence = 0.5 / logits.size(0) * (
            1 + 2 * vgae_model.log_std - vgae_model.mean**2 -
            torch.exp(vgae_model.log_std)**2).sum(1).mean()
        loss -= kl_divergence

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = get_acc(logits, adj_label)

        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)

        # Print out performance
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(loss.item()), "train_acc=",
              "{:.5f}".format(train_acc), "val_roc=", "{:.5f}".format(val_roc),
              "val_ap=", "{:.5f}".format(val_ap), "time=",
              "{:.5f}".format(time.time() - t))

    # Test scores use the output of the last training epoch.
    test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
    print("End of training!", "test_roc=", "{:.5f}".format(test_roc),
          "test_ap=", "{:.5f}".format(test_ap))
Esempio n. 33
0
def main(_):
    """Build, train and evaluate a two-convolution-layer image classifier.

    Loads the dataset from ``FLAGS.data_dir``, builds the TensorFlow graph
    (two tanh convolution + pooling stages, one dense layer, dropout, linear
    readout), trains it for ``MAX_STEPS`` steps with Adam, writes summaries
    under ``LOG_DIR`` and prints accuracy every 100 steps and once at the end.

    Args:
        _: Ignored (presumably the argv list passed by ``tf.app.run`` --
            confirm against the caller).
    """
    # load data
    meta, train_data, test_data = input_data.load_data(FLAGS.data_dir,
                                                       flatten=False)
    # Every print below passes exactly one parenthesised expression, so the
    # output is the same under Python 2 and Python 3.
    print('data loaded')
    print('train images: %s. test images: %s' % (train_data.images.shape[0],
                                                 test_data.images.shape[0]))

    LABEL_SIZE = meta['label_size']
    IMAGE_HEIGHT = meta['height']
    IMAGE_WIDTH = meta['width']
    IMAGE_SIZE = IMAGE_WIDTH * IMAGE_HEIGHT
    print('label_size: %s, image_size: %s' % (LABEL_SIZE, IMAGE_SIZE))

    # variable in the graph for input data
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH])
        y_ = tf.placeholder(tf.float32, [None, LABEL_SIZE])

        # must be 4-D with shape `[batch_size, height, width, channels]`
        x_image = tf.reshape(x, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
        tf.summary.image('input', x_image, max_outputs=LABEL_SIZE)

    # define the model
    with tf.name_scope('convolution-layer-1'):
        W_conv1 = weight_variable([7, 7, 1, 32])
        b_conv1 = bias_variable([32])

        h_conv1 = tf.nn.tanh(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

    with tf.name_scope('convolution-layer-2'):
        W_conv2 = weight_variable([7, 7, 32, 64])
        b_conv2 = bias_variable([64])

        h_conv2 = tf.nn.tanh(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

    with tf.name_scope('densely-connected'):
        # Flattened size is width * height * 4. This assumes conv2d keeps the
        # spatial size and each max_pool_2x2 halves both dimensions, giving
        # (H / 4) * (W / 4) * 64 channels -- TODO confirm against the helpers.
        W_fc1 = weight_variable([IMAGE_WIDTH * IMAGE_HEIGHT * 4, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2,
                                  [-1, IMAGE_WIDTH * IMAGE_HEIGHT * 4])
        h_fc1 = tf.nn.tanh(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    with tf.name_scope('dropout'):
        # To reduce overfitting, we will apply dropout before the readout layer
        # NOTE(review): every sess.run / eval below feeds keep_prob = 1.0,
        # training steps included, so this op currently never drops a unit.
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('readout'):
        W_fc2 = weight_variable([1024, LABEL_SIZE])
        b_fc2 = bias_variable([LABEL_SIZE])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # Define loss and optimizer
    # Returns:
    # A 1-D `Tensor` of length `batch_size`
    # of the same type as `logits` with the softmax cross entropy loss.
    with tf.name_scope('loss'):
        cross_entropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
        variable_summaries(cross_entropy)

    # forward pass: predicted and expected class index per example
    predict = tf.argmax(y_conv, axis=1)
    expect = tf.argmax(y_, axis=1)

    # evaluate accuracy
    with tf.name_scope('evaluate_accuracy'):
        correct_prediction = tf.equal(predict, expect)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        variable_summaries(accuracy)

    with tf.Session() as sess:

        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(LOG_DIR + '/test', sess.graph)

        # Close both writers even when training raises, so the event files
        # are not left open.
        try:
            tf.global_variables_initializer().run()

            # Train
            for i in range(MAX_STEPS):
                batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)

                step_summary, _ = sess.run([merged, train_step],
                                           feed_dict={
                                               x: batch_xs,
                                               y_: batch_ys,
                                               keep_prob: 1.0
                                           })
                train_writer.add_summary(step_summary, i)

                if i % 100 == 0:
                    # Accuracy on the batch that was just trained on; this
                    # writes a second summary for step i to the train log.
                    valid_summary, train_accuracy = sess.run(
                        [merged, accuracy],
                        feed_dict={
                            x: batch_xs,
                            y_: batch_ys,
                            keep_prob: 1.0
                        })
                    train_writer.add_summary(valid_summary, i)

                    # Periodic check on a batch of 2000 test examples.
                    test_x, test_y = test_data.next_batch(2000)
                    test_summary, test_accuracy = sess.run(
                        [merged, accuracy],
                        feed_dict={
                            x: test_x,
                            y_: test_y,
                            keep_prob: 1.0
                        })
                    test_writer.add_summary(test_summary, i)

                    print('step %s, training accuracy = %.2f%%, testing accuracy = %.2f%%' % (
                        i, train_accuracy * 100, test_accuracy * 100))
        finally:
            train_writer.close()
            test_writer.close()

        # final check after looping
        test_x, test_y = test_data.next_batch(2000)
        test_accuracy = accuracy.eval(feed_dict={
            x: test_x,
            y_: test_y,
            keep_prob: 1.0
        })
        print('testing accuracy = %.2f%%' % (test_accuracy * 100, ))
Esempio n. 34
0
# Result lists for the two name prefixes (GAE / AGAE); each starts empty.
GAE_l_roc, GAE_l_ap, GAE_l_acc = [], [], []
AGAE_l_roc, AGAE_l_ap, AGAE_l_acc = [], [], []

# Arguments forwarded to get_synthetic_data() when dataset_str == 'synthetic'.
p = 0.01
attrNoise = 0.2
m = 10

for i in range(FLAGS.num_experiments):
    # Load data: anything other than 'synthetic' goes through load_data().
    if dataset_str != 'synthetic':
        adj, features = load_data(dataset_str)
    else:
        adj, features = get_synthetic_data(p=p, attrNoise=attrNoise, m=m)

    # Original adjacency matrix with its diagonal entries removed, kept for
    # later. adj.diagonal()[np.newaxis, :] is the diagonal as a row vector;
    # dia_matrix turns it into a diagonal-only sparse matrix to subtract.
    adj_orig = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]),
                                   shape=adj.shape)

    # A sparse matrix should not store entries equal to 0, so drop them after
    # the update above.
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(adj,
                                         test_percent=10.,
                                         val_percent=5.)
    # From here on adj is the training matrix, with all validation and
    # testing entries masked out.
    adj = adj_train
    #print(adj_train.shape)
Esempio n. 35
0
# Default settings
class args:
    """Static default settings, read as plain class attributes (``args.<name>``)."""

    # Dataset sub-directory (trailing slash included).
    data_dir = "BSNIP_left_full/"

    # Three dimension settings (presumably hidden-layer widths -- confirm
    # against the model definition).
    hidden_dim_1 = 100
    hidden_dim_2 = 50
    hidden_dim_3 = 5

    # Optimisation settings.
    batch_size = 32
    learning_rate = 1e-4
    kl_coefficient = 1e-4

    # Activation name and dropout value (0.0 here).
    activation = 'tanh'
    dropout = 0.0

# Load data from ./data/<args.data_dir>original.npy
adj = load_data("./data/" + args.data_dir + "original.npy")

# Set the diagonal of every item yielded by iterating adj to 1, in place.
for sub in adj:
    np.fill_diagonal(sub, 1)

# Normalize adjacency matrix (i.e. D^(.5)AD^(.5))
# NOTE(review): symmetric normalization is usually written with negative
# exponents, D^(-.5) A D^(-.5) -- confirm against normalize_adj.
adj_norm = normalize_adj(adj)

# Both counts currently come from the same axis of adj.
# CHANGE num_features TO features.shape[1] LATER
num_nodes = num_features = adj.shape[1]

# Define placeholders
placeholders = {
    'features':