def main(_):
    """Train a single-layer softmax classifier and report its test accuracy.

    Loads the flattened image data from ``FLAGS.data_dir``, fits the linear
    model ``y = x W + b`` with gradient descent for ``MAX_STEPS`` steps and
    prints the accuracy on the test split every 100 steps and once more after
    the last step.

    Args:
        _: Ignored; the function never reads its argument.
    """
    # Load data. Every print below is a single-argument call so the output
    # is the same under Python 2 and Python 3.
    meta, train_data, test_data = input_data.load_data(FLAGS.data_dir, flatten=True)
    print('data loaded')
    print('train images: %s. test images: %s' %
          (train_data.images.shape[0], test_data.images.shape[0]))

    label_size = meta['label_size']
    image_size = meta['width'] * meta['height']
    num_per_image = meta['num_per_image']
    output_size = num_per_image * label_size
    print('OUTPUT_SIZE: %s, image_size: %s' % (output_size, image_size))

    # Graph inputs: flattened images and their target vectors.
    x = tf.placeholder(tf.float32, [None, image_size])
    y_ = tf.placeholder(tf.float32, [None, output_size])

    # The model: one affine layer, initialised to zero.
    W = tf.Variable(tf.zeros([image_size, output_size]))
    b = tf.Variable(tf.zeros([output_size]))
    y = tf.matmul(x, W) + b

    # Loss and optimizer.
    diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
    cross_entropy = tf.reduce_mean(diff)
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    # Forward prop.
    # NOTE(review): argmax runs over the whole output vector, so this looks
    # like it only makes sense when num_per_image == 1 - confirm.
    predict = tf.argmax(y, axis=1)
    expect = tf.argmax(y_, axis=1)

    # Accuracy: fraction of rows whose arg-max matches the label's arg-max.
    correct_prediction = tf.equal(predict, expect)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        test_feed = {x: test_data.images, y_: test_data.labels}

        # Train
        for i in range(MAX_STEPS):
            batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)
            sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

            if i % 100 == 0:
                # Test the partially trained model.
                r = sess.run(accuracy, feed_dict=test_feed)
                print('step = %s, accuracy = %.2f%%' % (i, r * 100))

        # Final check after looping.
        r_test = sess.run(accuracy, feed_dict=test_feed)
        print('testing accuracy = %.2f%%' % (r_test * 100, ))
def format_data(data_name):
    """Load a graph data set and assemble everything the trainer needs.

    Args:
        data_name: Data set identifier, passed straight to ``load_data``.

    Returns:
        dict mapping each quantity's name to its value: ``adj``,
        ``num_features``, ``num_nodes``, ``features_nonzero``, ``pos_weight``,
        ``norm``, ``adj_norm``, ``adj_label``, ``features``, ``true_labels``,
        the train/val/test edge arrays, ``adj_orig``, ``features_dense`` and
        ``adj_dense``.
    """
    # Load data; only the adjacency matrix, features and labels are used.
    adj, features, _y_test, _tx, _ty, _test_mask, true_labels = load_data(
        data_name)

    # Store original adjacency matrix (without diagonal entries) for later.
    # Subtracting the diagonal removes self-loops.
    adj_orig = adj - sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj_orig.eliminate_zeros()

    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = mask_test_edges(adj)
    adj = adj_train
    adj_dense = adj.toarray()

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features_dense = features.tocoo().toarray()
    features = sparse_to_tuple(features.tocoo())
    # num_features is the dimensionality of a feature vector.
    num_features = features[2][1]
    # features_nonzero is the number of non-zero feature entries.
    features_nonzero = features[1].shape[0]

    cell_count = adj.shape[0] * adj.shape[0]
    edge_sum = adj.sum()
    pos_weight = float(cell_count - edge_sum) / edge_sum
    norm = cell_count / float((cell_count - edge_sum) * 2)

    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    print('num_features is:', num_features)
    print('num_nodes is:', num_nodes)
    print('features_nonzero is:', features_nonzero)
    print('pos_weight is:', pos_weight)
    print('norm is:', norm)

    # Keys are spelled out instead of being recovered by frame introspection:
    # `adj` and `adj_train` are the same object and `num_features` can equal
    # `num_nodes`, so a value -> variable-name lookup is ambiguous here.
    feas = {
        'adj': adj,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'features_nonzero': features_nonzero,
        'pos_weight': pos_weight,
        'norm': norm,
        'adj_norm': adj_norm,
        'adj_label': adj_label,
        'features': features,
        'true_labels': true_labels,
        'train_edges': train_edges,
        'val_edges': val_edges,
        'val_edges_false': val_edges_false,
        'test_edges': test_edges,
        'test_edges_false': test_edges_false,
        'adj_orig': adj_orig,
        'features_dense': features_dense,
        'adj_dense': adj_dense,
    }
    return feas
def __init__(self, input_path, output_dir):
    """Prepare the output directory and copy the loaded data onto the instance.

    Args:
        input_path: Location handed to ``load_data``.
        output_dir: Directory for results; created when it does not exist.
    """
    directory_missing = not exists(output_dir)
    if directory_missing:
        makedirs(output_dir)
    self.output_dir = output_dir

    # Every field of the loaded record becomes an attribute of the same name.
    record = load_data(input_path)._asdict()
    for field_name in record:
        setattr(self, field_name, record[field_name])
def format_data(data_name):
    """Load a graph data set and collect the training inputs in a dict.

    Args:
        data_name: Data set identifier, passed straight to ``load_data``.

    Returns:
        dict with the masked training adjacency (``adj``), its normalised and
        label forms, the feature tuple, the size/weight scalars, the
        train/val/test edge arrays, the labels and the diagonal-free original
        adjacency (``adj_orig``).
    """
    # Load data
    adj, features, true_labels = load_data(data_name)

    # Keep the original adjacency matrix (without diagonal entries) for later.
    diagonal = sp.dia_matrix(
        (adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape)
    adj_orig = adj - diagonal
    adj_orig.eliminate_zeros()

    masked = mask_test_edges(adj)
    (adj_train, train_edges, val_edges, val_edges_false, test_edges,
     test_edges_false) = masked
    adj = adj_train

    if FLAGS.features == 0:
        # featureless: replace the features by an identity matrix
        features = sp.identity(features.shape[0])

    # Some preprocessing
    adj_norm = preprocess_graph(adj)
    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    # NOTE(review): the diagonal is added with weight 2 here; if these labels
    # feed a sigmoid cross-entropy they would be expected in [0, 1] - confirm
    # the factor is intentional.
    adj_label = sparse_to_tuple(adj_train + 2 * sp.eye(adj_train.shape[0]))

    return {
        'adj': adj,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'features_nonzero': features_nonzero,
        'pos_weight': pos_weight,
        'norm': norm,
        'adj_norm': adj_norm,
        'adj_label': adj_label,
        'features': features,
        'true_labels': true_labels,
        'train_edges': train_edges,
        'val_edges': val_edges,
        'val_edges_false': val_edges_false,
        'test_edges': test_edges,
        'test_edges_false': test_edges_false,
        'adj_orig': adj_orig,
    }
def load_dataset(self, data_filename):
    """Load the train/test splits and derive the size attributes from them.

    Args:
        data_filename: File handed to ``loader.load_data``; its result is
            indexed 0..3 as train inputs, test inputs, train targets and
            test targets.
    """
    loaded = loader.load_data(data_filename)
    split_names = ("train_X", "test_X", "train_Y", "test_Y")
    for position, attribute in enumerate(split_names):
        setattr(self, attribute, loaded[position])

    # Layer's sizes
    self.input_dim = self.train_X.shape[1]
    self.data_size = len(self.train_X)
    # Fixed iteration count; the data_size / batch_size formula is disabled,
    # so the line printed below is informational only.
    self.iterations = 200
    print(self.data_size, "/", self.batch_size, "=", self.iterations)
    self.display()
def __init__(self, input_path, output_dir, debug_mode=False):
    """Prepare the output directory, a worker pool and the loaded data.

    Args:
        input_path: Location handed to ``load_data``.
        output_dir: Directory for results; created when it does not exist.
        debug_mode: Stored on the instance as ``debug_mode``.
    """
    directory_missing = not exists(output_dir)
    if directory_missing:
        makedirs(output_dir)
    self.output_dir = output_dir

    self.pool = Pool()
    self.obj_value_trace = []

    # Every field of the loaded record becomes an attribute of the same name.
    record = load_data(input_path)._asdict()
    for field_name in record:
        setattr(self, field_name, record[field_name])

    # Assigned last, so it wins over a loaded field with the same name.
    self.debug_mode = debug_mode
def run_training():
    """Train the word2vec model on ``yitian.txt`` and print similar words.

    Every 1000 steps the current loss and the average time per step are
    printed. Every 5000 steps, and after the final step, the words most
    similar to a fixed random sample of validation words are printed.
    """
    text_dataset = input_data.load_data("yitian.txt",
                                        max_vocabulary_size=40000)

    # Validation ids are drawn from 5..14 with weights 14..5. Both sequences
    # sum to 95, so the weights already form a probability distribution. The
    # float() keeps the division exact on interpreters where integer arrays
    # floor-divide.
    valid_window = np.array(range(5, 15))
    sample_p = (19 - valid_window) / float(np.sum(valid_window))
    valid_ids = np.random.choice(valid_window,
                                 FLAGS.validation_size,
                                 p=sample_p,
                                 replace=False)

    with tf.Graph().as_default():
        batch_inputs_pl, batch_labels_pl, valid_ids_pl = place_holder(
            FLAGS.batch_size, FLAGS.validation_size)
        loss, embeddings = word2vec.loss(batch_inputs_pl, batch_labels_pl)
        train_op = word2vec.train(loss)
        sim_compute = word2vec.compute_sim(valid_ids_pl, embeddings)
        init = tf.initialize_all_variables()

        with tf.Session() as sess:
            sess.run(init)

            # The per-step time is computed from the number of steps actually
            # run since the timer was last reset. The first report covers a
            # single step, and evaluation time is never charged to training.
            window_start = time.time()
            window_steps = 0
            for step in range(FLAGS.max_step):
                filled_dict = fill_feed_dict(text_dataset, batch_inputs_pl,
                                             batch_labels_pl,
                                             FLAGS.batch_size,
                                             FLAGS.num_skips,
                                             FLAGS.skip_window)
                _, loss_value = sess.run([train_op, loss], filled_dict)
                window_steps += 1

                if step % 1000 == 0:
                    duration = time.time() - window_start
                    print("Step: {:d}, Training Loss: {:.4f}, {:.1f}us/step".
                          format(step, loss_value,
                                 duration * 1e6 / window_steps))
                    window_start = time.time()
                    window_steps = 0

                if (step + 1) % 5000 == 0 or (step + 1) == FLAGS.max_step:
                    sim_words_id, _ = sess.run(sim_compute,
                                               {valid_ids_pl: valid_ids})
                    for (i, word_id) in enumerate(valid_ids):
                        word = text_dataset.word_count[word_id][0]
                        sim_words = [
                            text_dataset.word_count[sim_word_id][0]
                            for sim_word_id in sim_words_id[i]
                        ]
                        print(word, end=":")
                        print(" ".join(sim_words))
                    # Restart the window so evaluation time is not counted.
                    window_start = time.time()
                    window_steps = 0
def heuristic_ga_optimize(input_path, out_path):
    """Search the delta weights with a (single-generation) genetic algorithm.

    Resets the module-level traces, creates a fresh worker pool, indexes one
    weight per (project, resource) demand pair and runs
    ``algorithms.eaSimple`` over those weights.

    Args:
        input_path: Location handed to ``load_data``.
        out_path: Not used by this function.

    Returns:
        tuple: ``(min(_tardiness_obj_trace), elapsed_seconds)``.
    """
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    start = time.perf_counter()
    global _last_x, _last_CT, _pool, _delta_trace, _delta_dim, _delta_project_idx

    # Reset the shared state left behind by a previous run.
    _tardiness_obj_trace.clear()
    _delta_trace.clear()
    _delta_project_idx.clear()
    _CT_map.clear()
    _last_x = None
    _last_CT = None
    # NOTE(review): the pool stored here is never closed in this function;
    # confirm whether the caller is expected to release it.
    _pool = Pool(5)
    D = load_data(input_path)

    # initialization for GA
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMax)
    toolbox = base.Toolbox()

    # Give every (project index, resource) demand pair its own slot in the
    # weight vector, resources in sorted order within a project.
    _delta_dim = 0
    for j in range(D.project_n):
        p = D.project_list[j]
        for r in sorted([r_ for (r_, p_) in D.resource_project_demand.keys() if p_ == p]):
            _delta_project_idx[j, r] = _delta_dim
            _delta_dim += 1

    toolbox.register("individual", _random_delta_weight_for_projects, _delta_dim, creator.Individual)
    toolbox.register("population", tools.initRepeat, creator.Individual, toolbox.individual)
    toolbox.register("evaluate", _objective_function_for_delta_weight, D)
    toolbox.register("mate", _mate)
    toolbox.register("mutate", _mutate, mutate_prob=0.25)
    toolbox.register("select", tools.selTournament, tournsize=3)

    pop = toolbox.population(n=1)
    hof = tools.HallOfFame(1)
    algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=1, halloffame=hof, verbose=True)

    return min(_tardiness_obj_trace), time.perf_counter() - start
def _one_hot_rows(labels, count, num_classes=10):
    """Return a (count, num_classes) integer one-hot matrix for the first *count* labels."""
    indices = np.asarray(labels[:count]).reshape(count)
    encoded = np.zeros((count, num_classes), dtype=int)
    encoded[np.arange(count), indices] = 1
    return encoded


def main(argv=None):
    """Load the image data, one-hot encode the labels and start training.

    Images are flattened to rows of 3072 values (50000 training rows and
    10000 test rows) and the labels are turned into 10-way one-hot vectors
    before everything is handed to ``train``.

    Args:
        argv: Not used by this function.
    """
    (images, labels), (t_images, t_labels) = input_data.load_data()
    images = np.reshape(images, (50000, 3072))
    t_images = np.reshape(t_images, (10000, 3072))

    # The removed ``np.int`` alias was the builtin ``int``, so the dtype of
    # the encoded labels is unchanged.
    train_targets = _one_hot_rows(labels, 50000)
    del labels  # release the raw labels before training starts
    test_targets = _one_hot_rows(t_labels, 10000)
    del t_labels

    # The test labels are passed as a list of per-sample vectors, exactly as
    # before. NOTE(review): the training labels are a 2-D array; confirm
    # train() really wants the two in different containers.
    train(images, train_targets, t_images, list(test_targets))
def run_training():
    """Train the convolutional network and evaluate it periodically.

    Every 100 steps the current loss and the average time per step are
    printed. Every 1000 steps, and after the final step, accuracy is
    evaluated on the training, validation and test splits.
    """
    # for mnist
    # train_data, test_data, validation_data = input_data.read_data_sets("../data/MNIST_data/")
    # for cifar-10
    train_data, test_data, validation_data = input_data.load_data()

    with tf.Graph().as_default():
        image_pl, label_pl, keep_prob_pl = place_holder(FLAGS.batch_size)
        logits = nn_structure.inference(image_pl, conv_1_params,
                                        max_pool_1_params, conv_2_params,
                                        max_pool_2_params,
                                        full_connected_units, keep_prob_pl)
        loss = nn_structure.loss(logits, label_pl)
        train_op = nn_structure.train(loss, FLAGS.learning_rate)
        eval_correct = nn_structure.evaluation(logits, label_pl, k=1)
        init = tf.initialize_all_variables()

        with tf.Session() as sess:
            sess.run(init)

            # The per-step time is computed from the number of steps actually
            # run since the timer was last reset. The first report covers a
            # single step, and evaluation time is never charged to training.
            window_start = time.time()
            window_steps = 0
            for step in range(FLAGS.max_step):
                feed_dict = fill_feed_dict(train_data, 0.5, image_pl,
                                           label_pl, keep_prob_pl)
                _, loss_value = sess.run([train_op, loss], feed_dict)
                window_steps += 1

                if step % 100 == 0:
                    duration = time.time() - window_start
                    print("Step: {:d}, Training Loss: {:.4f}, {:.1f}ms/step".
                          format(step, loss_value,
                                 duration * 1000.0 / window_steps))
                    window_start = time.time()
                    window_steps = 0

                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_step:
                    print("Train Eval:")
                    do_eval(sess, eval_correct, train_data, image_pl,
                            label_pl, keep_prob_pl)
                    print("Validation Eval:")
                    do_eval(sess, eval_correct, validation_data, image_pl,
                            label_pl, keep_prob_pl)
                    print("Test Eval:")
                    do_eval(sess, eval_correct, test_data, image_pl, label_pl,
                            keep_prob_pl)
                    # Restart the window so evaluation time is not counted.
                    window_start = time.time()
                    window_steps = 0
def format_data(data_source):
    """Load a graph data set and assemble the model inputs.

    Unlike the usual preprocessing, the adjacency matrix is used as loaded:
    the step that strips its diagonal entries is disabled here.

    Args:
        data_source: Data set identifier, passed straight to ``load_data``.

    Returns:
        dict mapping each quantity's name to its value: ``adj``,
        ``num_features``, ``num_nodes``, ``features_nonzero``, ``adj_norm``,
        ``adj_label``, ``features``, ``labels``, ``pos_weight`` and ``norm``.
    """
    adj, features, labels = load_data(data_source)

    if FLAGS.features == 0:
        features = sp.identity(features.shape[0])  # featureless

    # Some preprocessing
    adj_norm = preprocess_graph(adj)

    num_nodes = adj.shape[0]

    features = sparse_to_tuple(features.tocoo())
    num_features = features[2][1]
    features_nonzero = features[1].shape[0]

    cell_count = adj.shape[0] * adj.shape[0]
    edge_sum = adj.sum()
    pos_weight = float(cell_count - edge_sum) / edge_sum
    norm = cell_count / float((cell_count - edge_sum) * 2)

    adj_label = adj + sp.eye(adj.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    # Keys are spelled out instead of being recovered by frame introspection:
    # in featureless mode num_features equals num_nodes, so a
    # value -> variable-name lookup can pick the wrong name.
    feas = {
        'adj': adj,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'features_nonzero': features_nonzero,
        'adj_norm': adj_norm,
        'adj_label': adj_label,
        'features': features,
        'labels': labels,
        'pos_weight': pos_weight,
        'norm': norm,
    }
    return feas
def train(dataset, weightRate):
    """Run the pipeline: edge split, embedding training, edge classification.

    Args:
        dataset: Data set identifier, passed straight to ``load_data``.
        weightRate: Forwarded as the first argument of ``make_test_edges``.
    """
    adj, features, falseEdges = load_data(dataset)

    # Generate training and test data.
    (adj_train, train_edges, train_edges_false, test_edges,
     test_edges_false) = make_test_edges(weightRate, adj, falseEdges)

    # Single-argument print calls emit the same text on Python 2 and 3.
    print(adj_train.shape)
    print('%s %s' % (train_edges.shape, train_edges_false.shape))

    # Embeddings returned by W-VGAE.
    emb = train_gcn(features, adj_train, train_edges, train_edges_false,
                    test_edges, test_edges_false)

    # Generate paired training and test data, similar to GCN.
    X_train, Y_train = generate_data(emb, train_edges, train_edges_false)
    X_test, Y_test = generate_data(emb, test_edges, test_edges_false)

    # The final softmax classifier.
    acc = train_nn(X_train, Y_train, X_test, Y_test)
    print('accuracy: %s' % (acc[0],))
    print('sensitivity: %s' % (acc[1],))
    print('specificity: %s' % (acc[2],))
    print('precision: %s' % (acc[3],))
# Lists to collect average results if FLAGS.task == 'link_prediction': mean_roc = [] mean_ap = [] elif FLAGS.task == 'node_clustering': mean_mutual_info = [] if FLAGS.kcore: mean_time_kcore = [] mean_time_train = [] mean_time_expand = [] mean_core_size = [] mean_time = [] # Load graph dataset adj_init, features_init = load_data(FLAGS.dataset) if FLAGS.verbose: print(f"Loading data... {FLAGS.dataset} n: {adj_init.shape[0]}, m: {np.sum(adj_init)//2}") # Load ground-truth labels for node clustering task if FLAGS.task == 'node_clustering': labels = load_label(FLAGS.dataset) # The entire training+test process is repeated FLAGS.nb_run times for i in range(FLAGS.nb_run): if FLAGS.task == 'link_prediction' : if FLAGS.verbose: print("Masking test edges...") # Edge Masking for Link Prediction: compute Train/Validation/Test set adj, val_edges, val_edges_false, test_edges, test_edges_false = \
# Dataset-specific hyper-parameter flags. Only 'yale' and 'rochester' are
# handled; for any other value of FLAGS.dataset none of these five flags is
# defined by this section.
# NOTE(review): the help text of 'link_weight' repeats the one of
# 'pri_weight' ('weight of privacy') - looks like a copy/paste; confirm.
if FLAGS.dataset == 'yale':
    flags.DEFINE_integer('epochs', 500, 'Number of iterations.')
    flags.DEFINE_integer('hidden2', 16, 'Number of units in GCN layer 2.')
    flags.DEFINE_integer('pri_weight', 1, 'weight of privacy')
    flags.DEFINE_integer('uti_attr_weight', 10, 'weight of utility_attr')
    flags.DEFINE_float('link_weight', 1, 'weight of privacy')
elif FLAGS.dataset == 'rochester':
    flags.DEFINE_integer('epochs', 2000, 'Number of iterations.')
    flags.DEFINE_integer('pri_weight', 10, 'weight of privacy')
    flags.DEFINE_integer('uti_attr_weight', 1, 'weight of utility_attr')
    flags.DEFINE_integer('hidden2', 8, 'Number of units in GCN layer 2.')
    flags.DEFINE_float('link_weight', 1, 'weight of privacy')

# Load data: the loader already returns the training adjacency and the
# validation/test edge splits.
adj, features, adj_train, val_edges, val_edges_false, test_edges, test_edges_false, labels = load_data(
    FLAGS.dataset)

# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
# Subtracting the diagonal removes the self-loops.
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()

# From here on `adj` refers to the training graph only.
adj = adj_train

# Some preprocessing
adj_norm = preprocess_graph(adj)
# Dense copy of the feature matrix for the attribute helper below.
features_mat = features.toarray()
attr_labels_list, dim_attr, features_rm_privacy = get_attr_list(
    FLAGS.dataset, labels, features_mat)
def heuristic_delta_weight(input_path, output_path=None, converge_count=2, tolerance=1, d1=100, d2=0):
    '''
    Iteratively re-weight the (project, resource) deltas until the tardiness
    objective stops improving. All deltas start at 1 and are normalised; each
    round calls _objective_function_for_delta_weight to obtain new weights.

    :param input_path: the path for the folder of the input files
    :param output_path: optional folder for result files; when given it
        replaces the module-level _result_output_path
    :param converge_count: the process stops when the optimal solution has not
        been updated in converge_count rounds.
    :param tolerance: when abs(last_optimal-current_optimal)<=tolerance, the
        solution is considered unchanged (converged).
    :param d1: parameter in formula 40
    :param d2: parameter in formula 40
    :return: (objective_value, time_cost, gap of the best round)
    '''
    global _last_x, _last_CT, _pool, _delta_trace, _historical_delta_weight_idx_map, \
        _result_output_path, _time_limit_per_model, _gap_trace, _round
    from random import seed
    seed(13)
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    start = time.perf_counter()

    # Reset the shared state left behind by a previous run.
    _tardiness_obj_trace.clear()
    _gap_trace.clear()
    _delta_trace.clear()
    _CT_map.clear()
    _historical_delta_weight_idx_map.clear()
    _last_x = None
    _last_CT = None
    _pool = Pool(2)

    if output_path is not None:
        _result_output_path = output_path
    # Create the result folder only when a path is actually configured.
    if _result_output_path is not None and not exists(_result_output_path):
        makedirs(_result_output_path)

    D = load_data(input_path)
    _round = 0
    # Split a one-hour budget across the projects plus two extra shares.
    _time_limit_per_model = 3600.0 / (D.project_n + 2)

    # One weight per (project index, resource) demand pair, all equal at start.
    delta_weight = {}
    for j in range(D.project_n):
        p = D.project_list[j]
        for r in sorted([r_ for (r_, p_) in D.resource_project_demand.keys() if p_ == p]):
            delta_weight[j, r] = 1
    _logger.info(str(delta_weight))
    _normalize(delta_weight)

    # Record the initial (round 0) weights.
    for (j, r) in delta_weight.keys():
        _weight_dataset.loc[_weight_dataset.shape[0]] = [_round, j, r, delta_weight[j, r]]

    optimal = 1e10
    current_converge_count = 0
    with open('trace.log', 'a') as f:
        while current_converge_count < converge_count:
            _round += 1
            _logger.info('-' * 50)
            _logger.info('round %d' % _round)
            delta_weight = _objective_function_for_delta_weight(D, delta_weight, d1, d2)

            latest = _tardiness_obj_trace[-1]
            if latest < optimal:
                # An improvement within the tolerance still counts towards
                # convergence; a larger one restarts the count.
                if abs(latest - optimal) <= tolerance:
                    current_converge_count += 1
                else:
                    current_converge_count = 0
                optimal = min(optimal, latest)
            else:
                current_converge_count += 1

            print("trace:", _tardiness_obj_trace)
            f.write('%r\n' % _tardiness_obj_trace)
            f.write("time cost:%r" % (time.perf_counter() - start))

    return min(_tardiness_obj_trace), time.perf_counter() - start, _gap_trace[np.argmin(_tardiness_obj_trace)]
import input_data
import tensorflow as tf

if __name__ == '__main__':
    # Script entry point: load the data set with the loader's default
    # arguments and keep the result in `mnist`.
    mnist = input_data.load_data()
def format_data(data_name):
    """Load a multi-view graph data set and assemble the training inputs.

    Args:
        data_name: Data set identifier, passed straight to ``load_data``.

    Returns:
        dict with the normalised per-view adjacencies (under both ``adjs``
        and ``adjs_norm``), the raw networks as ``adjs_label``, the features
        and their positive weight, the per-view ``pos_weights`` / ``norms``,
        the labels and the sizes ``num_features``, ``num_nodes``, ``numView``.
    """
    # Load data
    print("&&&&&&&&&&&&&&&&&", data_name)
    (rownetworks, numView, features, truelabels, y_train, y_val, y_test,
     train_mask, val_mask, test_mask) = load_data(data_name)

    # Strip the diagonal of every view (explicit zeros are not pruned here).
    diagonal_free = []
    for view in range(numView):
        network = rownetworks[view]
        diagonal = sp.dia_matrix(
            (network.diagonal()[np.newaxis, :], [0]), shape=network.shape)
        diagonal_free.append(network - diagonal)
    adjs_label = rownetworks
    adjs = np.array(diagonal_free)

    if FLAGS.features == 0:
        # featureless: replace the features by an identity matrix
        features = sp.identity(features.shape[0])

    # Some preprocessing
    adjs_norm = preprocess_graph(adjs)
    num_nodes = adjs[0].shape[0]
    num_features = features.shape[1]

    feature_cells = features.shape[0] * features.shape[1]
    fea_pos_weights = float(feature_cells - features.sum()) / features.sum()

    # Per-view class-balance weight and loss normalisation factor.
    pos_weights = []
    norms = []
    for view in range(numView):
        current = adjs[view]
        cell_count = current.shape[0] * current.shape[0]
        pos_weights.append(float(cell_count - current.sum()) / current.sum())
        norms.append(cell_count / float((cell_count - current.sum()) * 2))

    # NOTE(review): 'adjs' carries the normalised matrices, the same object
    # as 'adjs_norm', not the diagonal-free ones - confirm this is intended.
    return {
        'adjs': adjs_norm,
        'adjs_label': adjs_label,
        'num_features': num_features,
        'num_nodes': num_nodes,
        'true_labels': truelabels,
        'pos_weights': pos_weights,
        'norms': np.array(norms),
        'adjs_norm': adjs_norm,
        'features': features,
        'fea_pos_weights': fea_pos_weights,
        'numView': numView
    }
total = truth.shape[0] * truth.shape[1] seq_err_rate = seqerr[f] / total acc = seqerr[t] / total return seq_err_rate, acc if __name__ == "__main__": # input data nb = 50000 timesteps = 1 nb_samples = timesteps * nb val = 0.1 test = 0.2 data_dim = 102 data = dataset.load_data(nb_samples) x_train, y_train, x_val, y_val, x_test, y_test = split_data(data, nb_samples, val, test) # first approach trainX = reshape_data(x_train, timesteps) trainY = reshape_data(y_train, timesteps) valX = reshape_data(x_val, timesteps) valY = reshape_data(y_val, timesteps) testX = reshape_data(x_test, timesteps) testY = reshape_data(y_test, timesteps) # model paramters results = [] batch_size = 16 nb_epochs = 1000
def original_model(input_path, output_path):
    """Build and solve the complete ("original") mixed-integer model.

    The model chooses, per resource and project, one supplier and a shipped
    quantity, schedules the activities of every project and orders the
    project reviews. It minimises the weighted sum of the project tardiness
    variables. The model and its solution are written to ``original.lp`` and
    ``original.sol`` in *output_path*.

    Args:
        input_path: Location handed to ``load_data``.
        output_path: Folder for the written files; created when missing.

    Returns:
        tuple: ``(objective_value, elapsed_seconds)``. The time covers model
        construction and optimisation, not data loading.
    """
    if not exists(output_path):
        makedirs(output_path)

    (supplier_project_shipping, project_list, project_activity, DD, resource_supplier_capacity,
     project_n, resource_project_demand, resource_supplier_list, M, c, B,
     resource_supplier_release_time, review_duration, w) = load_data(input_path)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # monotonic replacement for measuring elapsed time.
    start_time = time.perf_counter()
    m = Model('construction')
    m.params.MIPGap = 1e-8
    m.params.timelimit = 3600

    def shares_rk_resource(graph, first, second):
        """Return True when two activities have an 'rk_resources' entry in common."""
        common = set(graph.node[first]['rk_resources']).intersection(
            graph.node[second]['rk_resources'])
        return len(list(common)) > 0

    # Create variables ############################################
    ##### supplier-project shipping decision x and shipping quantity q
    x = {}
    q = {}
    for (i, j, k) in supplier_project_shipping:
        # i resource, j supplier, k project
        x[i, j, k] = m.addVar(obj=0, vtype=GRB.BINARY, name="x_%s_%s_%s" % (i, j, k))
        q[i, j, k] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="q_%s_%s_%s" % (i, j, k))
    print('add var x,q')

    ##### project completion date, project tardiness, construction completion time
    DT = {}
    TD = {}
    CT = {}
    DT[-1] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="DT_-1")  # project start time
    for j in range(project_n):
        DT[j] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="DT_%d" % j)  # project j completion date
        TD[j] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="TD_%d" % j)  # project j tardiness
        CT[j] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="CT_%d" % j)  # project j construction completion time
    print('add var DT TD CT')

    ##### activity start time
    ST = []
    for j in range(project_n):
        ST.append({})
        for row in project_activity[project_list[j]].nodes():
            ST[j][row] = m.addVar(obj=0, vtype=GRB.CONTINUOUS, name="ST_%d_%s" % (j, row))
    print('add var ST')

    ##### review sequence; index -1 is the dummy predecessor of the first review
    z = {}
    for i in range(project_n):
        for j in range(project_n):
            if i != j:
                z[i, j] = m.addVar(obj=0, vtype=GRB.BINARY, name="z_%d_%d" % (i, j))
    for j in range(project_n):
        z[-1, j] = m.addVar(obj=0, vtype=GRB.BINARY, name="z_%d_%d" % (-1, j))
    print('add var z')

    ##### ordering of activity pairs that share an 'rk_resources' entry
    y = {}
    for j in range(project_n):
        graph = project_activity[project_list[j]]
        for row1 in graph.nodes():
            for row2 in graph.nodes():
                if row1 != row2 and shares_rk_resource(graph, row1, row2):
                    y[j, row1, row2] = m.addVar(obj=0, vtype=GRB.BINARY,
                                                name="y_%d_%s_%s" % (j, row1, row2))
    print('add var y')
    m.update()

    # Create constraints #########################################
    ##### constraint 2: completion date minus tardiness <= due date
    for j in range(project_n):
        m.addConstr(DT[j] - TD[j], GRB.LESS_EQUAL, DD[j], name="constraint_2_project_%d" % j)
    print('add constr 2')

    ##### constraint 3: supplier capacity limit
    for (row1, row2) in resource_supplier_capacity:
        m.addConstr(quicksum(q[row1, row2, project_list[j]] for j in range(project_n)),
                    GRB.LESS_EQUAL, resource_supplier_capacity[row1, row2],
                    name="constraint_3_resource_%s_supplier_%s" % (row1, row2))
    print('add constr 3')

    ##### constraints 4, 6: project demand is met; each project receives each
    ##### resource from exactly one supplier
    for (row1, row2) in resource_project_demand:
        m.addConstr(quicksum(x[row1, i, row2] for i in resource_supplier_list[row1]),
                    GRB.EQUAL, 1,
                    name="constraint_6_resource_%s_project_%s" % (row1, row2))
        m.addConstr(quicksum(q[row1, i, row2] for i in resource_supplier_list[row1]),
                    GRB.GREATER_EQUAL, resource_project_demand[row1, row2],
                    name="constraint_4_resource_%s_project_%s" % (row1, row2))
    print('add constr 4,6')

    ##### constraint 5: quantity may only be shipped on a selected link (big-M)
    for (i, j, k) in q:
        # i resource, j supplier, k project
        m.addConstr(q[i, j, k], GRB.LESS_EQUAL, M * x[i, j, k],
                    name="constraint_5_resource_%s_supplier_%s_project_%s" % (i, j, k))
    print('add constr 5')

    ##### constraint 7: budget limit
    expr = LinExpr()
    for (i, j, k) in q:
        expr.addTerms(c[i, j, k], q[i, j, k])
    m.addConstr(expr, GRB.LESS_EQUAL, B, name="constraint_7")
    print('add constr 7')

    ##### constraint 8: an activity starts after its resources are released and shipped
    for j in range(project_n):
        project = project_list[j]
        graph = project_activity[project]
        for row in graph.nodes():
            for row1 in graph.node[row]['resources']:
                m.addConstr(quicksum(x[row1, i, project] * (
                    resource_supplier_release_time[row1, i] +
                    supplier_project_shipping[row1, i, project])
                    for i in resource_supplier_list[row1]),
                    GRB.LESS_EQUAL, ST[j][row],
                    name="constraint_8_project_%d_activity_%s_resource_%s" % (j, row, row1))
    print('add constr 8')

    ##### constraint 9: activity precedence along the edges of the activity graph
    for j in range(project_n):
        graph = project_activity[project_list[j]]
        for row1, row2 in graph.edges():
            m.addConstr(ST[j][row1] + graph.node[row1]['duration'],
                        GRB.LESS_EQUAL, ST[j][row2],
                        name="constraint_9_project_%d_activity_%s_activity_%s" % (j, row1, row2))
    print('add constr 9')

    ##### constraints 10, 11: activities sharing an 'rk_resources' entry do not
    ##### overlap (y picks the order)
    for j in range(project_n):
        graph = project_activity[project_list[j]]
        for row1 in graph.nodes():
            for row2 in graph.nodes():
                if row1 != row2 and shares_rk_resource(graph, row1, row2):
                    m.addConstr(
                        ST[j][row1] + graph.node[row1]['duration'] - M * (1 - y[j, row1, row2]),
                        GRB.LESS_EQUAL, ST[j][row2],
                        name="constraint_10_project_%d_activity_%s_activity_%s" % (j, row1, row2))
                    m.addConstr(
                        ST[j][row2] + graph.node[row2]['duration'] - M * (y[j, row1, row2]),
                        GRB.LESS_EQUAL, ST[j][row1],
                        name="constraint_11_project_%d_activity_%s_activity_%s" % (j, row1, row2))
    print('add constr 10 11')

    ##### constraint 12: construction completes after every activity has finished
    for j in range(project_n):
        graph = project_activity[project_list[j]]
        for row in graph.nodes():
            m.addConstr(CT[j], GRB.GREATER_EQUAL,
                        ST[j][row] + graph.node[row]['duration'],
                        name="constraint_12_project_%d_activity_%s" % (j, row))
    print('add constr 12')

    ##### constraint 13: the review follows construction
    for j in range(project_n):
        m.addConstr(DT[j], GRB.GREATER_EQUAL, CT[j] + review_duration[j],
                    name="constraint_13_project_%d" % j)

    ##### constraint 14: reviews are processed one after another
    for i in range(-1, project_n):
        for j in range(project_n):
            if i != j:
                m.addConstr(DT[j], GRB.GREATER_EQUAL,
                            DT[i] - M * (1 - z[i, j]) + review_duration[j],
                            name="constraint_14_project_%d_project_%d" % (i, j))
    print('add constr 14')

    ##### constraint 15: every project has exactly one review predecessor
    for j in range(project_n):
        m.addConstr(quicksum(z[i, j] for i in range(-1, project_n) if i != j),
                    GRB.EQUAL, 1, name="constraint_15_project_%d" % j)
    print('add constr 15')

    ##### constraint 16: exactly one project follows the dummy start
    m.addConstr(quicksum(z[-1, j] for j in range(project_n)), GRB.EQUAL, 1,
                name="constraint_16")
    print('add constr 16')

    ##### constraint 17: every project has at most one review successor
    for i in range(project_n):
        m.addConstr(quicksum(z[i, j] for j in range(project_n) if j != i),
                    GRB.LESS_EQUAL, 1, name="constraint_17_project_%d" % i)
    print('add constr 17')
    m.update()

    # Objective: minimise the weighted sum of the tardiness variables.
    expr = LinExpr()
    for j in range(project_n):
        expr.addTerms(w[j], TD[j])
    print('add obj')
    m.setObjective(expr, GRB.MINIMIZE)
    m.update()

    # Solve
    m.optimize()
    print('project_n=%d' % project_n)
    time_cost = time.perf_counter() - start_time
    print('time cost=', time_cost)

    # Write the model and the solution, then report the objective.
    m.write(join(output_path, 'original.lp'))
    m.write(join(output_path, 'original.sol'))
    print('objective value=', m.objVal)
    return m.objVal, time_cost
def pred_link(dataset, epochs):
    """Train a small classifier on saved node embeddings and score link prediction.

    Positive training pairs are the non-zero entries of the training
    adjacency; negatives come from ``./data/<dataset>_train_edges_false.npy``.
    Both endpoints' embeddings (``./data/<dataset>_emb.npy``) pass through
    separate dense layers and are concatenated and classified into two
    classes. Accuracy and macro-F1 on the test pairs are printed and
    prepended to ``./data/<dataset>_results.txt``.

    Args:
        dataset: Data set identifier, used for ``load_data`` and file names.
        epochs: Number of training steps; each step uses one batch of 100 pairs.

    Returns:
        tuple: ``(accuracy, macro_f1)`` on the evaluated test pairs. Only
        whole batches of 100 are evaluated, so a final partial batch is ignored.
    """
    batch_size = 100

    # Load samples; only the training adjacency and the test edges are used.
    (_adj, _features, adj_train, _val_edges, _val_edges_false, test_edges,
     test_edges_false, _labels) = load_data(dataset)
    train_edges_pos = sparse_to_tuple(adj_train)[0]
    train_edges_false = np.load('./data/' + dataset + '_train_edges_false.npy')
    train_all_edges = np.concatenate((train_edges_pos, train_edges_false),
                                     axis=0)

    # Column 0 marks a real edge, column 1 a sampled non-edge. The split
    # follows the actual number of positives, so unequal class sizes are
    # labelled correctly.
    n_train_pos = len(train_edges_pos)
    labels = np.zeros(train_all_edges.shape)
    labels[:n_train_pos, 0] = 1
    labels[n_train_pos:, 1] = 1

    permutation = np.random.permutation(train_all_edges.shape[0])
    train_all_edges = train_all_edges[permutation, :]
    labels = labels[permutation, :]

    # Load embeddings.
    emb = np.load('./data/' + dataset + '_emb.npy')

    tf.compat.v1.disable_eager_execution()
    x1 = tf.placeholder('float', [None, 64])
    x2 = tf.placeholder('float', [None, 64])
    y = tf.placeholder('float', [None, 2])
    x11 = tf.nn.relu(tf.layers.dense(inputs=x1, units=32))
    x21 = tf.nn.relu(tf.layers.dense(inputs=x2, units=32))
    x31 = tf.concat([x11, x21], 1)
    x41 = tf.nn.relu(tf.layers.dense(inputs=x31, units=16))
    x4 = tf.nn.relu(tf.layers.dense(inputs=x41, units=8))
    preds = tf.layers.dense(inputs=x4, units=2)
    cross_entropy = tf.reduce_mean(
        tf.losses.sigmoid_cross_entropy(logits=preds, multi_class_labels=y))
    train_op = tf.train.AdamOptimizer(
        learning_rate=0.01).minimize(cross_entropy)
    init = tf.global_variables_initializer()

    test_all_edges = np.concatenate((test_edges, test_edges_false), axis=0)
    n_test_pos = len(test_edges)
    test_labels = np.zeros(test_all_edges.shape)
    test_labels[:n_test_pos, 0] = 1
    test_labels[n_test_pos:, 1] = 1
    n_test_batches = int(test_all_edges.shape[0] / batch_size)

    # The context manager releases the session even when a step fails.
    with tf.Session() as sess:
        sess.run(init)

        # Cycle through the shuffled pairs, wrapping around once the next
        # full batch would run past the end.
        batch_index = 0
        for _ in range(epochs):
            if batch_index * batch_size + batch_size > train_all_edges.shape[0]:
                batch_index = 0
            a = batch_index * batch_size
            b = a + batch_size
            batch_index += 1
            batch_edges = train_all_edges[a:b, :]
            sess.run(train_op,
                     feed_dict={
                         x1: emb[batch_edges[:, 0], :],
                         x2: emb[batch_edges[:, 1], :],
                         y: labels[a:b]
                     })

        batch_outputs = []
        for batch_index in range(n_test_batches):
            a = batch_index * batch_size
            b = a + batch_size
            batch_edges = test_all_edges[a:b, :]
            batch_outputs.append(
                sess.run(preds,
                         feed_dict={
                             x1: emb[batch_edges[:, 0], :],
                             x2: emb[batch_edges[:, 1], :],
                             # Labels that belong to this batch.
                             y: test_labels[a:b, :]
                         }))

    if batch_outputs:
        test_preds = np.vstack(batch_outputs)
    else:
        test_preds = np.empty((0, 2))
    test_labels = test_labels[:n_test_batches * batch_size, :]

    # Class 0 (edge) wins ties; `l` is 1 for non-edges, matching `p`.
    p = [0 if row[0] >= row[1] else 1 for row in test_preds]
    l = test_labels[:, 1]

    from sklearn.metrics import f1_score, accuracy_score
    acc = accuracy_score(l, p)
    f1 = f1_score(l, p, average='macro')
    print(acc)
    print(f1)

    # Prepend the new scores to the results file; a missing file is treated
    # as empty instead of aborting after training.
    results_path = './data/' + dataset + '_results.txt'
    try:
        with open(results_path, 'r') as f:
            previous = f.read()
    except IOError:
        previous = ''
    with open(results_path, 'w') as f:
        f.write(str(acc) + '\n')
        f.write(str(f1) + '\n' + previous)
    return acc, f1
def load_company_data(company_str):
    """Return what ``input_data.load_data`` yields for the given company.

    Args:
        company_str: Value forwarded as the ``company`` keyword argument.
    """
    return input_data.load_data(company=company_str)
# flags flags = tf.app.flags FLAGS = flags.FLAGS flags.DEFINE_string('data_name', 'SBM', 'name of data set.') flags.DEFINE_float('learning_rate', .5 * 0.001, 'Initial learning rate.') flags.DEFINE_integer('hidden1', 32, 'Number of units in hidden layer 1.') flags.DEFINE_integer('hidden2', 16, 'Number of units in hidden layer 2.') flags.DEFINE_float('dropout', 0., 'Dropout rate (1 - keep probability).') flags.DEFINE_integer('features', 0, 'Whether to use features (1) or not (0).') flags.DEFINE_integer('seed', 50, 'seed for fixing the results.') flags.DEFINE_integer('iterations', 1000, 'number of iterations.') # preprocess adjs, features = load_data(FLAGS.data_name, 0.5) adj = adjs[-1] feature = features[-1] adj_orig = sparse_to_tuple(adj) adj_norm = preprocess_graph(adj) feature = sparse_to_tuple(feature) features_nonzero = feature[1].shape[0] num_node = np.array(adjs[0]).shape[1] feature_dim = np.array(features[0]).shape[1] pos_weight = float(num_node * num_node - adj[1].sum()) / adj[1].sum() norm = num_node * num_node / float((num_node * num_node - adj[1].sum()) * 2) print('num_node: ', num_node, ' feature_dim: ', feature_dim, ' pos_weight: ',
def predict_model(test_x, test_y, parameters):
    """Return the fraction of samples whose every output unit is correct.

    The network output is thresholded at 0.5 and compared with ``test_y``
    element-wise; a sample (column) counts as correct only when all
    ``test_y.shape[0]`` rows match.

    Args:
        test_x: Input matrix; its second dimension is used as the number
            of samples.
        test_y: Target matrix; its first dimension is the number of
            output rows that must all match.
        parameters: Model parameters passed to ``dnn.L_model_forward``.

    Returns:
        The share of fully-correct samples, ``sum(correct) / m``.
    """
    sample_count = test_x.shape[1]
    rows_per_sample = test_y.shape[0]

    activations, _ = dnn.L_model_forward(test_x, parameters)
    # Binarise the activations in place (same array the forward pass returned).
    activations[activations >= 0.5] = 1
    activations[activations < 0.5] = 0

    # Per-column count of rows that agree with the target.
    agreement = (activations == test_y).astype(int)
    hits = np.sum(agreement, axis=0, keepdims=True)
    # Collapse the count to a 0/1 flag: 1 only when every row agreed.
    hits[hits < rows_per_sample] = 0
    hits[hits == rows_per_sample] = 1
    print(hits)
    return (1 / sample_count) * np.sum(hits)


if __name__ == '__main__':
    train_x, train_y, test_x, test_y = input_data.load_data()

    # Flatten every image into a column, then scale pixel values by 1/255.
    train_x_flatten = train_x.reshape(train_x.shape[0], -1).T
    test_x_flatten = test_x.reshape(test_x.shape[0], -1).T
    train_y = train_y.T.astype(int)
    test_y = test_y.T.astype(int)
    train_x_flatten = train_x_flatten / 255
    test_x_flatten = test_x_flatten / 255

    parameters = L_layer_model(train_x_flatten, train_y, (784, 100, 10))

    train_accuracy = predict_model(train_x_flatten, train_y, parameters)
    print(train_accuracy, '\n')
    test_accuracy = predict_model(test_x_flatten, test_y, parameters)
    print(test_accuracy, '\n')
# make dirs if FLAGS.output is not None: os.makedirs(FLAGS.output, exist_ok=True) output_dir = os.path.join(FLAGS.output, now) model_path = os.path.join(output_dir, 'checkpoint') prediction_path = os.path.join(output_dir, 'prediction') log_path = os.path.join(output_dir, 'log') create_dir_if_not_exists(model_path) create_dir_if_not_exists(prediction_path) create_dir_if_not_exists(log_path) adj, adata = load_data() adj_orig = adj adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(adj) features, features_orig, size_factors, val_features, val_features_idx, test_features, test_features_idx = mask_test_express(adata) adj = adj_train adj_norm = preprocess_graph(adj) # Define placeholders placeholders = { 'features': tf.placeholder(tf.float32),
import numpy as np import scipy.sparse as sp #import tensorflow as tf from input_data import load_data from preprocessing import (construct_feed_dict, mask_test_edges, preprocess_graph, sparse_to_tuple) adj, features = load_data('cora') adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) print(adj.nnz) print(adj_train.nnz) #print(features.shape) ''' a = tf.constant([[1,2,2],[1,2,3]],tf.float32) ses = tf.Session() x = tf.transpose(a) y = tf.matmul(a, x) ys = tf.nn.sigmoid(y) print(ses.run(a)) print(ses.run(x)) print(ses.run(y)) print(ses.run(ys)) def sparse_to_tuple(sparse_mx): if not sp.isspmatrix_coo(sparse_mx): sparse_mx = sparse_mx.tocoo() coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose()
def runner(self):
    """Train the two-graph autoencoder against a discriminator and report top-k hits.

    Builds one placeholder dict per graph, creates the discriminator
    variables, loads the 'Douban_offline' and 'Douban_online' data sets,
    constructs the model and its optimizers, then alternates generator
    (``opt.opt_op``) and discriminator (``opt_dis``) updates.  After every
    epoch the cosine similarity between the two embedding sets is stored in
    ``self.similar_matrix`` and compared with the pairs read from
    ``data/douban_truth.emb``; hit counts and precision are printed for
    each k in ``topk``.

    NOTE(review): the block nesting below was reconstructed from the
    statement order (loop variables and data dependencies) — confirm against
    the upstream file, in particular where the discriminator loop and the
    per-epoch evaluation sit relative to the ``circle_epoch`` loop.
    """
    model_str = FLAGS.model
    # One placeholder dict per graph.  Only the first dict carries the
    # discriminator weights (D_W1/D_W2/D_b1/D_b2), fed from the session below.
    placeholders = [{
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        'num_features': tf.placeholder(tf.float32),
        'features_nonzero': tf.placeholder(tf.float32),
        'pos_weight': tf.placeholder(tf.float32),
        'norm': tf.placeholder(tf.float32),
        'reward': tf.placeholder(tf.float32),
        'D_W1': tf.placeholder_with_default(
            tf.zeros([FLAGS.g_hidden2, FLAGS.d_hidden1]),
            shape=[FLAGS.g_hidden2, FLAGS.d_hidden1]),
        'D_W2': tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1, 1]),
                                            shape=[FLAGS.d_hidden1, 1]),
        'D_b1': tf.placeholder_with_default(tf.zeros([FLAGS.d_hidden1]),
                                            shape=[FLAGS.d_hidden1]),
        'D_b2': tf.placeholder_with_default(tf.zeros([1]), shape=[1]),
    }, {
        'features': tf.sparse_placeholder(tf.float32),
        'adj': tf.sparse_placeholder(tf.float32),
        'adj_orig': tf.sparse_placeholder(tf.float32),
        'dropout': tf.placeholder_with_default(0., shape=()),
        # NOTE(review): a sparse placeholder here, but a dense one for the
        # first graph's 'num_features' — possibly a typo; confirm.
        'num_features': tf.sparse_placeholder(tf.float32),
        'features_nonzero': tf.placeholder(tf.float32),
        'pos_weight': tf.placeholder(tf.float32),
        'norm': tf.placeholder(tf.float32),
        'reward': tf.placeholder(tf.float32)
    }]
    sess = tf.Session()
    # These two placeholders are rebound to fresh ones after the optimizer
    # is built; only the later pair is fed during training.
    real_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
    fake_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
    # Discriminator parameters (the only variables updated by opt_dis).
    self.D_W1 = tf.Variable(xavier_init([FLAGS.g_hidden2, FLAGS.d_hidden1]))
    self.D_b1 = tf.Variable(xavier_init([FLAGS.d_hidden1]))
    self.D_W2 = tf.Variable(xavier_init([FLAGS.d_hidden1, 1]))
    self.D_b2 = tf.Variable(xavier_init([1]))
    d_vars = [self.D_W1, self.D_b1, self.D_W2, self.D_b2]
    print('train for the network embedding...')
    # Load data
    dataset_str1 = 'Douban_offline'  # 1118 nodes
    dataset_str2 = 'Douban_online'  # 3906 nodes
    adj1, features1, fea_num1 = load_data(dataset_str1)
    adj2, features2, fea_num2 = load_data(dataset_str2)
    num_features = [features1.shape[1], features2.shape[1]]
    # model stays None when FLAGS.model is neither 'gcn_ae' nor 'gcn_vae';
    # the attribute accesses below would then fail.
    model = None
    if model_str == 'gcn_ae':
        model = GCNModelAE(placeholders, num_features, sess)
    elif model_str == 'gcn_vae':
        # NOTE(review): num_nodes and features_nonzero are not assigned
        # anywhere in this method; unless they exist in an enclosing scope
        # this branch raises NameError — confirm.
        model = GCNModelVAE(placeholders, num_features, num_nodes,
                            features_nonzero)
    # Optimizer
    with tf.name_scope('optimizer'):
        opt = OptimizerAE(
            preds=[model.reconstructions1, model.reconstructions2],
            labels=[
                tf.reshape(
                    tf.sparse_tensor_to_dense(placeholders[0]['adj_orig'],
                                              validate_indices=False),
                    [-1]),
                tf.reshape(
                    tf.sparse_tensor_to_dense(placeholders[1]['adj_orig'],
                                              validate_indices=False),
                    [-1])
            ],
            # NOTE(review): the same tensor is listed twice, while every
            # other pair uses a 1/2 suffix — possibly meant to be
            # attribute_reconstructions2; confirm against the model class.
            preds_attribute=[
                model.attribute_reconstructions1,
                model.attribute_reconstructions1
            ],
            labels_attribute=[
                tf.sparse_tensor_to_dense(placeholders[0]['features']),
                tf.sparse_tensor_to_dense(placeholders[1]['features'])
            ],
            pos_weight=[
                placeholders[0]['pos_weight'], placeholders[1]['pos_weight']
            ],
            norm=[placeholders[0]['norm'], placeholders[1]['norm']],
            fake_logits=model.fake_logits,
            alpha=FLAGS.AX_alpha)
    # Discriminator inputs actually used in the training loop.
    real_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
    fake_X = tf.placeholder(tf.float32, shape=[None, FLAGS.g_hidden2])
    real_logits, fake_logits = self.discriminator(real_X, fake_X)
    real_prob = tf.reduce_mean(real_logits)
    fake_prob = tf.reduce_mean(fake_logits)
    # Minimising this raises the mean logit on real_X and lowers it on fake_X.
    D_loss = -real_prob + fake_prob
    dis_optimizer = tf.train.AdamOptimizer(
        learning_rate=FLAGS.learning_rate_dis)  # Adam Optimizer
    opt_dis = dis_optimizer.minimize(D_loss, var_list=d_vars)
    sess.run(tf.global_variables_initializer())
    final_emb1 = []
    final_emb2 = []
    # emb1_id / emb2_id are not used again in this method.
    emb1_id = []
    emb2_id = []
    local_A_1 = adj1
    local_X_1 = features1
    local_A_2 = adj2
    local_X_2 = features2
    # Per-graph preprocessing: normalised adjacency, tuple-form features,
    # class-balance weight, adjacency-plus-identity labels, loss scale.
    adj_norm_1 = preprocess_graph(local_A_1)
    local_X_1 = sparse_to_tuple(local_X_1.tocoo())
    pos_weight_1 = float(local_A_1.shape[0] * local_A_1.shape[0] -
                         local_A_1.sum()) / local_A_1.sum()
    adj_label_1 = local_A_1 + sp.eye(local_A_1.shape[0])
    adj_label_1 = sparse_to_tuple(adj_label_1)
    norm_1 = local_A_1.shape[0] * local_A_1.shape[0] / float(
        (local_A_1.shape[0] * local_A_1.shape[0] - local_A_1.sum()) * 2)
    adj_norm_2 = preprocess_graph(local_A_2)
    local_X_2 = sparse_to_tuple(local_X_2.tocoo())
    pos_weight_2 = float(local_A_2.shape[0] * local_A_2.shape[0] -
                         local_A_2.sum()) / local_A_2.sum()
    adj_label_2 = local_A_2 + sp.eye(local_A_2.shape[0])
    adj_label_2 = sparse_to_tuple(adj_label_2)
    norm_2 = local_A_2.shape[0] * local_A_2.shape[0] / float(
        (local_A_2.shape[0] * local_A_2.shape[0] - local_A_2.sum()) * 2)
    self.tmp_count = {}
    for epoch in range(FLAGS.epoch):
        for circle_epoch in range(FLAGS.circle_epoch):
            for G_epoch in range(FLAGS.g_epoch):
                # Generator step.
                # NOTE(review): graph 2 is listed before graph 1 in every
                # argument — presumably matching how construct_feed_dict
                # maps onto placeholders; confirm.
                feed_dict = construct_feed_dict(
                    [adj_norm_2, adj_norm_1], [adj_label_2, adj_label_1],
                    [local_X_2, local_X_1], [pos_weight_2, pos_weight_1],
                    [norm_2, norm_1], placeholders)
                # Feed the current discriminator weights into the
                # generator graph as plain values.
                feed_dict.update(
                    {placeholders[0]['D_W1']: sess.run(self.D_W1)})
                feed_dict.update(
                    {placeholders[0]['D_W2']: sess.run(self.D_W2)})
                feed_dict.update(
                    {placeholders[0]['D_b1']: sess.run(self.D_b1)})
                feed_dict.update(
                    {placeholders[0]['D_b2']: sess.run(self.D_b2)})
                _, embeddings1_, embeddings2_, gcn_cost, fake_prob_, attr_cost = sess.run(
                    [
                        opt.opt_op, model.embeddings1, model.embeddings2_,
                        opt.cost, model.fake_prob, opt.attribute_cost
                    ],
                    feed_dict=feed_dict)
            for D_epoch in range(FLAGS.d_epoch):
                # Discriminator step: recompute both embeddings, then
                # update only d_vars.  Reuses the feed_dict from the last
                # generator step.
                feed_dict.update(
                    {placeholders[0]['dropout']: FLAGS.dropout})
                emb1, emb2 = sess.run(
                    [model.embeddings1, model.embeddings2_],
                    feed_dict=feed_dict)
                _, real_prob_, fake_prob_ = sess.run(
                    [opt_dis, real_prob, fake_prob],
                    feed_dict={
                        real_X: emb1,
                        fake_X: emb2
                    })
        # `epoch % 1 == 0` is always true, so this evaluation runs every epoch.
        if epoch % 1 == 0:
            emb1, emb2 = sess.run([model.embeddings1, model.embeddings2_],
                                  feed_dict=feed_dict)
            final_emb1 = np.array(emb1)
            final_emb2 = np.array(emb2)
            similar_matrix = cosine_similarity(final_emb1, final_emb2)
            self.similar_matrix = similar_matrix
            pair = {}
            gnd = np.loadtxt("data/douban_truth.emb")
            count = {}
            topk = [1, 5, 10, 20, 30, 50]
            for i in range(len(topk)):
                pair[topk[i]] = []
                count[topk[i]] = 0
                self.tmp_count[topk[i]] = 0
            for top in topk:
                for index in range(similar_matrix.shape[0]):
                    # Column indices of the `top` largest similarities in
                    # this row, shifted to 1-based ids.
                    top_index = heapq.nlargest(
                        int(top), range(len(similar_matrix[index])),
                        similar_matrix[index].take)
                    top_index = list(map(lambda x: x + 1, top_index))
                    pair[top].append([index + 1, top_index])
                # Count ground-truth pairs whose second id appears among
                # the top-k candidates of the first id.
                for ele_1 in gnd:
                    for ele_2 in pair[top]:
                        if ele_1[0] == ele_2[0]:
                            if ele_1[1] in ele_2[1]:
                                count[top] += 1
            print(
                f'-----------------------epoch {epoch}------------------------'
            )
            for top in topk:
                print("top", '%02d' % (top), "count=", '%d' % (count[top]),
                      "precision=",
                      "{:.5f}".format(count[top] / len(gnd)))
            print(
                f'-----------------------epoch {epoch}------------------------'
            )
index = epoch % number_of_slices return edges[index * args.subsample_number:(index + 1) * args.subsample_number] for exp in range(10): args.model = 'NLGF' print('model= ' + str(args.model)) print('dataset=' + str(args.dataset)) print('learning rate= ' + str(args.learning_rate)) print('epoch= ' + str(args.num_epoch)) print('subsample_number=' + str(args.subsample_number)) print('hidden1_dim=' + str(args.hidden1_dim)) adj, features = load_data(args.dataset) # Store original adjacency matrix (without diagonal entries) for later adj_orig = adj adj_orig = adj_orig - sp.dia_matrix( (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) adj_orig.eliminate_zeros() adj_train, train_edges, train_false_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges( adj) adj = adj_train # Some preprocessing adj_norm = preprocess_graph(adj)
import time import os os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' from constants import batch_size, epochs, dropout, variables_device,\ sequence_length, learning_rate, display_steps,\ prediction_length, processing_device from funcs import defineVariables, preActivation, activation company_str = input("Enter company name for training: ") while not os.path.exists("../csv-data/gainers/" + company_str + ".NS.csv"): print "Company not found" company_str = input("Enter company name for training: ") company = input_data.load_data(company=company_str) # placeholders seq_input = tf.placeholder(tf.float32, shape=(None, sequence_length, 4), name="input_to_lstm") seq_output = tf.placeholder(tf.float32, shape=(None, 4 * prediction_length), name="output_of_model") with tf.device(variables_device): # weights fc_weights = { 'wfc1': defineVariables([120, 80], "wfc1"), 'wfc2': defineVariables([80, 64], "wfc2"),
# Echo two run options, validate the model-specific flags, create the
# output directory, load the data set, and derive the feature
# class-balance weights.
print ('WeightedCE: ' + str(FLAGS.weighted_ce))
print ('ReconstructX: ' + str(FLAGS.reconstruct_x))
model_str = FLAGS.model
dataset_str = FLAGS.dataset
print (model_str)
# For these two models --hidden must contain at least two '_'-separated
# fields; abort otherwise.
if (model_str == 'dglfrm' or model_str == 'dglfrm_b'):
    if (len(FLAGS.hidden.split('_')) < 2):
        sys.exit("The truncation parameter missing. Specify '--hidden <layer_1>_<truncation_parameter>'")
# Output directory is keyed by data set, split index, model and hidden spec.
save_dir = './data/' + dataset_str +'/split_'+ str(FLAGS.split_idx) + '/' + model_str + "/" + FLAGS.hidden + "/"
if not os.path.exists(save_dir):
    os.makedirs(save_dir)
# Load data. Raw adj is an NxN matrix and features is an NxF matrix. Sparse matrices are used here (see scipy docs).
adj, features, feature_presence = load_data(dataset_str)
# Store original adjacency matrix (without diagonal entries) for later
adj_orig = adj
# Subtract a diagonal-only matrix to zero the diagonal, then drop the
# explicit zeros that leaves behind.
adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()
print ("Adj Original Matrix: " + str(adj_orig.shape))
print ("Features Shape: " + str(features.shape))
features_shape = features.shape[0]
if FLAGS.features == 0:
    features = sp.identity(features_shape) # featureless
# Ratio of zero entries to the sum of entries in the feature matrix.
pos_weight_feats = float(features.shape[0] * features.shape[1] - features.sum()) / features.sum() # (N) / P
# Total entry count divided by twice the zero-entry count.
norm_feats = features.shape[0] * features.shape[1] / float((features.shape[0] * features.shape[1] - features.sum()) * 2) # (N+P) / (N)
def main(data_dir):
    """Train a single-layer softmax classifier and log TensorBoard summaries.

    Loads flattened images through ``input_data.load_data``, builds the
    linear model ``y = xW + b`` with a softmax cross-entropy loss, trains it
    for ``MAX_STEPS`` mini-batches of ``BATCH_SIZE`` and writes merged
    summaries to ``LOG_DIR + '/train'``.  Test accuracy is printed every 100
    steps and once more after training.

    The summary writer is closed only after the final summary has been
    written (the close is in a ``finally`` so it also runs on errors).

    Args:
        data_dir: Directory passed to ``input_data.load_data``.
    """
    # load data
    meta, train_data, test_data = input_data.load_data(data_dir, flatten=True)
    print('data loaded. train images: %s. test images: %s' % (
        train_data.images.shape[0], test_data.images.shape[0]))

    LABEL_SIZE = meta['label_size']
    IMAGE_WIDTH = meta['width']
    IMAGE_HEIGHT = meta['height']
    IMAGE_SIZE = IMAGE_WIDTH * IMAGE_HEIGHT
    print('label_size: %s, image_size: %s' % (LABEL_SIZE, IMAGE_SIZE))

    # variable in the graph for input data
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, IMAGE_SIZE])
        y_ = tf.placeholder(tf.float32, [None, LABEL_SIZE])
        variable_summaries(x)
        variable_summaries(y_)

        # must be 4-D with shape `[batch_size, height, width, channels]`
        images_shaped_input = tf.reshape(x, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
        tf.summary.image('input', images_shaped_input,
                         max_outputs=LABEL_SIZE * 2)

    # define the model
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope('linear_model'):
        with tf.name_scope('W'):
            W = tf.Variable(tf.zeros([IMAGE_SIZE, LABEL_SIZE]))
            variable_summaries(W)
        with tf.name_scope('b'):
            b = tf.Variable(tf.zeros([LABEL_SIZE]))
            variable_summaries(b)
        with tf.name_scope('y'):
            y = tf.matmul(x, W) + b
            tf.summary.histogram('y', y)

    # Define loss and optimizer
    # `diff` is a 1-D tensor of length batch_size holding the per-example
    # softmax cross-entropy; the training loss is its mean.
    with tf.name_scope('loss'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
        cross_entropy = tf.reduce_mean(diff)
        train_step = tf.train.GradientDescentOptimizer(0.5).minimize(
            cross_entropy)
        variable_summaries(diff)

    # forward prop
    predict = tf.argmax(y, axis=1)
    expect = tf.argmax(y_, axis=1)

    # evaluate accuracy
    with tf.name_scope('evaluate_accuracy'):
        correct_prediction = tf.equal(predict, expect)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        variable_summaries(accuracy)

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
        try:
            tf.global_variables_initializer().run()

            # Step used for the final summary; stays 0 if MAX_STEPS is 0 so
            # the code below never depends on an unbound loop variable.
            last_step = 0

            # Train
            for i in range(MAX_STEPS):
                last_step = i
                batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)
                train_summary, _ = sess.run([merged, train_step],
                                            feed_dict={
                                                x: batch_xs,
                                                y_: batch_ys
                                            })
                train_writer.add_summary(train_summary, i)

                if i % 100 == 0:
                    # Test trained model
                    test_summary, r = sess.run([merged, accuracy],
                                               feed_dict={
                                                   x: test_data.images,
                                                   y_: test_data.labels
                                               })
                    train_writer.add_summary(test_summary, i)
                    print('step = %s, accuracy = %.2f%%' % (i, r * 100))

            # final check after looping
            test_summary, r_test = sess.run([merged, accuracy],
                                            feed_dict={
                                                x: test_data.images,
                                                y_: test_data.labels
                                            })
            # Written BEFORE the writer is closed so the event is not lost.
            train_writer.add_summary(test_summary, last_step)
            print('testing accuracy = %.2f%%' % (r_test * 100, ))
        finally:
            train_writer.close()
def format_data(data_name, seq_len, time_decay):
    """Load a sequence of graph snapshots and slice it into sliding windows.

    For every snapshot returned by ``load_data`` the adjacency is converted
    with ``sparse_to_tuple`` and normalised with ``preprocess_graph``, and
    the balance weight / loss scale are derived from the sum of its values.
    The per-snapshot lists are then cut into ``len(snapshots) - seq_len``
    windows of length ``seq_len``: ``struct_*`` windows start at index ``i``
    and ``temporal_*`` windows start one snapshot later, at ``i + 1``.

    Args:
        data_name: Data set identifier forwarded to ``load_data``.
        seq_len: Number of consecutive snapshots per window.
        time_decay: Second argument forwarded to ``load_data``.

    Returns:
        dict with the windowed lists (``temporal_adj_origs``,
        ``temporal_pos_weights``, ``temporal_norms``, ``struct_adj_origs``,
        ``struct_features``, ``struct_features_nonzeros``,
        ``struct_adj_norms``, ``struct_pos_weights``, ``struct_norms``), the
        scalars ``num_node``, ``feature_dim`` and ``batch_size``, and the
        un-windowed ``adj_norms`` and ``features`` lists.
    """
    adjs, features = load_data(data_name, time_decay)

    num_node = np.array(adjs[0]).shape[1]
    feature_dim = np.array(features[0]).shape[1]
    pair_total = num_node * num_node

    adj_origs, pos_weights, norms = [], [], []
    adj_norms, features_sp, features_nonzeros = [], [], []
    for snapshot_adj, snapshot_feat in zip(adjs, features):
        adj_tuple = sparse_to_tuple(snapshot_adj)
        value_sum = adj_tuple[1].sum()
        # Ratio of "missing" mass to present mass, and the matching loss scale.
        weight = float(pair_total - value_sum) / value_sum
        scale = pair_total / float((pair_total - value_sum) * 2)

        feat_tuple = sparse_to_tuple(snapshot_feat)
        nonzero_count = feat_tuple[1].shape[0]
        normalised = preprocess_graph(snapshot_adj)

        adj_origs.append(adj_tuple)
        pos_weights.append(weight)
        norms.append(scale)
        features_sp.append(feat_tuple)
        features_nonzeros.append(nonzero_count)
        adj_norms.append(normalised)

    batch_size = len(adj_origs) - seq_len

    def _windows(series, offset):
        # One slice of length seq_len per start position, shifted by offset.
        return [
            series[start + offset:start + offset + seq_len]
            for start in range(batch_size)
        ]

    return {
        'temporal_adj_origs': _windows(adj_origs, 1),
        'temporal_pos_weights': _windows(pos_weights, 1),
        'temporal_norms': _windows(norms, 1),
        'num_node': num_node,
        'feature_dim': feature_dim,
        'batch_size': batch_size,
        'struct_adj_origs': _windows(adj_origs, 0),
        'struct_features': _windows(features_sp, 0),
        'struct_features_nonzeros': _windows(features_nonzeros, 0),
        'struct_adj_norms': _windows(adj_norms, 0),
        'struct_pos_weights': _windows(pos_weights, 0),
        'struct_norms': _windows(norms, 0),
        'adj_norms': adj_norms,
        'features': features_sp
    }
def web_main():
    """Train a VGAE link-prediction model on ``args.dataset`` and print metrics.

    Loads the graph, masks validation/test edges, builds a DGL graph from
    the normalised training adjacency, and trains ``model.VGAEModel`` with a
    weighted binary cross-entropy reconstruction loss minus a KL term.
    Training accuracy plus validation ROC-AUC / average precision are
    printed every epoch; test ROC-AUC / AP are printed at the end, using the
    reconstruction from the last training forward pass.

    Reads ``args.dataset``, ``args.hidden1``, ``args.hidden2``,
    ``args.learning_rate`` and ``args.epochs``.
    """
    adj, features = load_data(args.dataset)
    features = sparse_to_tuple(features.tocoo())

    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj)
    adj = adj_train

    # Some preprocessing
    adj_normalization, adj_norm = preprocess_graph(adj)

    # Create model
    graph = dgl.from_scipy(adj_normalization)
    # NOTE(review): the return value is discarded.  In recent DGL releases
    # add_self_loop returns a new graph instead of mutating in place, which
    # would make this call a no-op — confirm the intended behaviour before
    # changing it (the normalised adjacency may already contain self-loops).
    graph.add_self_loop()

    # Class-balance weight for positive entries and global loss scale, both
    # derived from the edge mass of the training adjacency.
    pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()
    norm = adj.shape[0] * adj.shape[0] / float(
        (adj.shape[0] * adj.shape[0] - adj.sum()) * 2)

    # Reconstruction target: training adjacency plus the identity.
    adj_label = adj_train + sp.eye(adj_train.shape[0])
    adj_label = sparse_to_tuple(adj_label)

    adj_norm = torch.sparse.FloatTensor(torch.LongTensor(adj_norm[0].T),
                                        torch.FloatTensor(adj_norm[1]),
                                        torch.Size(adj_norm[2]))
    adj_label = torch.sparse.FloatTensor(torch.LongTensor(adj_label[0].T),
                                         torch.FloatTensor(adj_label[1]),
                                         torch.Size(adj_label[2]))
    features = torch.sparse.FloatTensor(torch.LongTensor(features[0].T),
                                        torch.FloatTensor(features[1]),
                                        torch.Size(features[2]))

    # Per-entry loss weights: pos_weight where the label is 1, else 1.
    weight_mask = adj_label.to_dense().view(-1) == 1
    weight_tensor = torch.ones(weight_mask.size(0))
    weight_tensor[weight_mask] = pos_weight

    features = features.to_dense()
    in_dim = features.shape[-1]

    vgae_model = model.VGAEModel(in_dim, args.hidden1, args.hidden2)
    # create training component
    optimizer = torch.optim.Adam(vgae_model.parameters(),
                                 lr=args.learning_rate)
    print('Total Parameters:',
          sum(p.nelement() for p in vgae_model.parameters()))

    def get_scores(edges_pos, edges_neg, adj_rec):
        """Return (ROC-AUC, average precision) for held-out edges.

        Scores are ``sigmoid(adj_rec[i, j])`` for every ``(i, j)`` in
        ``edges_pos`` (label 1) and ``edges_neg`` (label 0).  Both sets are
        read with ``.item()`` so the score array holds plain floats only.
        """

        def sigmoid(x):
            return 1 / (1 + np.exp(-x))

        preds = [sigmoid(adj_rec[e[0], e[1]].item()) for e in edges_pos]
        preds_neg = [sigmoid(adj_rec[e[0], e[1]].item()) for e in edges_neg]

        preds_all = np.hstack([preds, preds_neg])
        labels_all = np.hstack(
            [np.ones(len(preds)), np.zeros(len(preds_neg))])
        roc_score = roc_auc_score(labels_all, preds_all)
        ap_score = average_precision_score(labels_all, preds_all)

        return roc_score, ap_score

    def get_acc(adj_rec, adj_label):
        """Return the share of entries where ``adj_rec > 0.5`` equals the label."""
        labels_all = adj_label.to_dense().view(-1).long()
        preds_all = (adj_rec > 0.5).view(-1).long()
        accuracy = (preds_all == labels_all).sum().float() / labels_all.size(0)
        return accuracy

    # create training epoch
    for epoch in range(args.epochs):
        t = time.time()

        # Training and validation using a full graph
        vgae_model.train()

        logits = vgae_model.forward(graph, features)

        # compute loss: weighted reconstruction BCE minus the KL term
        loss = norm * F.binary_cross_entropy(logits.view(-1),
                                             adj_label.to_dense().view(-1),
                                             weight=weight_tensor)
        kl_divergence = 0.5 / logits.size(0) * (
            1 + 2 * vgae_model.log_std - vgae_model.mean**2 -
            torch.exp(vgae_model.log_std)**2).sum(1).mean()
        loss -= kl_divergence

        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_acc = get_acc(logits, adj_label)

        val_roc, val_ap = get_scores(val_edges, val_edges_false, logits)

        # Print out performance
        print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
              "{:.5f}".format(loss.item()), "train_acc=",
              "{:.5f}".format(train_acc), "val_roc=",
              "{:.5f}".format(val_roc), "val_ap=", "{:.5f}".format(val_ap),
              "time=", "{:.5f}".format(time.time() - t))

    test_roc, test_ap = get_scores(test_edges, test_edges_false, logits)
    print("End of training!", "test_roc=", "{:.5f}".format(test_roc),
          "test_ap=", "{:.5f}".format(test_ap))
def main(_):
    """Build, train and evaluate a two-convolution-layer image classifier.

    Images come un-flattened from ``input_data.load_data(FLAGS.data_dir)``.
    The graph is conv(7x7, 32) -> pool -> conv(7x7, 64) -> pool ->
    dense(1024) -> dropout -> readout, trained with Adam on a softmax
    cross-entropy loss for ``MAX_STEPS`` batches of ``BATCH_SIZE``.  Merged
    summaries go to ``LOG_DIR + '/train'`` and ``LOG_DIR + '/test'``; every
    100 steps the accuracy on the current batch and on 2000 test samples is
    printed, and a final test accuracy is printed after training.

    ``keep_prob`` is fed as 1.0 in every run of this function, so the
    dropout layer is inactive.

    Args:
        _: Unused positional argument.
    """
    # load data
    meta, train_data, test_data = input_data.load_data(FLAGS.data_dir,
                                                       flatten=False)
    print('data loaded')
    print('train images: %s. test images: %s' %
          (train_data.images.shape[0], test_data.images.shape[0]))

    label_size = meta['label_size']
    img_h = meta['height']
    img_w = meta['width']
    img_pixels = img_w * img_h
    print('label_size: %s, image_size: %s' % (label_size, img_pixels))

    # graph inputs
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, img_h, img_w])
        y_ = tf.placeholder(tf.float32, [None, label_size])

        # conv2d needs `[batch_size, height, width, channels]`
        x_image = tf.reshape(x, [-1, img_h, img_w, 1])
        tf.summary.image('input', x_image, max_outputs=label_size)

    # model definition
    with tf.name_scope('convolution-layer-1'):
        W_conv1 = weight_variable([7, 7, 1, 32])
        b_conv1 = bias_variable([32])

        h_conv1 = tf.nn.tanh(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

    with tf.name_scope('convolution-layer-2'):
        W_conv2 = weight_variable([7, 7, 32, 64])
        b_conv2 = bias_variable([64])

        h_conv2 = tf.nn.tanh(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

    # Width of the flattened second pooling output, as used by the dense layer.
    flat_width = img_w * img_h * 4

    with tf.name_scope('densely-connected'):
        W_fc1 = weight_variable([flat_width, 1024])
        b_fc1 = bias_variable([1024])

        h_pool2_flat = tf.reshape(h_pool2, [-1, flat_width])
        h_fc1 = tf.nn.tanh(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)

    with tf.name_scope('dropout'):
        # dropout sits between the dense layer and the readout layer
        keep_prob = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    with tf.name_scope('readout'):
        W_fc2 = weight_variable([1024, label_size])
        b_fc2 = bias_variable([label_size])

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    # loss and optimizer: mean of the per-example softmax cross-entropy
    with tf.name_scope('loss'):
        per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=y_, logits=y_conv)
        cross_entropy = tf.reduce_mean(per_example_loss)
        train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
        variable_summaries(cross_entropy)

    # predicted vs expected class index
    predict = tf.argmax(y_conv, axis=1)
    expect = tf.argmax(y_, axis=1)

    # accuracy
    with tf.name_scope('evaluate_accuracy'):
        correct_prediction = tf.equal(predict, expect)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        variable_summaries(accuracy)

    def feed(images, labels):
        # Every run in this function uses keep_prob = 1.0.
        return {x: images, y_: labels, keep_prob: 1.0}

    with tf.Session() as sess:
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(LOG_DIR + '/train', sess.graph)
        test_writer = tf.summary.FileWriter(LOG_DIR + '/test', sess.graph)

        tf.global_variables_initializer().run()

        # training loop
        for i in range(MAX_STEPS):
            batch_xs, batch_ys = train_data.next_batch(BATCH_SIZE)

            step_summary, _ = sess.run([merged, train_step],
                                       feed_dict=feed(batch_xs, batch_ys))
            train_writer.add_summary(step_summary, i)

            if i % 100 != 0:
                continue

            # accuracy on the batch that was just trained on
            valid_summary, train_accuracy = sess.run(
                [merged, accuracy], feed_dict=feed(batch_xs, batch_ys))
            train_writer.add_summary(valid_summary, i)

            # accuracy on a sample of the test set
            test_x, test_y = test_data.next_batch(2000)
            test_summary, test_accuracy = sess.run(
                [merged, accuracy], feed_dict=feed(test_x, test_y))
            test_writer.add_summary(test_summary, i)

            print('step %s, training accuracy = %.2f%%, testing accuracy = %.2f%%' % (
                i, train_accuracy * 100, test_accuracy * 100))

        train_writer.close()
        test_writer.close()

        # final check after training
        test_x, test_y = test_data.next_batch(2000)
        test_accuracy = accuracy.eval(feed_dict=feed(test_x, test_y))
        print('testing accuracy = %.2f%%' % (test_accuracy * 100, ))
# Result accumulators, one list per metric for each of the two models
# compared (prefixes GAE_ and AGAE_).
GAE_l_roc = []
GAE_l_ap = []
GAE_l_acc = []
AGAE_l_roc = []
AGAE_l_ap = []
AGAE_l_acc = []
# Arguments forwarded to get_synthetic_data when the synthetic data set is
# selected.
p = 0.01
attrNoise = 0.2
m = 10
# Each experiment reloads (or regenerates) the data and draws a fresh
# train/validation/test edge split.
for i in range(FLAGS.num_experiments):
    # Load data
    if dataset_str == 'synthetic':
        adj, features = get_synthetic_data(p=p, attrNoise=attrNoise, m=m)
    else:
        adj, features = load_data(dataset_str)
    # Store original adjacency matrix (without diagonal entries) for later
    adj_orig = adj  # sparse matrix
    # adj_orig.diagonal()[np.newaxis, :] is the diagonal as a row vector
    adj_orig = adj_orig - sp.dia_matrix(
        (adj_orig.diagonal()[np.newaxis, :], [0]),
        shape=adj_orig.shape)  # set the diagonal elements to 0
    adj_orig.eliminate_zeros(
    )  # a sparse matrix should not store explicit zeros, so call eliminate_zeros() after every update.
    adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false = mask_test_edges(
        adj, test_percent=10., val_percent=5.)
    adj = adj_train  # This is the adj matrix with all validation and testing entries masked out.
    #print(adj_train.shape)
# Default settings class args: data_dir = "BSNIP_left_full/" hidden_dim_1 = 100 hidden_dim_2 = 50 hidden_dim_3 = 5 batch_size = 32 learning_rate = 0.0001 kl_coefficient = 0.0001 activation = 'tanh' dropout = 0. # Load data adj = load_data("./data/" + args.data_dir + "original.npy") for sub in adj: np.fill_diagonal(sub, 1) # Normalize adjacency matrix (i.e. D^(.5)AD^(.5)) adj_norm = normalize_adj(adj) num_nodes = adj.shape[1] # CHANGE TO features.shape[1] LATER num_features = adj.shape[1] # Define placeholders placeholders = { 'features':