def __init__(self, args):
    self.args = args

    # Load the YAML config and keep only the section for the current task.
    with open(args.config, 'r') as stream:
        config = yaml.load(stream, Loader=yaml.SafeLoader)
    self.config = config[self.args.task]

    # Pick the data pipeline according to the model type.
    if args.model_type == 'transformer':
        self.data_utils = DataUtils(self.config, args.train, args.task)
    elif args.model_type == 'bert':
        assert args.task == 'seq2seq'
        self.data_utils = bert_utils(self.config, args.train, args.task)

    # Only create a checkpoint directory when training with checkpointing enabled.
    if args.train and args.save_checkpoints:
        self.model_dir = make_save_dir(
            os.path.join(args.model_dir, args.task, args.exp_name))

    self._disable_comet = args.disable_comet
    self._model_type = args.model_type
    self._save_checkpoints = args.save_checkpoints

    ###### loading .... ######
    print("====================")
    print("start to build model")
    print("====================")
    vocab_size = self.data_utils.vocab_size
    print("Vocab Size: %d" % vocab_size)
    self.model = self.make_model(src_vocab=vocab_size,
                                 tgt_vocab=vocab_size,
                                 config=self.config['model'])
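# For context, a minimal sketch of how this constructor might be driven. The class
# name `Trainer`, the argparse defaults, and the YAML layout are assumptions for
# illustration; only the attribute names that __init__ reads from `args` come from
# the code above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--config', default='config.yaml')        # YAML with one top-level key per task
parser.add_argument('--task', default='seq2seq')              # selects config[args.task]
parser.add_argument('--model_type', default='transformer')    # 'transformer' or 'bert'
parser.add_argument('--train', action='store_true')
parser.add_argument('--save_checkpoints', action='store_true')
parser.add_argument('--disable_comet', action='store_true')
parser.add_argument('--model_dir', default='./train_model')
parser.add_argument('--exp_name', default='exp1')
args = parser.parse_args()

# config.yaml is expected to look roughly like:
#   seq2seq:
#     model:
#       ...model hyperparameters...
# trainer = Trainer(args)   # hypothetical class name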
def train(args):
    # Each graph lives in ./data/<name>/<name>_train.npz
    graph_file = './data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz')
    data_loader = DataUtils(graph_file)

    # One TrialManager per trial, all sharing the same data loader.
    n = args.n_trials
    res_hom, res_het = [0] * n, [0] * n
    tm = [0] * n
    for i in range(n):
        tm[i] = TrialManager(args=copy.deepcopy(args), ind=i, data_loader=data_loader)

    import tensorflow
    tf = tensorflow.compat.v1
    sess = tf.Session()
    tf.global_variables_initializer().run(session=sess)

    losses = []
    with sess.as_default():
        for b in range(1, args.num_batches + 1):
            # Merge the ops and feed dicts from every trial into a single sess.run call.
            fd = {}
            to_comp = []
            for to_comp1, fd1 in map(train_batch_command, tm):
                to_comp.extend(to_comp1)
                for k, v in fd1.items():
                    fd[k] = v
            res = sess.run(to_comp, feed_dict=fd)
            losses.append(res[0::2])

            # Every 25 batches, evaluate each trial and report running statistics.
            if (b % 25) == 0:
                losses = np.array(losses)
                for i in range(n):
                    res, val_hom_auc = tm[i].test()
                    best_test_hom_auc, best_test_het_auc = res['hom'], res['het']
                    res_hom[i], res_het[i] = best_test_hom_auc * 100, best_test_het_auc * 100
                    print(
                        f'batch:{b:8} - '
                        f'time:{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} - '
                        f'loss:{np.mean(losses[:, i]):.4f} - '
                        f'val(hom):{val_hom_auc * 100:.4f} - '
                        f'test(by best val):[hom:{best_test_hom_auc:.4f},het:{best_test_het_auc:.4f}]'
                    )
                losses = []

    print('finished')

    def stats(x):
        return f'{np.mean(x):.2f}, {np.std(x) / np.sqrt(len(x)):.2f}'

    print('hom', stats(res_hom), [f'{xx:.2f}' for xx in res_hom])
    print('het', stats(res_het), [f'{xx:.2f}' for xx in res_het])
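# Note on the merged sess.run above: `train_batch_command` must return, per trial, a
# list of ops plus a feed dict, and `res[0::2]` assumes that list interleaves
# (loss, train_op) so even-indexed results are losses. A hedged sketch of that
# contract, using hypothetical attribute names on TrialManager:
def train_batch_command(trial_manager):
    """Sketch only: return ([loss_op, train_op], feed_dict) for one trial so that,
    after concatenation across trials, losses sit at even indices of sess.run's output."""
    u_i, u_j, label = trial_manager.next_batch()   # assumed sampling helper
    feed_dict = {trial_manager.model.u_i: u_i,
                 trial_manager.model.u_j: u_j,
                 trial_manager.model.label: label}
    return [trial_manager.model.loss, trial_manager.model.train_op], feed_dict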
import logging

from algorithms import *
from utils import DataUtils

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    # '基站经纬度.csv': base-station latitude/longitude;
    # '上网信息输出表(日表)6月15号之后.csv': daily internet-usage records after June 15.
    data = DataUtils('data/基站经纬度.csv', 'data/上网信息输出表(日表)6月15号之后.csv')

    mip_placer = MIPServerPlacer(data.base_stations, data.distances)
    mip_placer.place_server(10, 2)
    print(mip_placer.objective_latency(), mip_placer.objective_workload())

    # kmeans_placer = KMeansServerPlacement(data.base_stations, data.distances)
    # kmeans_placer.place_server(300, 30)
    # print(kmeans_placer.objective_latency(), kmeans_placer.objective_workload())

    # top_k_placer = TopKServerPlacement(data.base_stations, data.distances)
    # top_k_placer.place_server(300, 30)
    # print(top_k_placer.objective_latency(), top_k_placer.objective_workload())

    # random_placer = RandomServerPlacement(data.base_stations, data.distances)
    # random_placer.place_server(300, 30)
    # print(random_placer.objective_latency(), random_placer.objective_workload())
def load_data(self):
    du = DataUtils(self.cfg)
    self.train = du.train
    self.dev = du.dev
def train(args):
    graph_file = '/Users/bhagya/PycharmProjects/Old data/line-master data/%s/%s.npz' % (args.name, args.name)
    # Use the full graph when training on all edges, otherwise the train split.
    graph_file = graph_file.replace('.npz', '_train.npz') if not args.is_all else graph_file
    data_loader = DataUtils(graph_file, args.is_all)

    suffix = args.proximity
    # 'oh' replaces node attributes with one-hot (identity) features.
    args.X = data_loader.X if args.suf != 'oh' else sp.identity(data_loader.X.shape[0])
    if not args.is_all:
        args.val_edges = data_loader.val_edges
        args.val_ground_truth = data_loader.val_ground_truth

    m = args.model
    name = m + '_' + args.name
    if m == 'lace':
        model = LACE(args)
    elif m == 'glace':
        model = GLACE(args)

    with tf.Session() as sess:
        print('-------------------------- ' + m + ' --------------------------')
        if model.val_set:
            print('batches\tloss\tval_auc\tval_ap\tsampling time\ttraining_time\tdatetime')
        else:
            print('batches\tloss\tsampling time\ttraining_time\tdatetime')
        tf.global_variables_initializer().run()
        sampling_time, training_time = 0, 0

        for b in range(args.num_batches):
            # Sample a batch of (source, target, label) edge pairs.
            t1 = time.time()
            u_i, u_j, label, w = data_loader.fetch_next_batch(batch_size=args.batch_size, K=args.K)
            feed_dict = {model.u_i: u_i, model.u_j: u_j, model.label: label}
            t2 = time.time()
            sampling_time += t2 - t1

            loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)
            training_time += time.time() - t2

            if model.val_set:
                if b % 50 == 0:
                    val_energy = sess.run(model.neg_val_energy)
                    val_auc, val_ap = score_link_prediction(data_loader.val_ground_truth, val_energy)
                    print('%d\t%f\t%f\t%f\t%0.2f\t%0.2f\t%s' %
                          (b, loss, val_auc, val_ap, sampling_time, training_time,
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                    sampling_time, training_time = 0, 0
            else:
                if b % 50 == 0:
                    print('%d\t%f\t%0.2f\t%0.2f\t%s' %
                          (b, loss, sampling_time, training_time,
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                    sampling_time, training_time = 0, 0

            # Periodically dump the learned embeddings.
            if b % 50 == 0 or b == (args.num_batches - 1):
                if m == 'glace':
                    mu, sigma = sess.run([model.embedding, model.sigma])
                    pickle.dump({'mu': data_loader.embedding_mapping(mu),
                                 'sigma': data_loader.embedding_mapping(sigma)},
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
                    # if model.val_set:
                    #     r = kl_link_pred(mu, sigma, test_edges)
                    #     print('{:.4f}, {:.4f}'.format(r[0], r[1]))
                else:
                    embedding = sess.run(model.embedding)
                    pickle.dump(data_loader.embedding_mapping(embedding),
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
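# The dumps above can be read back with a plain pickle.load. This is a usage sketch;
# the concrete `name`, `suffix`, and `is_all` values are placeholders and must match
# whatever was used when the embeddings were written.
import pickle

name, suffix, is_all = 'glace_cora', 'first-order', False   # hypothetical values
with open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if is_all else '', suffix), 'rb') as f:
    emb = pickle.load(f)
# For the 'glace' model this is a dict {'mu': ..., 'sigma': ...}; for 'lace' it is a
# single embedding matrix, both passed through data_loader.embedding_mapping before saving.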