Example No. 1
    def __init__(self, args):
        self.args = args
        # Load the YAML config and keep only the section for the requested task.
        with open(args.config, 'r') as stream:
            config = yaml.load(stream, Loader=yaml.SafeLoader)
            self.config = config[self.args.task]
        # Pick the data utilities that match the model type.
        if args.model_type == 'transformer':
            self.data_utils = DataUtils(self.config, args.train, args.task)
        elif args.model_type == 'bert':
            assert args.task == 'seq2seq'
            self.data_utils = bert_utils(self.config, args.train, args.task)
        # Create the checkpoint directory only when training with checkpointing enabled.
        if args.train and args.save_checkpoints:
            self.model_dir = make_save_dir(
                os.path.join(args.model_dir, args.task, args.exp_name))
        self._disable_comet = args.disable_comet
        self._model_type = args.model_type
        self._save_checkpoints = args.save_checkpoints

        ###### load vocab & build model ######
        print("====================")
        print("start to build model")
        print("====================")
        vocab_size = self.data_utils.vocab_size
        print("Vocab Size: %d" % (vocab_size))
        self.model = self.make_model(src_vocab=vocab_size,
                                     tgt_vocab=vocab_size,
                                     config=self.config['model'])
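
A minimal sketch of how a constructor like this is usually driven from the command line; the flag names below are assumptions inferred from the attributes the snippet reads, not the original CLI:

# Hypothetical driver for the __init__ above ('Trainer' stands in for whatever
# class owns it); every flag name is inferred from the args.<attribute> accesses.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--config', default='config.yml')
parser.add_argument('--task', default='seq2seq')
parser.add_argument('--model_type', choices=['transformer', 'bert'], default='transformer')
parser.add_argument('--model_dir', default='models')
parser.add_argument('--exp_name', default='baseline')
parser.add_argument('--train', action='store_true')
parser.add_argument('--save_checkpoints', action='store_true')
parser.add_argument('--disable_comet', action='store_true')
args = parser.parse_args()
# trainer = Trainer(args)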
Example No. 2
def train(args):
    graph_file = './data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz')
    data_loader = DataUtils(graph_file)

    n = args.n_trials
    res_hom, res_het = [0] * n, [0] * n
    tm = [0] * n
    for i in range(n):
        tm[i] = TrialManager(args=copy.deepcopy(args),
                             ind=i,
                             data_loader=data_loader)
    # TF1-style graph/session execution through the compat.v1 layer.
    import tensorflow
    tf = tensorflow.compat.v1

    sess = tf.Session()
    tf.global_variables_initializer().run(session=sess)
    losses = []

    with sess.as_default():
        for b in range(1, args.num_batches + 1):
            fd = {}
            to_comp = []
            # Merge the per-trial fetches and feed dicts into a single sess.run call.
            for to_comp1, fd1 in map(train_batch_command, tm):
                to_comp.extend(to_comp1)
                fd.update(fd1)
            res = sess.run(to_comp, feed_dict=fd)
            # Keep every other fetched value (the loss terms) for averaging.
            losses.append(res[0::2])
            if (b % 25) == 0:
                losses = np.array(losses)
                for i in range(n):
                    res, val_hom_auc = tm[i].test()
                    best_test_hom_auc, best_test_het_auc = res['hom'], res['het']
                    res_hom[i] = best_test_hom_auc * 100
                    res_het[i] = best_test_het_auc * 100
                    print(
                        f'batch:{b:8} - '
                        f'time:{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())} - '
                        f'loss:{np.mean(losses[:, i]):.4f} - '
                        f'val(hom):{val_hom_auc*100:.4f} - '
                        f'test(by best val):[hom:{best_test_hom_auc:.4f},het:{best_test_het_auc:.4f}]'
                    )
                losses = []
        print('finished')

    def stats(x):
        # Mean and standard error of the mean, formatted for printing.
        return f'{np.mean(x):.2f}, {np.std(x) / np.sqrt(len(x)):.2f}'

    print('hom', stats(res_hom), [f'{xx:.2f}' for xx in res_hom])
    print('het', stats(res_het), [f'{xx:.2f}' for xx in res_het])
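
The snippet above relies on module-level imports and CLI arguments that are not shown; a rough sketch of that scaffolding, with names inferred from the attributes used (TrialManager, DataUtils and train_batch_command come from the surrounding project):

# Hypothetical scaffolding for the train() snippet above; defaults are placeholders.
import argparse
import copy
import time

import numpy as np

parser = argparse.ArgumentParser()
parser.add_argument('--name', default='cora')            # expects ./data/<name>/<name>_train.npz
parser.add_argument('--n_trials', type=int, default=5)
parser.add_argument('--num_batches', type=int, default=500)
# train(parser.parse_args())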
Example No. 3
import logging

from algorithms import *
from utils import DataUtils

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # CSV inputs: base-station coordinates and the per-day internet-usage table
    # (the original file names are in Chinese).
    data = DataUtils('data/基站经纬度.csv', 'data/上网信息输出表(日表)6月15号之后.csv')
    mip_placer = MIPServerPlacer(data.base_stations, data.distances)
    mip_placer.place_server(10, 2)
    print(mip_placer.objective_latency(), mip_placer.objective_workload())
    # kmeans_placer = KMeansServerPlacement(data.base_stations, data.distances)
    # kmeans_placer.place_server(300, 30)
    # print(kmeans_placer.objective_latency(), kmeans_placer.objective_workload())
    # top_k_placer = TopKServerPlacement(data.base_stations, data.distances)
    # top_k_placer.place_server(300, 30)
    # print(top_k_placer.objective_latency(), top_k_placer.objective_workload())
    # random_placer = RandomServerPlacement(data.base_stations, data.distances)
    # random_placer.place_server(300, 30)
    # print(random_placer.objective_latency(), random_placer.objective_workload())
    pass
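
The commented-out blocks all follow the same pattern; a compact way to run each alternative strategy, assuming the constructors and methods shown in those comments:

# Hypothetical comparison loop over the other placement strategies; the
# signatures are taken from the commented-out lines above.
for placer_cls in (KMeansServerPlacement, TopKServerPlacement, RandomServerPlacement):
    placer = placer_cls(data.base_stations, data.distances)
    placer.place_server(300, 30)
    print(placer_cls.__name__, placer.objective_latency(), placer.objective_workload())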
Example No. 4
    def load_data(self):
        du = DataUtils(self.cfg)
        self.train = du.train
        self.dev = du.dev
Example No. 5
def train(args):
    # Hard-coded local path to the dataset; use the *_train split unless training on the full graph.
    graph_file = '/Users/bhagya/PycharmProjects/Old data/line-master data/%s/%s.npz' % (args.name, args.name)
    graph_file = graph_file.replace('.npz', '_train.npz') if not args.is_all else graph_file
    data_loader = DataUtils(graph_file, args.is_all)

    suffix = args.proximity
    # With args.suf == 'oh', replace the node features by an identity matrix (one-hot style features).
    args.X = data_loader.X if args.suf != 'oh' else sp.identity(data_loader.X.shape[0])
    if not args.is_all:
        args.val_edges = data_loader.val_edges
        args.val_ground_truth = data_loader.val_ground_truth

    m = args.model
    name = m + '_' + args.name
    if m == 'lace':
        model = LACE(args)
    elif m == 'glace':
        model = GLACE(args)

    with tf.Session() as sess:
        print('-------------------------- ' + m + ' --------------------------')
        if model.val_set:
            print('batches\tloss\tval_auc\tval_ap\tsampling time\ttraining_time\tdatetime')
        else:
            print('batches\tloss\tsampling time\ttraining_time\tdatetime')

        tf.global_variables_initializer().run()
        sampling_time, training_time = 0, 0

        for b in range(args.num_batches):
            t1 = time.time()
            # w (the sampled edge weight) is fetched but not used in the feed dict here.
            u_i, u_j, label, w = data_loader.fetch_next_batch(batch_size=args.batch_size, K=args.K)
            feed_dict = {model.u_i: u_i, model.u_j: u_j, model.label: label}
            t2 = time.time()
            sampling_time += t2 - t1

            loss, _ = sess.run([model.loss, model.train_op], feed_dict=feed_dict)

            training_time += time.time() - t2

            # Every 50 batches, log progress (with validation metrics if a validation set exists).
            if b % 50 == 0:
                if model.val_set:
                    val_energy = sess.run(model.neg_val_energy)
                    val_auc, val_ap = score_link_prediction(data_loader.val_ground_truth, val_energy)
                    print('%d\t%f\t%f\t%f\t%0.2f\t%0.2f\t%s' %
                          (b, loss, val_auc, val_ap, sampling_time, training_time,
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                else:
                    print('%d\t%f\t%0.2f\t%0.2f\t%s' %
                          (b, loss, sampling_time, training_time,
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
                sampling_time, training_time = 0, 0

            # Periodically dump the learned embeddings (mu/sigma for glace, a single matrix otherwise).
            if b % 50 == 0 or b == (args.num_batches - 1):
                if m == 'glace':
                    mu, sigma = sess.run([model.embedding, model.sigma])
                    pickle.dump({'mu': data_loader.embedding_mapping(mu),
                                 'sigma': data_loader.embedding_mapping(sigma)},
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
                    # if model.val_set:
                    #     r = kl_link_pred(mu, sigma, test_edges)
                    #     print('{:.4f}, {:.4f}'.format(r[0], r[1]))
                else:
                    embedding = sess.run(model.embedding)
                    pickle.dump(data_loader.embedding_mapping(embedding),
                                open('emb/%s%s_embedding_%s.pkl' % (name, '_all' if args.is_all else '', suffix), 'wb'))
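
For reference, the pickled embeddings written above can be read back as sketched below; the file name mirrors the 'emb/%s%s_embedding_%s.pkl' pattern, and the concrete model/dataset/suffix values are assumptions:

# Hypothetical loader for the dumped embeddings; for 'glace' the pickle holds a
# dict with 'mu' and 'sigma', for 'lace' a single embedding structure.
import pickle

with open('emb/glace_cora_embedding_first-order.pkl', 'rb') as f:
    emb = pickle.load(f)

mu, sigma = emb['mu'], emb['sigma']
print(len(mu), 'node embeddings loaded')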