コード例 #1
0
def train_embed(data_dir, params, model_name):
    """Train a knowledge-graph embedding model and return it.

    Args:
        data_dir: directory containing the triplet dataset.
        params: dict of hyperparameters (embedding_dim, batch_size, lr,
            weight_decay, lr_decay_rate; 'alpha' required for SparseTransE).
        model_name: 'TransE' or 'SparseTransE'.

    Returns:
        The trained model (on `device`).

    Raises:
        ValueError: if `model_name` is not a supported model.
    """
    # Load hyperparameters.
    embedding_dim = params['embedding_dim']
    batch_size = params['batch_size']
    lr = params['lr']
    weight_decay = params['weight_decay']
    # warmup / lr_decay_every are deliberately fixed rather than tuned
    # (they were previously read from params -- kept hard-coded on purpose).
    warmup = 350
    lr_decay_every = 2
    lr_decay_rate = params['lr_decay_rate']
    if model_name == 'SparseTransE':
        alpha = params['alpha']

    # Load the dataset.
    # BUG FIX: previously hard-coded model_name='TransE' here even when
    # training SparseTransE; now forwards the requested model name.
    dataset = AmazonDataset(data_dir, model_name=model_name)
    relation_size = len(set(dataset.triplet_df['relation'].values))
    entity_size = len(dataset.entity_list)
    if model_name == 'TransE':
        model = TransE(int(embedding_dim), relation_size, entity_size).to(device)
    elif model_name == 'SparseTransE':
        model = SparseTransE(int(embedding_dim), relation_size, entity_size,
                             alpha=alpha).to(device)
    else:
        # Fail fast instead of hitting UnboundLocalError below.
        raise ValueError('unknown model_name: {}'.format(model_name))

    iterater = TrainIterater(batch_size=int(batch_size), data_dir=data_dir,
                             model_name=model_name)
    iterater.iterate_epoch(model, lr=lr, epoch=3000, weight_decay=weight_decay,
                           warmup=warmup, lr_decay_rate=lr_decay_rate,
                           lr_decay_every=lr_decay_every, eval_every=1e+5,
                           early_stop=True)
    return model
コード例 #2
0
def objective(trial):
    """Optuna objective: train once per validation split and return the
    negated mean score (Optuna minimizes, so higher scores are better).

    Args:
        trial: optuna.Trial used to sample hyperparameters.

    Returns:
        Negative mean validation score across all splits.
    """
    start = time.time()
    import gc
    gc.collect()

    data_dirs = ['../' + data_path + '/valid1', '../' + data_path + '/valid2']
    score_sum = 0

    # Sample hyperparameters.
    # NOTE: suggest_discrete_uniform / suggest_loguniform / suggest_uniform
    # are deprecated in recent Optuna versions; kept for compatibility with
    # the version this project pins.
    embedding_dim = trial.suggest_discrete_uniform('embedding_dim', 16, 128,
                                                   16)
    batch_size = trial.suggest_int('batch_size', 128, 512, 128)
    lr = trial.suggest_loguniform('lr', 1e-4, 1e-2)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)
    warmup = trial.suggest_int('warmup', 10, 100)
    lr_decay_every = 2  # fixed rather than tuned
    lr_decay_rate = trial.suggest_uniform('lr_decay_rate', 0.5, 1)

    for dir_path in data_dirs:
        # Load the data for this split.
        # NOTE(review): the dataset is built with model_name='SparseTransE'
        # while a plain TransE model is trained below -- confirm intentional.
        dataset = AmazonDataset(dir_path, model_name='SparseTransE')

        relation_size = len(set(dataset.triplet_df['relation'].values))
        entity_size = len(dataset.entity_list)
        model = TransE(int(embedding_dim), relation_size,
                       entity_size).to(device)
        # NOTE(review): model_name is resolved from an enclosing/global scope.
        iterater = TrainIterater(batch_size=int(batch_size),
                                 data_dir=dir_path,
                                 model_name=model_name)

        score = iterater.iterate_epoch(model,
                                       lr=lr,
                                       epoch=3000,
                                       weight_decay=weight_decay,
                                       warmup=warmup,
                                       lr_decay_rate=lr_decay_rate,
                                       lr_decay_every=lr_decay_every,
                                       eval_every=1e+5,
                                       early_stop=False)

        score_sum += score

    torch.cuda.empty_cache()

    mi, sec = time_since(time.time() - start)
    print('{}m{}sec'.format(mi, sec))

    # Generalized: average over however many splits exist instead of the
    # previous hard-coded "/ 2".
    return -score_sum / len(data_dirs)
コード例 #3
0
def main(_):
    """Entry point: build a TransE/TorusE model under TF1, train it, test it."""
    # Environment and RNG setup for reproducibility.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    np.random.seed(1)
    random.seed(a=1, version=2)

    # Read run configuration.
    config = get_parameters(reproduce=None, gpu=-1)
    print(config)

    # Load the knowledge-base dataset.
    data = KBDataset(config.data)
    print(data)

    # TF1 session configured for on-demand GPU memory growth.
    session_config = tf.ConfigProto()
    session_config.gpu_options.allow_growth = True
    sess = tf.Session(config=session_config)

    # Select and build the embedding model.
    if config.model == "transe":
        model = TransE(config, data.nent, data.nrel)
    else:
        model = TorusE(config, data.nent, data.nrel)

    # Plain SGD with explicit gradient computation / application.
    optimizer = tf.train.GradientDescentOptimizer(config.lr)
    gradients = optimizer.compute_gradients(model.loss)
    train_opt = optimizer.apply_gradients(gradients)

    # Checkpointing support, then initialize all variables.
    saver = tf.train.Saver(max_to_keep=100)
    sess.run(tf.global_variables_initializer())

    # Train, then evaluate on the test split.
    base.train(data, model, train_opt, config, sess, saver)
    base.test(data, model, sess)
コード例 #4
0
        # NOTE(review): fragment -- the enclosing function and the 'if' this
        # branch belongs to start above this view. Reciprocal triples are
        # appended to the validation/test sets here.
        valid_data.extend(reci_valid)
        reci_test = DataManager.add_reciprocals(test_data, config)
        test_data.extend(reci_test)
    else:
        # No GCN-style data is needed for this configuration.
        train_data_gcn, valid_data_gcn, test_data_gcn = None, None, None

    print(f"Training on {n_entities} entities")
    print(f"Evaluating on {n_entities - len(ent_excluded_from_corr)} entities")

    """
        Make the model.
    """
    # Materialize the configured device string into a torch.device object.
    config['DEVICE'] = torch.device(config['DEVICE'])

    # Dispatch on MODEL_NAME (case-insensitive) to construct the model.
    if config['MODEL_NAME'].lower() == 'transe':
        model = TransE(config)
    elif config['MODEL_NAME'].lower() == 'convkb':
        model = ConvKB(config)
    elif config['MODEL_NAME'].lower() == 'kbgat':
        if config['PRETRAINED_DIRNUM'] != '':   # @TODO: how do we pull the models
            pretrained_models = ...
            raise NotImplementedError
        else:
            pretrained_models = None
        model = KBGat(config, pretrained_models)
    elif config['MODEL_NAME'].lower().startswith('compgcn'):
        # TODO when USE_TEST is true training data should include the validation set as well
        if config['MODEL_NAME'].lower().endswith('transe'):
            model = CompGCNTransE(train_data_gcn, config)
        elif config['MODEL_NAME'].lower().endswith('conve'):
            model = CompGCNConvE(train_data_gcn, config)
コード例 #5
0
ファイル: multi_run.py プロジェクト: creepdaz/DiCGRL
def main():
    """Sequentially train/evaluate a KG-embedding model over splits
    s_N..N-1, warm-starting each split from the previous split's trained
    weights (continual-learning style)."""
    # Resolve dataset-specific input/output directories.
    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        # Warn but continue -- existing results may be overwritten.
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    CUDA = torch.cuda.is_available()
    if CUDA:
        print("using CUDA")

    # Seed every RNG for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    print("args = ", args)

    # The string 'None' is used as a sentinel for "no model yet".
    ori_model = 'None'
    ori_load = True

    for idx in range(args.s_N, args.N):
        data_idx = idx

        # Build the triples for this split; all_data/up_bound use the full set.
        if args.all_data or args.up_bound:
            train_data, validation_data, test_data, entity2id, relation2id, sub_entity2id, test_sub_triples, valid_triples_list, valid_train_triples_list = \
                build_all_data(args.data_dir, seed=args.seed, up_bound=args.up_bound, data_idx=data_idx)
        else:
            train_data, validation_data, test_data, entity2id, relation2id, sub_entity2id, test_sub_triples, valid_triples_list, valid_train_triples_list = \
                build_data(args.data_dir, seed=args.seed, data_idx=data_idx,
                           test_idx=args.test_idx, process=args.process, low_th=args.low_th)

        # Random-normal initial embeddings; model names containing '_' use
        # top_n-wide relation embeddings instead of a single block.
        entity_embeddings = np.random.randn(
            len(entity2id), args.embedding_size * args.k_factors)
        if "_" in args.model_name:
            relation_embeddings = np.random.randn(
                len(relation2id), args.embedding_size * args.top_n)
        else:
            relation_embeddings = np.random.randn(len(relation2id),
                                                  args.embedding_size)
        print("Initialised relations and entities randomly")

        entity_embeddings = torch.FloatTensor(entity_embeddings)
        relation_embeddings = torch.FloatTensor(relation_embeddings)
        print("Initial entity dimensions {} , relation dimensions {}".format(
            entity_embeddings.size(), relation_embeddings.size()))

        train_loader = Corpus(args, train_data, validation_data, test_data,
                              sub_entity2id, relation2id, args.batch_size,
                              args.valid_invalid_ratio, valid_triples_list,
                              valid_train_triples_list)

        # Encode every relevant hyperparameter into the checkpoint file name.
        file_name = "model_name_" + str(
            args.model_name
        ) + "_embedding_size_" + str(args.embedding_size) + "_lr_" + str(
            args.lr) + "_epochs_" + str(args.epochs) + "_k_factors_" + str(
                args.k_factors) + "_batch_size_" + str(
                    args.batch_size) + "_step_size_" + str(
                        args.step_size) + "_l1_" + str(
                            args.l1) + "_use_second_nei_" + str(
                                args.use_second_nei) + "_w1_" + str(
                                    args.w1) + "_up_bound_" + str(
                                        args.up_bound) + "_top_n_" + str(
                                            args.top_n) + "_att_lr_" + str(
                                                args.att_lr)

        # Per-split subdirectory unless training on the combined data.
        if args.all_data:
            model_path = os.path.join(args.output_dir, file_name)
        else:
            model_path = os.path.join(args.output_dir, str(data_idx),
                                      file_name)

        if not os.path.exists(model_path):
            os.makedirs(model_path)

        # Select the model implementation by name.
        if args.model_name == 'ConvKB':
            model = ConvKB(entity_embeddings, relation_embeddings, config=args)
        elif args.model_name == 'TransE':
            model = TransE(entity_embeddings, relation_embeddings, config=args)
        elif args.model_name == 'ConvKB_2':
            model = ConvKB_2(entity_embeddings,
                             relation_embeddings,
                             config=args)
        elif args.model_name == 'TransE_2':
            model = TransE_2(entity_embeddings,
                             relation_embeddings,
                             config=args)
        else:
            # NOTE(review): 'model' stays unbound here and the code below
            # raises NameError -- consider raising ValueError instead.
            print("no such model name")

        # Load initial weights from disk only once, on the first split.
        print("load path", args.load)
        if args.load != 'None' and ori_load:
            model = load_model(model, args.load)
            print("model loaded")
            ori_load = False

        # From the second split onward, continue from the previous split's model.
        if ori_model != 'None':
            model = copy.deepcopy(ori_model)
            print("load model from", idx - 1)

        model.cuda()

        # Re-enable gradients on any parameters frozen by a previous stage.
        for name, param in model.named_parameters():
            if param.requires_grad == False:
                print("False", name)
                param.requires_grad = True

        best_epoch = 0
        if args.evaluate == 0:
            best_epoch = train(args, train_loader, model, model_path, data_idx)
            # Remember the trained model so the next split can warm-start.
            ori_model = copy.deepcopy(model)
        # Evaluate both the best and the final checkpoints.
        evaluate(args,
                 model,
                 model_path,
                 train_loader,
                 file_name,
                 data_idx,
                 best_epoch=best_epoch,
                 test_sub_triples=test_sub_triples)
        evaluate(args,
                 model,
                 model_path,
                 train_loader,
                 file_name,
                 data_idx,
                 best_epoch=best_epoch,
                 test_sub_triples=test_sub_triples,
                 best_or_final='final')

        # Subsequent iterations resume from this split's final checkpoint.
        args.load = os.path.join(model_path, 'trained_final.pth')
コード例 #6
0
    # NOTE(review): fragment -- the 'if' this branch extends and the
    # enclosing function start above this view.
    elif amazon_data[0] == 'l':
        data_path = 'data_' + amazon_data + '_5core'

    params = load_params()

    import gc
    gc.collect()

    # Load the held-out test split.
    data_dir = '../' + data_path + '/test/'
    dataset = AmazonDataset(data_dir, model_name='TransE')

    # Model dimensions derived from the data.
    relation_size = len(set(list(dataset.triplet_df['relation'].values)))
    entity_size = len(dataset.entity_list)
    embedding_dim = params['embedding_dim']
    model = TransE(int(embedding_dim), relation_size, entity_size).to(device)

    batch_size = params['batch_size']
    # NOTE(review): model_name comes from an enclosing scope not visible here.
    iterater = TrainIterater(batch_size=int(batch_size),
                             data_dir=data_dir,
                             model_name=model_name)

    lr = params['lr']
    weight_decay = params['weight_decay']

    # Fixed (not tuned) learning-rate schedule settings.
    warmup = 350
    lr_decay_every = 2
    lr_decay_rate = params['lr_decay_rate']

    # Call continues beyond this view.
    score = iterater.iterate_epoch(model,
                                   lr=lr,
コード例 #7
0
ファイル: run.py プロジェクト: diweiqiang/DisenE
def main():
    """Train and evaluate a DisenE-family model (ConvKB / TransE / DisenE /
    DisenE_Trans) on the configured dataset, reporting scores for both the
    best and the final checkpoints."""

    # Resolve dataset-specific input/output directories.
    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        # Warn but continue -- existing results may be overwritten.
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    CUDA = torch.cuda.is_available()

    # Seed all RNGs for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    print("args = ", args)

    train_data, validation_data, test_data, entity2id, relation2id = build_data(
        args.data_dir)

    # Either warm-start embeddings from TransE vectors on disk, or draw
    # them from a standard normal distribution.
    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data_dir, 'entity2vec.txt'),
            os.path.join(args.data_dir, 'relation2vec.txt'), args.k_factors,
            args.embedding_size)
        print("Initialised relations and entities from TransE")

    else:
        entity_embeddings = np.random.randn(
            len(entity2id), args.embedding_size * args.k_factors)
        relation_embeddings = np.random.randn(len(relation2id),
                                              args.embedding_size)
        print("Initialised relations and entities randomly")

    entity_embeddings = torch.FloatTensor(entity_embeddings)
    relation_embeddings = torch.FloatTensor(relation_embeddings)
    print("Initial entity dimensions {} , relation dimensions {}".format(
        entity_embeddings.size(), relation_embeddings.size()))

    train_loader = Corpus(args, train_data, validation_data, test_data,
                          entity2id, relation2id, args.batch_size,
                          args.valid_invalid_ratio)

    # Encode every relevant hyperparameter into the output file name.
    file_name = "model_name_" + str(
        args.model_name
    ) + "_embedding_size_" + str(args.embedding_size) + "_k_factors_" + str(
        args.k_factors) + "_lr_" + str(args.lr) + "_epochs_" + str(
            args.epochs
        ) + "_out_channels_" + str(args.out_channels) + "_batch_size_" + str(
            args.batch_size) + "_dropout_" + str(
                args.dropout) + "_pretrained_emb_" + str(
                    args.pretrained_emb) + "_step_size_" + str(
                        args.step_size) + "_gamma_" + str(
                            args.gamma) + "_w1_" + str(args.w1) + "_w2_" + str(
                                args.w2) + "_sample_num_" + str(
                                    args.sample_num) + "_top_n_" + str(
                                        args.top_n)

    model_path = os.path.join(args.output_dir, file_name)
    output_file = os.path.join(args.output_dir,
                               "results_" + file_name + ".txt")

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    # Select the model implementation by name.
    if args.model_name == 'ConvKB':
        model = ConvKB(entity_embeddings, relation_embeddings, config=args)
    elif args.model_name == 'TransE':
        model = TransE(entity_embeddings, relation_embeddings, config=args)
    elif args.model_name == 'DisenE':
        model = DisenE(entity_embeddings, relation_embeddings, config=args)
    elif args.model_name == 'DisenE_Trans':
        model = DisenE_Trans(entity_embeddings,
                             relation_embeddings,
                             config=args)

    else:
        # NOTE(review): 'model' stays unbound here and the code below raises
        # NameError -- consider raising ValueError instead.
        print("no such model name")

    # Optionally resume from a checkpoint ('None' string is the sentinel).
    if args.load != 'None':
        model.load_state_dict(torch.load(args.load))
        print("model loaded")

    if CUDA:
        print("using CUDA")
        model.cuda()

    # Train unless in evaluate-only mode, then score both the best and the
    # final checkpoints.
    best_epoch = 0
    if args.evaluate == 0:
        best_epoch = train(args, train_loader, model, CUDA, model_path)
    evaluate(args,
             model,
             model_path,
             train_loader,
             output_file,
             best_epoch=best_epoch,
             best_or_final='best')
    evaluate(args,
             model,
             model_path,
             train_loader,
             output_file,
             best_epoch=best_epoch,
             best_or_final='final')
コード例 #8
0
ファイル: run.py プロジェクト: mashabelyi/SubjectiveKB
	# NOTE(review): fragment -- the 'if' these lines belong to starts above
	# this view. Persist derived dataset sizes into the run config.
	config['numTest'] = len(test)
	config['numEntities'] = len(all_ents)
	config['numRelations'] = len(all_rels)
	with open(os.path.join(args.name, 'config.json'), 'w') as f:
		json.dump(config, f, indent=2)
else:
	#TODO load config into config variable
	# Reuse the configuration saved by a previous run.
	with open(os.path.join(args.name, 'config.json'), 'r') as f:
		config = json.load(f)


ent_pretrained, rel_pretrained = None, None
if args.pretrained:
	# load pretrained weights from transe checkpoint 
	print("loading pretrained weights")
	model = TransE(len(rel2id), len(ent2id), dim=config['embedding_dim'], norm=config['norm'])
	model.load_state_dict(torch.load(args.pretrained))

	# Extract the raw embedding tensors to seed the model built below.
	ent_pretrained = model.ents.weight.data
	rel_pretrained = model.rels.weight.data


# =========================================
# Initialize MODEL
# =========================================
# NOTE(review): the first two branches use 'if'/'if' rather than 'if'/'elif';
# harmless since args.model matches at most one, but confirm intent.
if args.model == 'transE':
	model = TransE(len(rel2id), len(ent2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'])
if args.model == 'transH':
	model = TransH(len(rel2id), len(ent2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'])
elif args.model == 'subjD':
	model = SubjKB_Deviation(len(rel2id), len(ent2id), len(src2id), dim=config['embedding_dim'], norm=config['norm'], margin=config['margin'], l2reg=config['l2reg'], relPretrained=rel_pretrained, entPretrained=ent_pretrained)
コード例 #9
0
 def __init__(self, args):
     """Initialize the wrapper: store args and build the underlying
     TransE.TransE model from ``args.modelparam``."""
     super(TransEModel, self).__init__(args)
     self.args = args
     # Instantiate the wrapped embedding model and log its structure.
     self.model = TransE.TransE(args.modelparam)
     print(self.model)