Code example #1
File: multi_run.py  Project: creepdaz/DiCGRL
def main():
    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    CUDA = torch.cuda.is_available()
    if CUDA:
        print("using CUDA")

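    # Seed Python, NumPy and PyTorch (CPU and all GPUs) so runs are reproducible.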
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    print("args = ", args)

    ori_model = 'None'
    ori_load = True

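    # Train on the data splits sequentially; each split starts from the model
    # produced by the previous one (carried over via ori_model below).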
    for idx in range(args.s_N, args.N):
        data_idx = idx

        if args.all_data or args.up_bound:
            train_data, validation_data, test_data, entity2id, relation2id, sub_entity2id, test_sub_triples, valid_triples_list, valid_train_triples_list = \
                build_all_data(args.data_dir, seed=args.seed, up_bound=args.up_bound, data_idx=data_idx)
        else:
            train_data, validation_data, test_data, entity2id, relation2id, sub_entity2id, test_sub_triples, valid_triples_list, valid_train_triples_list = \
                build_data(args.data_dir, seed=args.seed, data_idx=data_idx,
                           test_idx=args.test_idx, process=args.process, low_th=args.low_th)

        entity_embeddings = np.random.randn(
            len(entity2id), args.embedding_size * args.k_factors)
        if "_" in args.model_name:
            relation_embeddings = np.random.randn(
                len(relation2id), args.embedding_size * args.top_n)
        else:
            relation_embeddings = np.random.randn(len(relation2id),
                                                  args.embedding_size)
        print("Initialised relations and entities randomly")

        entity_embeddings = torch.FloatTensor(entity_embeddings)
        relation_embeddings = torch.FloatTensor(relation_embeddings)
        print("Initial entity dimensions {} , relation dimensions {}".format(
            entity_embeddings.size(), relation_embeddings.size()))

        train_loader = Corpus(args, train_data, validation_data, test_data,
                              sub_entity2id, relation2id, args.batch_size,
                              args.valid_invalid_ratio, valid_triples_list,
                              valid_train_triples_list)

        file_name = "model_name_" + str(
            args.model_name
        ) + "_embedding_size_" + str(args.embedding_size) + "_lr_" + str(
            args.lr) + "_epochs_" + str(args.epochs) + "_k_factors_" + str(
                args.k_factors) + "_batch_size_" + str(
                    args.batch_size) + "_step_size_" + str(
                        args.step_size) + "_l1_" + str(
                            args.l1) + "_use_second_nei_" + str(
                                args.use_second_nei) + "_w1_" + str(
                                    args.w1) + "_up_bound_" + str(
                                        args.up_bound) + "_top_n_" + str(
                                            args.top_n) + "_att_lr_" + str(
                                                args.att_lr)

        if args.all_data:
            model_path = os.path.join(args.output_dir, file_name)
        else:
            model_path = os.path.join(args.output_dir, str(data_idx),
                                      file_name)

        if not os.path.exists(model_path):
            os.makedirs(model_path)

        if args.model_name == 'ConvKB':
            model = ConvKB(entity_embeddings, relation_embeddings, config=args)
        elif args.model_name == 'TransE':
            model = TransE(entity_embeddings, relation_embeddings, config=args)
        elif args.model_name == 'ConvKB_2':
            model = ConvKB_2(entity_embeddings,
                             relation_embeddings,
                             config=args)
        elif args.model_name == 'TransE_2':
            model = TransE_2(entity_embeddings,
                             relation_embeddings,
                             config=args)
        else:
            raise ValueError("no such model name: " + str(args.model_name))

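        # Warm-start from an external checkpoint only for the first split; later
        # splits copy the model trained on the previous split instead.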
        print("load path", args.load)
        if args.load != 'None' and ori_load:
            model = load_model(model, args.load)
            print("model loaded")
            ori_load = False

        if ori_model != 'None':
            model = copy.deepcopy(ori_model)
            print("load model from", idx - 1)

        if CUDA:
            model.cuda()

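        # Unfreeze any parameters that the loaded checkpoint left with requires_grad=False.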
        for name, param in model.named_parameters():
            if not param.requires_grad:
                print("False", name)
                param.requires_grad = True

        best_epoch = 0
        if args.evaluate == 0:
            best_epoch = train(args, train_loader, model, model_path, data_idx)
            ori_model = copy.deepcopy(model)
        evaluate(args,
                 model,
                 model_path,
                 train_loader,
                 file_name,
                 data_idx,
                 best_epoch=best_epoch,
                 test_sub_triples=test_sub_triples)
        evaluate(args,
                 model,
                 model_path,
                 train_loader,
                 file_name,
                 data_idx,
                 best_epoch=best_epoch,
                 test_sub_triples=test_sub_triples,
                 best_or_final='final')

        args.load = os.path.join(model_path, 'trained_final.pth')
Code example #2
def main():
    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    # CUDA availability flag (added here as an assumption: the snippet references
    # CUDA without defining it, so derive it to keep the example self-contained).
    CUDA = torch.cuda.is_available()
    if CUDA:
        args.use_cuda = CUDA
        torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    print("args = ", args)

    ori_model = 'None'
    ori_load = True

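    # Process the splits in order, reusing the previously trained model for each new split.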
    for idx in range(args.N):
        data_idx = idx
        # Load data
        adj, features, labels, idx_train, idx_val, idx_test, test_sub_idx, ori_adj, ori_idx_train, ori_idx_valid = \
            load_data(args, data_idx, base_path=args.data_dir, dataset=args.dataset)

        file_name = "model_name_" + str(args.model_name) + "_lr_" + str(
            args.lr) + "_epochs_" + str(args.epochs) + "_k_factors_" + str(
                args.k_factors) + "_up_bound_" + str(
                    args.up_bound) + "_top_n_" + str(
                        args.top_n) + "_att_lr_" + str(
                            args.att_lr) + "_hidden_" + str(
                                args.hidden) + "_w1_" + str(args.w1)

        if args.all_data:
            model_path = os.path.join(args.output_dir, file_name)
        else:
            model_path = os.path.join(args.output_dir, str(data_idx),
                                      file_name)
        if not os.path.exists(model_path):
            os.makedirs(model_path)

        # Model and optimizer
        if args.model_name == "SpGAT":
            model = SpGAT(nfeat=features.shape[1],
                          nhid=args.hidden,
                          nclass=int(labels.max()) + 1,
                          dropout=args.dropout,
                          nheads=args.nb_heads,
                          alpha=args.alpha)
        elif args.model_name == "SpGAT_2":
            model = SpGAT_2(nfeat=features.shape[1],
                            nclass=int(labels.max()) + 1,
                            config=args)
        elif args.model_name == "SpGAT2":
            model = SpGAT_2(nfeat=features.shape[1],
                            nclass=int(labels.max()) + 1,
                            config=args)
        else:
            model = GAT(nfeat=features.shape[1],
                        nhid=args.hidden,
                        nclass=int(labels.max()) + 1,
                        dropout=args.dropout,
                        nheads=args.nb_heads,
                        alpha=args.alpha)

        print("load path", args.load)
        if args.load != 'None' and ori_load:
            model = load_model(model, args.load)
            print("model loaded")
            ori_load = False

        if ori_model != 'None':
            model = copy.deepcopy(ori_model)
            print("load model from", idx - 1)

        print(model.state_dict().keys())

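        # Move the model and all tensors to the GPU when CUDA is available.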
        if CUDA:
            model.cuda()
            features = Variable(features.cuda())
            adj = Variable(adj.cuda())
            labels = Variable(labels.cuda())
            idx_train = idx_train.cuda()
            idx_val = idx_val.cuda()
            idx_test = idx_test.cuda()
            if "_" in args.model_name and not args.all_data and data_idx > 0 and ori_adj is not None:
                ori_adj = Variable(ori_adj.cuda())
                ori_idx_train = ori_idx_train.cuda()
                ori_idx_valid = ori_idx_valid.cuda()

        loader = Corpus(features, adj, labels, idx_train, idx_val, idx_test,
                        ori_adj, ori_idx_train, ori_idx_valid)

        for name, param in model.named_parameters():
            if not param.requires_grad:
                print("False", name)
                param.requires_grad = True

        best_epoch = 0
        if args.evaluate == 0:
            best_epoch = train(model, model_path, loader, data_idx)
            ori_model = copy.deepcopy(model)
        evaluate(model,
                 model_path,
                 loader,
                 data_idx,
                 best_epoch=best_epoch,
                 test_sub_idx=test_sub_idx)
        evaluate(model,
                 model_path,
                 loader,
                 data_idx,
                 best_epoch=best_epoch,
                 test_sub_idx=test_sub_idx,
                 best_or_final='final')

        args.load = os.path.join(model_path, 'trained_final.pth')
Code example #3
File: train.py  Project: YangLifromXJTU/NASE
def main():

    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    CUDA = torch.cuda.is_available()
    if CUDA:
        print("using CUDA")

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    print("args = ", args)

    train_data, validation_data, test_data, entity2id, relation2id = build_data(
        args.data_dir)

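    # Initialise entity/relation embeddings from pretrained TransE vectors or randomly.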
    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data_dir, 'entity2vec.txt'),
            os.path.join(args.data_dir, 'relation2vec.txt'), args.k_factors,
            args.embedding_size)
        print("Initialised relations and entities from TransE")

    else:
        entity_embeddings = np.random.randn(len(entity2id),
                                            args.embedding_size)
        relation_embeddings = np.random.randn(len(relation2id),
                                              args.embedding_size)
        print("Initialised relations and entities randomly")

    entity_embeddings = torch.FloatTensor(entity_embeddings)
    relation_embeddings = torch.FloatTensor(relation_embeddings)
    print("Initial entity dimensions {} , relation dimensions {}".format(
        entity_embeddings.size(), relation_embeddings.size()))

    train_loader = Corpus(args, train_data, validation_data, test_data,
                          entity2id, relation2id, args.batch_size,
                          args.valid_invalid_ratio)

    file_name = "train_" + str(args.model_name) + "_embedding_size_" + str(
        args.embedding_size) + "_lr_" + str(args.lr) + "_epochs_" + str(
            args.epochs) + "_batch_size_" + str(
                args.batch_size) + "_dropout_" + str(
                    args.dropout) + "_step_size_" + str(
                        args.step_size) + "_arch_" + str(
                            args.arch) + "_layers_" + str(
                                args.layers) + "_margin_" + str(
                                    args.margin) + "_do_margin_loss_" + str(
                                        args.do_margin_loss)

    model_path = os.path.join(args.output_dir, file_name)
    output_file = os.path.join(args.output_dir,
                               "results_" + file_name + ".txt")

    if not os.path.exists(model_path):
        os.makedirs(model_path)

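    # Build the NASE model from the genotype named by args.arch.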
    if args.model_name == 'NASE':
        arc = getattr(genotypes, args.arch)
        print("\narc:", arc)
        model = NASE(entity_embeddings, relation_embeddings, arc, config=args)
    else:
        raise ValueError("no such model name: " + str(args.model_name))

    if args.load != 'None':
        model.load_state_dict(torch.load(args.load))
        print("model loaded")

    if CUDA:
        model.cuda()

    best_epoch = 0
    if args.evaluate == 0:
        best_epoch = train(args, train_loader, model, model_path)
    evaluate(args,
             model,
             model_path,
             train_loader,
             output_file,
             best_epoch=best_epoch,
             best_or_final='best')
    evaluate(args,
             model,
             model_path,
             train_loader,
             output_file,
             best_epoch=best_epoch,
             best_or_final='final')
Code example #4
File: search.py  Project: YangLifromXJTU/NASE
def main():

    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    CUDA = torch.cuda.is_available()
    if CUDA:
        print("using CUDA")

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    print("args = ", args)

    train_data, validation_data, test_data, entity2id, relation2id = build_data(args.data_dir)

    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(os.path.join(args.data_dir, 'entity2vec.txt'),
                                                                 os.path.join(args.data_dir, 'relation2vec.txt'),
                                                                 args.k_factors, args.embedding_size)
        print("Initialised relations and entities from TransE")

    else:
        entity_embeddings = np.random.randn(len(entity2id), args.embedding_size)
        relation_embeddings = np.random.randn(len(relation2id), args.embedding_size)
        print("Initialised relations and entities randomly")

    entity_embeddings = torch.FloatTensor(entity_embeddings)
    relation_embeddings = torch.FloatTensor(relation_embeddings)
    print("Initial entity dimensions {} , relation dimensions {}".format(entity_embeddings.size(),
                                                                         relation_embeddings.size()))

    train_loader = Corpus(args, train_data, validation_data, test_data, entity2id, relation2id,
                    args.batch_size, args.valid_invalid_ratio)


    file_name = "search_" + str(args.model_name) + "_embedding_size_" + str(args.embedding_size) + "_lr_" + str(
        args.lr) + "_epochs_" + str(args.epochs) + "_batch_size_" + str(args.batch_size) + "_dropout_" + str(
        args.dropout) + "_step_size_" + str(args.step_size) + "_layers_" + str(args.layers) + "_margin_" + str(args.margin)

    model_path = os.path.join(args.output_dir, file_name)
    output_file = os.path.join(args.output_dir, "results_" + file_name + ".txt")

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    if args.model_name == 'NASE':
        model = KG_search(entity_embeddings, relation_embeddings, config=args)
    else:
        raise ValueError("no such model name: " + str(args.model_name))

    if args.load != 'None':
        model.load_state_dict(torch.load(args.load))
        print("model loaded")

    if CUDA:
        model.cuda()

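    # Architect appears to handle the architecture-parameter updates during the
    # search (DARTS-style bi-level optimisation); this is an assumption from the name.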
    architect = Architect(model, args)

    # Use the builtin sum: np.sum over a generator is deprecated.
    cnt_params = sum(np.prod(v.size()) for name, v in model.named_parameters()
                     if "auxiliary" not in name) / 1e6
    print("param size = ", cnt_params, "MB")

    for name, param in model.named_parameters():
        if not param.requires_grad:
            print("name", name)
            param.requires_grad = True

    #print("arch_parameters", model.arch_parameters())

    best_epoch = 0
    if args.evaluate == 0:
        best_epoch = train(args, train_loader, model, model_path, architect)
    evaluate(args, model, model_path, train_loader, output_file, best_epoch=best_epoch, best_or_final='best')
    evaluate(args, model, model_path, train_loader, output_file, best_epoch=best_epoch, best_or_final='final')
Code example #5
File: run.py  Project: diweiqiang/DisenE
def main():

    args.data_dir = os.path.join(args.data_dir, args.dataset)
    args.output_dir = os.path.join(args.output_dir, args.dataset)

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
    else:
        os.makedirs(args.output_dir, exist_ok=True)

    CUDA = torch.cuda.is_available()

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    print("args = ", args)

    train_data, validation_data, test_data, entity2id, relation2id = build_data(
        args.data_dir)

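    # Initialise embeddings from pretrained TransE vectors when requested, otherwise randomly.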
    if args.pretrained_emb:
        entity_embeddings, relation_embeddings = init_embeddings(
            os.path.join(args.data_dir, 'entity2vec.txt'),
            os.path.join(args.data_dir, 'relation2vec.txt'), args.k_factors,
            args.embedding_size)
        print("Initialised relations and entities from TransE")

    else:
        entity_embeddings = np.random.randn(
            len(entity2id), args.embedding_size * args.k_factors)
        relation_embeddings = np.random.randn(len(relation2id),
                                              args.embedding_size)
        print("Initialised relations and entities randomly")

    entity_embeddings = torch.FloatTensor(entity_embeddings)
    relation_embeddings = torch.FloatTensor(relation_embeddings)
    print("Initial entity dimensions {} , relation dimensions {}".format(
        entity_embeddings.size(), relation_embeddings.size()))

    train_loader = Corpus(args, train_data, validation_data, test_data,
                          entity2id, relation2id, args.batch_size,
                          args.valid_invalid_ratio)

    file_name = "model_name_" + str(
        args.model_name
    ) + "_embedding_size_" + str(args.embedding_size) + "_k_factors_" + str(
        args.k_factors) + "_lr_" + str(args.lr) + "_epochs_" + str(
            args.epochs
        ) + "_out_channels_" + str(args.out_channels) + "_batch_size_" + str(
            args.batch_size) + "_dropout_" + str(
                args.dropout) + "_pretrained_emb_" + str(
                    args.pretrained_emb) + "_step_size_" + str(
                        args.step_size) + "_gamma_" + str(
                            args.gamma) + "_w1_" + str(args.w1) + "_w2_" + str(
                                args.w2) + "_sample_num_" + str(
                                    args.sample_num) + "_top_n_" + str(
                                        args.top_n)

    model_path = os.path.join(args.output_dir, file_name)
    output_file = os.path.join(args.output_dir,
                               "results_" + file_name + ".txt")

    if not os.path.exists(model_path):
        os.makedirs(model_path)

    if args.model_name == 'ConvKB':
        model = ConvKB(entity_embeddings, relation_embeddings, config=args)
    elif args.model_name == 'TransE':
        model = TransE(entity_embeddings, relation_embeddings, config=args)
    elif args.model_name == 'DisenE':
        model = DisenE(entity_embeddings, relation_embeddings, config=args)
    elif args.model_name == 'DisenE_Trans':
        model = DisenE_Trans(entity_embeddings,
                             relation_embeddings,
                             config=args)

    else:
        raise ValueError("no such model name: " + str(args.model_name))

    if args.load != 'None':
        model.load_state_dict(torch.load(args.load))
        print("model loaded")

    if CUDA:
        print("using CUDA")
        model.cuda()

    best_epoch = 0
    if args.evaluate == 0:
        best_epoch = train(args, train_loader, model, CUDA, model_path)
    evaluate(args,
             model,
             model_path,
             train_loader,
             output_file,
             best_epoch=best_epoch,
             best_or_final='best')
    evaluate(args,
             model,
             model_path,
             train_loader,
             output_file,
             best_epoch=best_epoch,
             best_or_final='final')
Code example #6
                        default='../data/',
                        help='path to corpus dir')
    parser.add_argument('-src',
                        '--corpus-name',
                        type=str,
                        default='data.txt',
                        help='path to corpus data')
    parser.add_argument('--save-dir',
                        type=str,
                        default='./data/',
                        help='path to save processed data')
    parser.add_argument('--pre-w2v', type=str, default='../data/w2v')
    args = parser.parse_args()

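    # Preprocess the corpus, build batches, load pretrained word vectors, then restore the saved NER model.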
    args.corpus_data = args.corpus_dir + args.corpus_name
    corpus = Corpus(args.corpus_data, args.pre_w2v, args.save_dir, train_dev=0)
    dl = DataLoader(args.save_dir, batch_size=128, train_dev=0)()
    # dl_train, dl_test = train_test_split(dl, test_size=0.33)
    pre_w2v = torch.load(args.save_dir + 'pre_w2v')
    pre_w2v = torch.Tensor(pre_w2v).to(device)

    model_ckpt = torch.load(os.path.join(
        args.save_dir, '{}.pyt'.format("Transformer_NER_best")),
                            map_location=torch.device(device))
    config = load_obj(args.save_dir + 'Config.json')
    model = Transformer_Mix(config, pre_w2v).to(device)
    model.load_state_dict(model_ckpt['model'])

    # pred_tags = []
    # true_tags = []