def test_samples_independence_in_batch() -> None:
    vocab = {'cat': 1, 'dog': 2, 'bird': 3}  # 0 is reserved for padding
    model = HAN(vocab=vocab, freeze_emb=True, load_glove=False)
    model.eval()

    def get_rand_sample() -> torch.Tensor:
        # one document of 12 sentences, 10 words each
        return torch.randint(low=1, high=len(vocab), size=(1, 12, 10), dtype=torch.int64)

    n_test = 5
    for _ in range(n_test):
        x_sample = get_rand_sample()
        y_sample = get_rand_sample()
        z_sample = get_rand_sample()

        # z_sample appears in both batches; its outputs must not depend on
        # the other sample in the batch
        batch_a = torch.cat([x_sample, z_sample])
        batch_b = torch.cat([y_sample, z_sample])

        output_a = model(batch_a)
        output_b = model(batch_b)

        for key in ['logits', 'w_scores', 's_scores']:
            assert torch.allclose(output_a[key][1], output_b[key][1])
def test_batch_size() -> None:
    bs = 384
    vocab = ImdbReviewsDataset.get_imdb_vocab(IMBD_ROOT)
    model = HAN(vocab=vocab, freeze_emb=True, load_glove=False)
    model.cuda()

    batch = torch.ones((bs, TXT_CLIP, SNT_CLIP), dtype=torch.int64).cuda()
    model(batch)
def main(args: Namespace) -> None:
    set_global_seed(args.seed)
    is_wandb = setup_wandb()

    train_loader, test_loader, vocab = get_loaders(batch_size=args.batch_size)
    loaders = OrderedDict([('train', train_loader), ('valid', test_loader)])

    model = HAN(vocab=vocab, freeze_emb=args.freeze_emb)
    criterion = torch.nn.BCELoss()
    optimizer = torch.optim.SGD(lr=1e-2, momentum=.9, params=model.parameters())
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

    if is_wandb:
        Runner = runner_pkg.SupervisedWandbRunner
        extra_args = {'monitoring_params': {'project': 'neuro_imdb'}}
    else:
        Runner = runner_pkg.SupervisedRunner
        extra_args = {}

    runner = Runner(input_key='features',
                    output_key=None,
                    input_target_key='targets',
                    device=args.device if is_available() else tdevice('cpu'))

    callbacks = [
        clb.AccuracyCallback(prefix='accuracy', input_key='targets', output_key='logits',
                             accuracy_args=[1], threshold=.5, num_classes=1, activation=None),
        clb.EarlyStoppingCallback(patience=5, minimize=False, min_delta=0.02,
                                  metric='accuracy01')
    ]

    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 logdir=str(args.logdir),
                 num_epochs=args.n_epoch,
                 verbose=True,
                 main_metric='accuracy01',
                 valid_loader='valid',
                 callbacks=callbacks,
                 minimize_metric=False,
                 checkpoint_data={'params': model.init_params},
                 **extra_args)
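The training entry point above consumes an `argparse.Namespace`. The sketch below shows one possible CLI parser that would feed it; the flag names and defaults are assumptions inferred from the attributes `main()` reads (`seed`, `batch_size`, `freeze_emb`, `device`, `logdir`, `n_epoch`), not the repo's actual parser.

```python
# Hypothetical CLI parser for main(); flag names and defaults are assumptions
# derived from the attributes accessed in main(), not the project's own code.
from argparse import ArgumentParser
from pathlib import Path


def get_parser() -> ArgumentParser:
    parser = ArgumentParser(description='Train HAN on IMDB reviews')
    parser.add_argument('--seed', type=int, default=42)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--freeze_emb', action='store_true')
    parser.add_argument('--device', type=str, default='cuda:0')
    parser.add_argument('--logdir', type=Path, default=Path('logs/han'))
    parser.add_argument('--n_epoch', type=int, default=30)
    return parser


if __name__ == '__main__':
    main(get_parser().parse_args())
```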
def test_model() -> None:
    vocab = {'cat': 1, 'dog': 2, 'bird': 3}  # 0 is reserved for padding
    model = HAN(vocab=vocab, freeze_emb=True, load_glove=False)

    batch = torch.randint(low=1, high=len(vocab), size=(16, 12, 10), dtype=torch.int64)
    logits = model(batch)['logits']

    # outputs are probabilities, so they must lie in [0, 1]
    assert torch.all(0 <= logits)
    assert torch.all(1 >= logits)
def test_forward_for_dataset() -> None:
    # data
    dataset = get_test_dataset()
    ids = [2, 5]
    docs = collate_docs([dataset[i] for i in ids])['features']
    n_doc, n_snt, n_wrd = docs.shape

    # model
    model = HAN(vocab=dataset.vocab, freeze_emb=True, load_glove=False)

    # forward
    output = model(docs)
    pred = output['logits']
    w_scores = output['w_scores']
    s_scores = output['s_scores']

    assert pred.numel() == n_doc
    assert w_scores.shape == docs.shape
    assert s_scores.shape == (n_doc, n_snt)
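The shape assertions above pin down how the attention outputs relate to the input: `w_scores` holds one score per word token and `s_scores` one score per sentence. As a minimal, self-contained illustration (dummy tensors only, no model involved), those shapes can be used to locate the most attended sentence and word per document:

```python
import torch

# Dummy scores with the shapes asserted in the test:
# w_scores: (n_doc, n_snt, n_wrd), s_scores: (n_doc, n_snt)
n_doc, n_snt, n_wrd = 2, 12, 10
w_scores = torch.rand(n_doc, n_snt, n_wrd)
s_scores = torch.rand(n_doc, n_snt)

# Most attended sentence per document, then the most attended word inside it.
top_snt = s_scores.argmax(dim=1)                                 # (n_doc,)
top_wrd = w_scores[torch.arange(n_doc), top_snt].argmax(dim=1)   # (n_doc,)
print(top_snt.tolist(), top_wrd.tolist())
```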
def main():
    args = init_para()
    config_file = ["./src/config.ini"]
    config = Config(config_file, args)
    g_hin = HIN(config.input_fold, config.data_type, config.relation_list)

    # Model selection
    if args.model == "RHINE":
        g_hin.load_matrix()
        g_hin.generate_matrix(config.combination)
        RHINEdp = RHINEDataProcess(config, g_hin)
        RHINEdp.generate_triples()
        RHINEdp.merge_triples(config.relation_category)
        print("Train")
        TrainRHINE(config, g_hin.node2id_dict)
    elif args.model == "Metapath2vec":
        config.temp_file += args.dataset + '_' + config.metapath + '.txt'
        config.out_emd_file += args.dataset + '_' + config.metapath + '.txt'
        random_walk_based_mp(g_hin, config.metapath, config.num_walks, config.walk_length,
                             config.temp_file)
        m2v = Metapath2VecTrainer(config, g_hin)
        m2v.train()
    elif args.model == "HeteSpaceyWalk":
        config.temp_file += args.dataset + '_' + config.metapath + '.txt'
        config.out_emd_file += args.dataset + '_' + config.metapath + '.txt'
        random_walk_spacey_mp(g_hin, config.metapath, config.data_type, config.num_walks,
                              config.walk_length, config.temp_file, config.beta)
        m2v = Metapath2VecTrainer(config)
        m2v.train()
    elif args.model == "DHNE":
        hyper_edge_sample(g_hin, output_datafold=config.temp_file, scale=config.scale,
                          tup=config.triple_hyper)
        dataset = read_data_sets(train_dir=config.temp_file)
        dim_feature = [sum(dataset.train.nums_type) - n for n in dataset.train.nums_type]
        Process(dataset, dim_feature, embedding_size=config.dim, hidden_size=config.hidden_size,
                learning_rate=config.alpha, alpha=config.alpha, batch_size=config.batch_size,
                num_neg_samples=config.neg_num, epochs_to_train=config.epochs,
                output_embfold=config.out_emd_file, output_modelfold=config.output_modelfold,
                prefix_path=config.prefix_path, reflect=g_hin.matrix2id_dict)
    # elif args.model == "HHNE":
    #     random_walk_txt = config.temp_file + args.dataset + '-' + config.metapath + '.txt'
    #     node_type_mapping_txt = config.temp_file + 'node_type_mapping.txt'
    #     config.out_emd_file += args.dataset + '-' + config.metapath + '.txt'
    #     print("Metapath walking!")
    #     if len(config.metapath) == 3:
    #         # data = random_walk_three(config.num_walks, config.walk_length, config.metapath, g_hin, random_walk_txt)
    #         data = random_walk_three(1, 5, config.metapath, g_hin, random_walk_txt)
    #     elif len(config.metapath) == 5:
    #         data = random_walk_five(config.num_walks, config.walk_length, config.metapath, g_hin, random_walk_txt)
    #     # node_type_mapping_txt = g_hin.node_type_mapping(node_type_mapping_txt)
    #     dataset = HHNE.Dataset(random_walk_txt=random_walk_txt, window_size=config.window_size)
    #     print("Train" + str(len(dataset.index2nodeid)))
    #     pos_holder, tar_holder, tag_holder, pro_holder, grad_pos, grad_tar = HHNE.bulid_model(EMBED_SIZE=config.dim)
    #     HHNE.TrainHHNE(pos_holder, tar_holder, tag_holder, pro_holder, grad_pos, grad_tar, dataset,
    #                    BATCH_SIZE=config.batch_size, NUM_EPOCHS=config.epochs, NUM_SAMPLED=config.neg_num,
    #                    VOCAB_SIZE=len(dataset.nodeid2index), EMBED_SIZE=config.dim, startingAlpha=config.alpha,
    #                    lr_decay=config.lr_decay, output_embfold=config.out_emd_file)
    elif args.model == "MetaGraph2vec":
        config.temp_file += 'graph_rw.txt'
        config.out_emd_file += args.dataset + '_node.txt'
        mgg = MetaGraphGenerator()
        if args.dataset == "acm":
            mgg.generate_random_three(config.temp_file, config.num_walks, config.walk_length,
                                      g_hin.node, g_hin.relation_dict)
        elif args.dataset == "dblp":
            mgg.generate_random_four(config.temp_file, config.num_walks, config.walk_length,
                                     g_hin.node, g_hin.relation_dict)
        model = Metapath2VecTrainer(config, g_hin)
        print("Training")
        model.train()
    # elif args.model == "PME":
    #     pme = PME(
    #         g_hin.input_edge,
    #         g_hin.node2id_dict,
    #         g_hin.relation2id_dict,
    #         config.dim,
    #         config.dimensionR,
    #         config.loadBinaryFlag,
    #         config.outBinaryFlag,
    #         config.num_workers,
    #         config.nbatches,
    #         config.epochs,
    #         config.no_validate,
    #         config.alpha,
    #         config.margin,
    #         config.M,
    #         config.out_emd_file
    #     )
    #     # pme.load()
    #     pme.train()
    #     pme.out()
    elif args.model == "PTE":
        config.temp_file += args.dataset + '.txt'
        config.out_emd_file += args.dataset + '_node.txt'
        print('PTE')
        data = PTEDataReader(g_hin, config)
        alias_table = AliasSampling(data)
        pte = PTETrainer(g_hin, config, data, alias_table)
        print('Training')
        pte.train()
    elif args.model == "HERec":
        mp_list = config.metapath_list.split("|")
        for mp in mp_list:
            HERec_gen_neighbour(g_hin, mp, config.temp_file)
            config.input = config.temp_file + mp + ".txt"
            config.out_put = config.out_emd_file + mp + ".txt"
            DW(config)
        HERec_union_metapth(config.out_emd_file, mp_list, len(g_hin.node[mp_list[0][0]]),
                            config.dim)
    elif args.model == "HIN2vec":
        HIN2vec(g_hin, config.out_emd_file, config)
    elif args.model == "HAN":
        data_process = HAN_process(g_hin, config.mp_list, args.dataset, config.featype)
        config.out_emd_file += args.dataset + '_node.txt'
        m = HAN(config, data_process)
        m.train()
    elif args.model == "HeGAN":
        model = HeGAN(g_hin, args, config)
        model.train(config, g_hin.node2id_dict)
    else:
        pass
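The dispatcher above relies on `init_para()` to supply at least `args.model` and `args.dataset`. A possible shape for that parser is sketched below; the flag spellings, defaults, and help strings are assumptions based only on the attributes `main()` reads and the model/dataset names in the `if/elif` chain, not the actual implementation.

```python
# Hypothetical version of init_para(); flag spellings and defaults are assumptions
# inferred from args.model / args.dataset usage in main().
import argparse


def init_para() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description='Heterogeneous network embedding')
    parser.add_argument('-m', '--model', type=str, default='Metapath2vec',
                        help='RHINE | Metapath2vec | HeteSpaceyWalk | DHNE | MetaGraph2vec | '
                             'PTE | HERec | HIN2vec | HAN | HeGAN')
    parser.add_argument('-d', '--dataset', type=str, default='acm',
                        help='acm | dblp')
    return parser.parse_args()
```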
def main():
    args = init_para()
    config_file = ["./src/config.ini"]
    config = Config(config_file, args)
    g_hin = HIN(args.dataset, config.data_type, config.relation_list)

    # Model selection
    if args.model == "RHINE":
        g_hin.load_matrix()
        g_hin.generate_matrix(config.combination)
        RHINEdp = RHINEDataProcess(config, g_hin)
        RHINEdp.generate_triples()
        RHINEdp.merge_triples(config.relation_category)
        print("Train")
        TrainRHINE(config, g_hin.node2id_dict)
    elif args.model == "Metapath2vec":
        config.temp_file += args.dataset + '-' + config.metapath + '.txt'
        config.out_emd_file += args.dataset + '-' + config.metapath + '.txt'
        print("Metapath walking!")
        if len(config.metapath) == 3:
            data = random_walk_three(config.num_walks, config.walk_length, config.metapath,
                                     g_hin, config.temp_file)
        elif len(config.metapath) == 5:
            data = random_walk_five(config.num_walks, config.walk_length, config.metapath,
                                    g_hin, config.temp_file)
        m2v = Metapath2VecTrainer(config)
        print("Training")
        m2v.train()
    elif args.model == "DHNE":
        hyper_edge_sample(g_hin, output_datafold=config.temp_file, scale=config.scale,
                          tup=config.triple_hyper)
        dataset = read_data_sets(train_dir=config.temp_file)
        dim_feature = [sum(dataset.train.nums_type) - n for n in dataset.train.nums_type]
        Process(dataset, dim_feature, embedding_size=config.dim, hidden_size=config.hidden_size,
                learning_rate=config.alpha, alpha=config.alpha, batch_size=config.batch_size,
                num_neg_samples=config.neg_num, epochs_to_train=config.epochs,
                output_embfold=config.out_emd_file, output_modelfold=config.output_modelfold,
                prefix_path=config.prefix_path, reflect=g_hin.matrix2id_dict)
    elif args.model == "MetaGraph2vec":
        config.temp_file += 'graph_rw.txt'
        config.out_emd_file += 'node.txt'
        mgg = MetaGraphGenerator()
        if args.dataset == "acm":
            mgg.generate_random_three(config.temp_file, config.num_walks, config.walk_length,
                                      g_hin.node, g_hin.relation_dict)
        elif args.dataset == "dblp":
            mgg.generate_random_four(config.temp_file, config.num_walks, config.walk_length,
                                     g_hin.node, g_hin.relation_dict)
        model = Metapath2VecTrainer(config)
        print("Training")
        model.train()
    elif args.model == "HERec":
        mp_list = config.metapath_list.split("|")
        for mp in mp_list:
            # HERec_gen_neighbour(g_hin, mp, config.temp_file)
            config.input = config.temp_file + mp + ".txt"
            config.out_put = config.out_emd_file + mp + ".txt"
            DW(config)
        HERec_union_metapth(config.out_emd_file, mp_list, len(g_hin.node[mp_list[0][0]]),
                            config.dim)
    elif args.model == "HIN2vec":
        HIN2vec(g_hin, config.out_emd_file, config)
    elif args.model == "HAN":
        data_process = HAN_process(g_hin, config.mp_list, args.dataset)
        config.out_emd_file += 'node.txt'
        m = HAN(config, data_process)
        m.train()
    elif args.model == "HeGAN":
        model = HeGAN(g_hin, args, config)
        model.train(config, g_hin.node2id_dict)
    else:
        pass