def process_data():
    """Prepare the KB environment files for the dataset in ``args.data_dir``.

    Resolves the raw-KB and triple-split paths, then delegates all actual
    preprocessing to ``data_utils.prepare_kb_envrioment`` (sic — the helper's
    name is misspelled at its definition site).
    """
    data_dir = args.data_dir
    # Train split location may depend on run flags, so it goes through the helper.
    train_path = data_utils.get_train_path(args)
    raw_kb_path = os.path.join(data_dir, 'raw.kb')
    dev_path = os.path.join(data_dir, 'dev.triples')
    test_path = os.path.join(data_dir, 'test.triples')
    data_utils.prepare_kb_envrioment(
        raw_kb_path,
        train_path,
        dev_path,
        test_path,
        args.test,
        args.add_reverse_relations,
    )
def train(lf):
    """Load the train/dev/test triples and run ``lf``'s training loop.

    Optionally warm-starts ``lf`` from ``args.checkpoint_path`` first.
    """
    data_dir = args.data_dir
    entity_index_path = os.path.join(data_dir, 'entity2id.txt')
    relation_index_path = os.path.join(data_dir, 'relation2id.txt')

    train_data = data_utils.load_triples(
        data_utils.get_train_path(args),
        entity_index_path,
        relation_index_path,
        group_examples_by_query=args.group_examples_by_query,
        add_reverse_relations=args.add_reversed_training_edges)

    # NELL-style datasets restrict dev/test to entities that appear in the
    # precomputed adjacency list; other datasets apply no such filter.
    if 'NELL' in data_dir:
        seen_entities = data_utils.load_seen_entities(
            os.path.join(data_dir, 'adj_list.pkl'), entity_index_path)
    else:
        seen_entities = set()

    dev_data = data_utils.load_triples(
        os.path.join(data_dir, 'dev.triples'),
        entity_index_path, relation_index_path,
        seen_entities=seen_entities)
    test_data = data_utils.load_triples(
        os.path.join(data_dir, 'test.triples'),
        entity_index_path, relation_index_path,
        seen_entities=seen_entities)

    if args.checkpoint_path is not None:
        lf.load_checkpoint(args.checkpoint_path)
    lf.run_train(train_data, dev_data, test_data)
def train(lf):
    """Train ``lf`` in one of three modes: adaptation, few-shot, or standard.

    - adaptation: per relation, reload the checkpoint and fine-tune on that
      relation's examples only;
    - few-shot: train once on the "normal" (non-few-shot) partition;
    - standard: plain training on the full train split.
    """
    train_path = data_utils.get_train_path(args)
    dev_path = os.path.join(args.data_dir, 'dev.triples')
    entity_index_path = os.path.join(args.data_dir, 'entity2id.txt')
    relation_index_path = os.path.join(args.data_dir, 'relation2id.txt')

    meta_mode = args.few_shot or args.adaptation
    if meta_mode:
        # Loader splits the train set into a normal partition and a
        # per-relation few-shot partition when few_shot=True.
        normal_train_data, few_train_data = data_utils.load_triples(
            train_path, entity_index_path, relation_index_path,
            group_examples_by_query=args.group_examples_by_query,
            add_reverse_relations=args.add_reversed_training_edges,
            few_shot=True, lf=lf)
    else:
        train_data = data_utils.load_triples(
            train_path, entity_index_path, relation_index_path,
            group_examples_by_query=args.group_examples_by_query,
            add_reverse_relations=args.add_reversed_training_edges)

    # NELL datasets filter dev triples down to entities seen in the adjacency list.
    if 'NELL' in args.data_dir:
        adj_list_path = os.path.join(args.data_dir, 'adj_list.pkl')
        seen_entities = data_utils.load_seen_entities(adj_list_path, entity_index_path)
    else:
        seen_entities = set()

    if meta_mode:
        normal_dev_data, few_dev_data = data_utils.load_triples(
            dev_path, entity_index_path, relation_index_path,
            seen_entities=seen_entities, few_shot=True, lf=lf)
    else:
        dev_data = data_utils.load_triples(
            dev_path, entity_index_path, relation_index_path,
            seen_entities=seen_entities)

    if args.adaptation:
        for rel in few_train_data:
            # Reload the base checkpoint before adapting to each relation so
            # adaptations do not compound.
            if args.checkpoint_path is not None:
                lf.load_checkpoint(args.checkpoint_path, adaptation=True)
            lf.run_train(few_train_data[rel], few_dev_data,
                         adaptation=True, adaptation_relation=rel)
    elif args.few_shot:
        if args.checkpoint_path is not None:
            lf.load_checkpoint(args.checkpoint_path)
        lf.run_train(normal_train_data, normal_dev_data, few_shot=True)
    else:
        if args.checkpoint_path is not None:
            # emb_few toggles a special checkpoint-loading path in lf.
            if args.emb_few:
                lf.load_checkpoint(args.checkpoint_path, emb_few=True)
            else:
                lf.load_checkpoint(args.checkpoint_path)
        lf.run_train(train_data, dev_data)
# NOTE(review): the four assignments below may be trailing attributes of a
# config class (e.g. ``Args``) whose header lies above this chunk — confirm
# their enclosing scope before relying on module-level placement.
relation_only = False
run_analysis = False
theta = 0.2
checkpoint_path = None

if __name__ == '__main__':
    args = Args()
    # NOTE(review): the original text was collapsed onto one line; the exact
    # nesting under ``with torch.enable_grad():`` is reconstructed here and
    # should be confirmed — everything below is assumed to run inside it.
    with torch.enable_grad():
        # Set up the run directory, build the model, and move it to GPU.
        initialize_model_directory(args)
        lf: LFramework = construct_model(args)
        to_cuda(lf)
        # Resolve dataset paths relative to args.data_dir.
        train_path = data_utils.get_train_path(args)
        dev_path = os.path.join(args.data_dir, 'dev.triples')
        entity_index_path = os.path.join(args.data_dir, 'entity2id.txt')
        relation_index_path = os.path.join(args.data_dir, 'relation2id.txt')
        train_data = data_utils.load_triples(
            train_path, entity_index_path, relation_index_path,
            group_examples_by_query=args.group_examples_by_query,
            add_reverse_relations=args.add_reversed_training_edges)
        # No entity filtering here (unlike the NELL branch elsewhere):
        # dev triples are loaded against an empty seen-entity set.
        seen_entities = set()
        dev_data = data_utils.load_triples(
            dev_path, entity_index_path, relation_index_path,
            seen_entities=seen_entities)
def train(lf):
    """Dispatch training of ``lf`` to the adaptation, few-shot, or plain path.

    Data layout (per the original author's notes — confirm against
    ``data_utils.load_triples``):
      * few-shot loading yields a dict keyed by relation id,
        e.g. ``{"11": [(362, 11, 57), ...], ...}``;
      * grouped loading yields ``[(head, [relations...], tail), ...]``.
    """
    train_path = data_utils.get_train_path(args)
    dev_path = os.path.join(args.data_dir, "dev.triples")
    entity_index_path = os.path.join(args.data_dir, "entity2id.txt")
    relation_index_path = os.path.join(args.data_dir, "relation2id.txt")

    use_meta = args.few_shot or args.adaptation

    if use_meta:
        # group_examples_by_query is False in meta-learning runs (author note).
        normal_train_data, few_train_data = data_utils.load_triples(
            train_path,
            entity_index_path,
            relation_index_path,
            group_examples_by_query=args.group_examples_by_query,
            add_reverse_relations=args.add_reversed_training_edges,
            few_shot=True,
            lf=lf,
        )
    else:
        print("jxtu: load all train data...")
        # group_examples_by_query is True in embedding training (author note).
        normal_train_data = few_train_data = None
        train_data = data_utils.load_triples(
            train_path,
            entity_index_path,
            relation_index_path,
            group_examples_by_query=args.group_examples_by_query,
            add_reverse_relations=args.add_reversed_training_edges,
        )

    # NELL datasets filter dev triples to entities present in the adjacency list.
    seen_entities = set()
    if "NELL" in args.data_dir:
        adj_list_path = os.path.join(args.data_dir, "adj_list.pkl")
        seen_entities = data_utils.load_seen_entities(adj_list_path,
                                                      entity_index_path)

    if use_meta:
        normal_dev_data, few_dev_data = data_utils.load_triples(
            dev_path,
            entity_index_path,
            relation_index_path,
            seen_entities=seen_entities,
            few_shot=True,
            lf=lf,
        )
    else:
        dev_data = data_utils.load_triples(
            dev_path,
            entity_index_path,
            relation_index_path,
            seen_entities=seen_entities,
        )

    if args.adaptation:
        # Restore the base checkpoint for every relation so each adaptation
        # starts from the same weights.
        for rel in few_train_data:
            if args.checkpoint_path is not None:
                lf.load_checkpoint(args.checkpoint_path, adaptation=True)
            lf.run_train(few_train_data[rel],
                         few_dev_data,
                         adaptation=True,
                         adaptation_relation=rel)
    elif args.few_shot:
        if args.checkpoint_path is not None:
            lf.load_checkpoint(args.checkpoint_path)
        lf.run_train(normal_train_data, normal_dev_data, few_shot=True)
    else:
        if args.checkpoint_path is not None:
            lf.load_checkpoint(args.checkpoint_path, emb_few=True) \
                if args.emb_few else lf.load_checkpoint(args.checkpoint_path)
        lf.run_train(train_data, dev_data)